Profile: repurposed kThresholdPercent

Previously kThresholdPercent was the minimum percentage of the total
samples that a single method had to account for before being compiled.

I changed it to mean the threshold for a running total, i.e. compile
the most used methods that cumulatively comprise K% of the samples.

(In the process, fixed the ProfileData#percent doc and renamed the field.)

Bug: 12877748
Change-Id: Ib0e18e525a16c11b189afc3d840c09183ac629de
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 4f98ba4..5c790be 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -19,9 +19,10 @@
 #define ATRACE_TAG ATRACE_TAG_DALVIK
 #include <utils/Trace.h>
 
+#include <fstream>
 #include <vector>
 #include <unistd.h>
-#include <fstream>
+#include <utility>
 
 #include "base/stl_util.h"
 #include "base/timing_logger.h"
@@ -2054,6 +2055,9 @@
   }
 
   // Now read each line until the end of file.  Each line consists of 3 fields separated by /
+  // Store the entries sorted in descending order of use count (most used methods first).
+  typedef std::set<std::pair<int, std::vector<std::string>>> ProfileSet;
+  ProfileSet countSet;
   while (!in.eof()) {
     std::getline(in, line);
     if (in.eof()) {
@@ -2065,12 +2069,29 @@
       // Malformed.
       break;
     }
-    const std::string& methodname = info[0];
-    uint32_t count = atoi(info[1].c_str());
-    uint32_t size = atoi(info[2].c_str());
-    double percent = (count * 100.0) / total_count;
+    int count = atoi(info[1].c_str());
+    countSet.insert(std::make_pair(-count, info));
+  }
+
+  uint32_t curTotalCount = 0;
+  ProfileSet::iterator end = countSet.end();
+  const ProfileData* prevData = nullptr;
+  for (ProfileSet::iterator it = countSet.begin(); it != end ; it++) {
+    const std::string& methodname = it->second[0];
+    uint32_t count = -it->first;
+    uint32_t size = atoi(it->second[2].c_str());
+    double usedPercent = (count * 100.0) / total_count;
+
+    curTotalCount += count;
+    // Methods with the same count should be part of the same top K percentage bucket
+    double topKPercentage = (prevData != nullptr) && (prevData->GetCount() == count)
+      ? prevData->GetTopKUsedPercentage()
+      : 100 * static_cast<double>(curTotalCount) / static_cast<double>(total_count);
+
     // Add it to the profile map
-    profile_map_[methodname] = ProfileData(methodname, count, size, percent);
+    ProfileData curData = ProfileData(methodname, count, size, usedPercent, topKPercentage);
+    profile_map_[methodname] = curData;
+    prevData = &curData;
   }
   return true;
 }
@@ -2079,7 +2100,8 @@
   if (!profile_ok_) {
     return true;
   }
-  constexpr double kThresholdPercent = 2.0;      // Anything above this threshold will be compiled.
+  // Methods that cumulatively comprise the top kThresholdPercent % of samples will be compiled.
+  constexpr double kThresholdPercent = 90.0;
 
   // First find the method in the profile map.
   ProfileMap::iterator i = profile_map_.find(method_name);
@@ -2089,13 +2111,15 @@
     return true;
   }
   const ProfileData& data = i->second;
-  bool compile = data.IsAbove(kThresholdPercent);
+  // Compare against the start of the topK percentage bucket just in case the threshold
+  // falls inside a bucket
+  bool compile = data.GetTopKUsedPercentage() - data.GetUsedPercent() <= kThresholdPercent;
   if (compile) {
-    LOG(INFO) << "compiling method " << method_name << " because its usage is " <<
-        data.GetPercent() << "%";
+    LOG(INFO) << "compiling method " << method_name << " because its usage is part of top "
+        << data.GetTopKUsedPercentage() << "% with a percent of " << data.GetUsedPercent() << "%";
   } else {
     VLOG(compiler) << "not compiling method " << method_name << " because usage is too low ("
-        << data.GetPercent() << "%)";
+        << data.GetUsedPercent() << "%)";
   }
   return !compile;
 }
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 256aa46..4257241 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -598,22 +598,28 @@
   // in a file.  It is used to determine whether to compile a particular method or not.
   class ProfileData {
    public:
-    ProfileData() : count_(0), method_size_(0), percent_(0) {}
-    ProfileData(const std::string& method_name, uint32_t count, uint32_t method_size, double percent) :
-      method_name_(method_name), count_(count), method_size_(method_size), percent_(percent) {
+    ProfileData() : count_(0), method_size_(0), usedPercent_(0) {}
+    ProfileData(const std::string& method_name, uint32_t count, uint32_t method_size,
+      double usedPercent, double topKUsedPercentage) :
+      method_name_(method_name), count_(count), method_size_(method_size),
+      usedPercent_(usedPercent), topKUsedPercentage_(topKUsedPercentage) {
       // TODO: currently method_size_ and count_ are unused.
       UNUSED(method_size_);
       UNUSED(count_);
     }
 
-    bool IsAbove(double v) const { return percent_ >= v; }
-    double GetPercent() const { return percent_; }
+    bool IsAbove(double v) const { return usedPercent_ >= v; }
+    double GetUsedPercent() const { return usedPercent_; }
+    uint32_t GetCount() const { return count_; }
+    double GetTopKUsedPercentage() const { return topKUsedPercentage_; }
 
    private:
-    std::string method_name_;   // Method name.
-    uint32_t count_;            // Number number of times it has been called.
-    uint32_t method_size_;      // Size of the method on dex instructions.
-    double percent_;            // Percentage of time spent in this method.
+    std::string method_name_;    // Method name.
+    uint32_t count_;             // Number of times it has been called.
+    uint32_t method_size_;       // Size of the method on dex instructions.
+    double usedPercent_;         // Percentage of how many times this method was called.
+    double topKUsedPercentage_;  // Cumulative percentage of the total samples covered by the
+                                 // top-K bucket this method belongs to.
   };
 
   // Profile data is stored in a map, indexed by the full method name.