Tweaked profile significant_difference.

- renamed to 'change_thr'
- now it represents how much the top K leading samples need to change
(in percents) in order to trigger compilation.
- extracted ProfileData & file parsing in profiler.h

Bug: 12877748
Change-Id: I10f66120dd5e68b8a690bfa0e9914c07f63c50d5
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index fc1332a..d071805 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -19,10 +19,8 @@
 #define ATRACE_TAG ATRACE_TAG_DALVIK
 #include <utils/Trace.h>
 
-#include <fstream>
 #include <vector>
 #include <unistd.h>
-#include <utility>
 
 #include "base/stl_util.h"
 #include "base/timing_logger.h"
@@ -372,7 +370,7 @@
 
   // Read the profile file if one is provided.
   if (profile_file != "") {
-    profile_ok_ = ReadProfile(profile_file);
+    profile_ok_ = ProfileHelper::LoadProfileMap(profile_map_, profile_file);
   }
 
   dex_to_dex_compiler_ = reinterpret_cast<DexToDexCompilerFn>(ArtCompileDEX);
@@ -2030,83 +2028,6 @@
     }
   }
 
-bool CompilerDriver::ReadProfile(const std::string& filename) {
-  VLOG(compiler) << "reading profile file " << filename;
-  struct stat st;
-  int err = stat(filename.c_str(), &st);
-  if (err == -1) {
-    VLOG(compiler) << "not found";
-    return false;
-  }
-  std::ifstream in(filename.c_str());
-  if (!in) {
-    VLOG(compiler) << "profile file " << filename << " exists but can't be opened";
-    VLOG(compiler) << "file owner: " << st.st_uid << ":" << st.st_gid;
-    VLOG(compiler) << "me: " << getuid() << ":" << getgid();
-    VLOG(compiler) << "file permissions: " << std::oct << st.st_mode;
-    VLOG(compiler) << "errno: " << errno;
-    return false;
-  }
-  // The first line contains summary information.
-  std::string line;
-  std::getline(in, line);
-  if (in.eof()) {
-    return false;
-  }
-  std::vector<std::string> summary_info;
-  Split(line, '/', summary_info);
-  if (summary_info.size() != 3) {
-    // Bad summary info.  It should be count/total/bootpath.
-    return false;
-  }
-  // This is the number of hits in all methods.
-  uint32_t total_count = 0;
-  for (int i = 0 ; i < 3; ++i) {
-    total_count += atoi(summary_info[i].c_str());
-  }
-
-  // Now read each line until the end of file.  Each line consists of 3 fields separated by '/'.
-  // Store the info in descending order given by the most used methods.
-  typedef std::set<std::pair<int, std::vector<std::string>>> ProfileSet;
-  ProfileSet countSet;
-  while (!in.eof()) {
-    std::getline(in, line);
-    if (in.eof()) {
-      break;
-    }
-    std::vector<std::string> info;
-    Split(line, '/', info);
-    if (info.size() != 3) {
-      // Malformed.
-      break;
-    }
-    int count = atoi(info[1].c_str());
-    countSet.insert(std::make_pair(-count, info));
-  }
-
-  uint32_t curTotalCount = 0;
-  ProfileSet::iterator end = countSet.end();
-  const ProfileData* prevData = nullptr;
-  for (ProfileSet::iterator it = countSet.begin(); it != end ; it++) {
-    const std::string& methodname = it->second[0];
-    uint32_t count = -it->first;
-    uint32_t size = atoi(it->second[2].c_str());
-    double usedPercent = (count * 100.0) / total_count;
-
-    curTotalCount += count;
-    // Methods with the same count should be part of the same top K percentage bucket.
-    double topKPercentage = (prevData != nullptr) && (prevData->GetCount() == count)
-      ? prevData->GetTopKUsedPercentage()
-      : 100 * static_cast<double>(curTotalCount) / static_cast<double>(total_count);
-
-    // Add it to the profile map.
-    ProfileData curData = ProfileData(methodname, count, size, usedPercent, topKPercentage);
-    profile_map_[methodname] = curData;
-    prevData = &curData;
-  }
-  return true;
-}
-
 bool CompilerDriver::SkipCompilation(const std::string& method_name) {
   if (!profile_ok_) {
     return true;
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 802f859..d49523a 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -32,6 +32,7 @@
 #include "invoke_type.h"
 #include "method_reference.h"
 #include "os.h"
+#include "profiler.h"
 #include "runtime.h"
 #include "safe_map.h"
 #include "thread_pool.h"
@@ -594,43 +595,9 @@
     return cfi_info_.get();
   }
 
-  // Profile data.  This is generated from previous runs of the program and stored
-  // in a file.  It is used to determine whether to compile a particular method or not.
-  class ProfileData {
-   public:
-    ProfileData() : count_(0), method_size_(0), usedPercent_(0) {}
-    ProfileData(const std::string& method_name, uint32_t count, uint32_t method_size,
-      double usedPercent, double topKUsedPercentage) :
-      method_name_(method_name), count_(count), method_size_(method_size),
-      usedPercent_(usedPercent), topKUsedPercentage_(topKUsedPercentage) {
-      // TODO: currently method_size_ and count_ are unused.
-      UNUSED(method_size_);
-      UNUSED(count_);
-    }
-
-    bool IsAbove(double v) const { return usedPercent_ >= v; }
-    double GetUsedPercent() const { return usedPercent_; }
-    uint32_t GetCount() const { return count_; }
-    double GetTopKUsedPercentage() const { return topKUsedPercentage_; }
-
-   private:
-    std::string method_name_;    // Method name.
-    uint32_t count_;             // Number of times it has been called.
-    uint32_t method_size_;       // Size of the method on dex instructions.
-    double usedPercent_;         // Percentage of how many times this method was called.
-    double topKUsedPercentage_;  // The percentage of the group that comprise K% of the total used
-                                 // methods this methods belongs to.
-  };
-
-  // Profile data is stored in a map, indexed by the full method name.
-  typedef std::map<const std::string, ProfileData> ProfileMap;
   ProfileMap profile_map_;
   bool profile_ok_;
 
-  // Read the profile data from the given file.  Calculates the percentage for each method.
-  // Returns false if there was no profile file or it was malformed.
-  bool ReadProfile(const std::string& filename);
-
   // Should the compiler run on this method given profile information?
   bool SkipCompilation(const std::string& method_name);
 
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index bab0604..15a5779 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -14,8 +14,10 @@
  * limitations under the License.
  */
 
-#include <unistd.h>
+#include <algorithm>
 #include <fcntl.h>
+#include <set>
+#include <unistd.h>
 
 #include "base/logging.h"
 #include "class_linker.h"
@@ -30,6 +32,7 @@
 #include "mirror/string.h"
 #include "oat.h"
 #include "os.h"
+#include "profiler.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
@@ -230,13 +233,31 @@
   close(fd2);
 }
 
+static double GetDoubleProperty(const char* property, double minValue, double maxValue, double defaultValue) {
+#ifndef HAVE_ANDROID_OS
+  return defaultValue;
+#else
+  char buf[PROP_VALUE_MAX];
+  char* endptr;
+
+  property_get(property, buf, "");
+  double value = strtod(buf, &endptr);
+
+  if (value == 0 && endptr == buf) {
+    value = defaultValue;
+  } else if (value < minValue || value > maxValue) {
+    value = defaultValue;
+  }
+  return value;
+#endif
+}
+
 static jboolean DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFilename,
     jstring javaPkgname, jboolean defer) {
   const bool kVerboseLogging = false;  // Spammy logging.
   const bool kDebugLogging = true;  // Logging useful for debugging.
 
   ScopedUtfChars filename(env, javaFilename);
-
   if ((filename.c_str() == nullptr) || !OS::FileExists(filename.c_str())) {
     LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename.c_str() << "' does not exist";
     ScopedLocalRef<jclass> fnfe(env, env->FindClass("java/io/FileNotFoundException"));
@@ -282,7 +303,6 @@
     struct stat profstat, prevstat;
     int e1 = stat(profile_file.c_str(), &profstat);
     int e2 = stat(prev_profile_file.c_str(), &prevstat);
-
     if (e1 < 0) {
       // No profile file, need to run dex2oat
       if (kDebugLogging) {
@@ -290,48 +310,47 @@
       }
       return JNI_TRUE;
     }
+
     if (e2 == 0) {
       // There is a previous profile file.  Check if the profile has changed significantly.
-      // Let's use the file size as a proxy for significance.  If the new profile is 10%
-      // different in size than the the old profile then we run dex2oat.
-      double newsize = profstat.st_size;
-      double oldsize = prevstat.st_size;
-      bool need_profile = false;
+      // A change in profile is considered significant if X% (change_thr property) of the top K%
+      // (compile_thr property) samples has changed.
 
-      double ratio = 0;     // If the old file was empty and the new one not
-      if (oldsize > 0 && newsize > 0) {
-        ratio = newsize / oldsize;
-      } else if (oldsize == 0 && newsize > 0) {
-        need_profile = true;
-      } else if (oldsize > 0 && newsize == 0) {
-        // Unlikely to happen, but cover all the bases.
-        need_profile = true;
+      double topKThreshold = GetDoubleProperty("dalvik.vm.profiler.dex2oat.compile_thr", 10.0, 90.0, 90.0);
+      double changeThreshold = GetDoubleProperty("dalvik.vm.profiler.dex2oat.change_thr", 1.0, 90.0, 10.0);
+      double changePercent = 0.0;
+      std::set<std::string> newTopK, oldTopK;
+      bool newOk = ProfileHelper::LoadTopKSamples(newTopK, profile_file, topKThreshold);
+      bool oldOk = ProfileHelper::LoadTopKSamples(oldTopK, prev_profile_file, topKThreshold);
+      if (!newOk || !oldOk) {
+        if (kDebugLogging) {
+          LOG(INFO) << "DexFile_isDexOptNeeded Ignoring invalid profiles: "
+                    << (newOk ?  "" : profile_file) << " " << (oldOk ? "" : prev_profile_file);
+        }
+      } else if (newTopK.empty()) {
+        if (kDebugLogging && kVerboseLogging) {
+          LOG(INFO) << "DexFile_isDexOptNeeded empty profile: " << profile_file;
+        }
+        // If the new topK is empty we shouldn't optimize so we leave the changePercent at 0.0.
+      } else {
+        std::set<std::string> diff;
+        std::set_difference(newTopK.begin(), newTopK.end(), oldTopK.begin(), oldTopK.end(),
+          std::inserter(diff, diff.end()));
+        // TODO: consider using the usedPercentage instead of the plain diff count.
+        changePercent = 100.0 * static_cast<double>(diff.size()) / static_cast<double>(newTopK.size());
+        if (kDebugLogging && kVerboseLogging) {
+          std::set<std::string>::iterator end = diff.end();
+          for (std::set<std::string>::iterator it = diff.begin(); it != end; it++) {
+            LOG(INFO) << "DexFile_isDexOptNeeded new in topK: " << *it;
+          }
+        }
       }
 
-      double significant_difference = 10.0;
-#ifdef HAVE_ANDROID_OS
-      // Switch off profiler if the dalvik.vm.profiler property has value 0.
-      char buf[PROP_VALUE_MAX];
-      property_get("dalvik.vm.profiler.dex2oat.threshold", buf, "10.0");
-      significant_difference = strtod(buf, nullptr);
-
-      // Something reasonable?
-      if (significant_difference < 1.0 || significant_difference > 90.0) {
-        significant_difference = 10.0;
-      }
-#endif      // The percentage difference that we consider as being significant.
-      double diff_hwm = 1.0 + significant_difference/10.0;
-      double diff_lwm = 1.0 - significant_difference/10.0;
-
-      if (ratio > diff_hwm || ratio < diff_lwm) {
-        need_profile = true;
-      }
-
-      if (need_profile) {
+      if (changePercent > changeThreshold) {
         if (kDebugLogging) {
           LOG(INFO) << "DexFile_isDexOptNeeded size of new profile file " << profile_file <<
-          " is significantly different from old profile file " << prev_profile_file << " (new: " <<
-          newsize << ", old: " << oldsize << ", ratio: " << ratio << ")";
+          " is significantly different from old profile file " << prev_profile_file << " (top "
+          << topKThreshold << "% samples changed in proportion of " << changePercent << "%)";
         }
         if (!defer) {
           CopyProfileFile(profile_file.c_str(), prev_profile_file.c_str());
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index 4770a54..223fe87 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -16,6 +16,7 @@
 
 #include "profiler.h"
 
+#include <fstream>
 #include <sys/uio.h>
 #include <sys/file.h>
 
@@ -579,5 +580,101 @@
     previous_[methodname] = PreviousValue(count, size);
   }
 }
-}  // namespace art
 
+bool ProfileHelper::LoadProfileMap(ProfileMap& profileMap, const std::string& fileName) {
+  LOG(VERBOSE) << "reading profile file " << fileName;
+  struct stat st;
+  int err = stat(fileName.c_str(), &st);
+  if (err == -1) {
+    LOG(VERBOSE) << "not found";
+    return false;
+  }
+  if (st.st_size == 0) {
+    return true;  // empty profiles are ok.
+  }
+  std::ifstream in(fileName.c_str());
+  if (!in) {
+    LOG(VERBOSE) << "profile file " << fileName << " exists but can't be opened";
+    LOG(VERBOSE) << "file owner: " << st.st_uid << ":" << st.st_gid;
+    LOG(VERBOSE) << "me: " << getuid() << ":" << getgid();
+    LOG(VERBOSE) << "file permissions: " << std::oct << st.st_mode;
+    LOG(VERBOSE) << "errno: " << errno;
+    return false;
+  }
+  // The first line contains summary information.
+  std::string line;
+  std::getline(in, line);
+  if (in.eof()) {
+    return false;
+  }
+  std::vector<std::string> summary_info;
+  Split(line, '/', summary_info);
+  if (summary_info.size() != 3) {
+    // Bad summary info.  It should be count/total/bootpath.
+    return false;
+  }
+  // This is the number of hits in all methods.
+  uint32_t total_count = 0;
+  for (int i = 0 ; i < 3; ++i) {
+    total_count += atoi(summary_info[i].c_str());
+  }
+
+  // Now read each line until the end of file.  Each line consists of 3 fields separated by '/'.
+  // Store the info in descending order given by the most used methods.
+  typedef std::set<std::pair<int, std::vector<std::string>>> ProfileSet;
+  ProfileSet countSet;
+  while (!in.eof()) {
+    std::getline(in, line);
+    if (in.eof()) {
+      break;
+    }
+    std::vector<std::string> info;
+    Split(line, '/', info);
+    if (info.size() != 3) {
+      // Malformed.
+      break;
+    }
+    int count = atoi(info[1].c_str());
+    countSet.insert(std::make_pair(-count, info));
+  }
+
+  uint32_t curTotalCount = 0;
+  ProfileSet::iterator end = countSet.end();
+  const ProfileData* prevData = nullptr;
+  for (ProfileSet::iterator it = countSet.begin(); it != end ; it++) {
+    const std::string& methodname = it->second[0];
+    uint32_t count = -it->first;
+    uint32_t size = atoi(it->second[2].c_str());
+    double usedPercent = (count * 100.0) / total_count;
+
+    curTotalCount += count;
+    // Methods with the same count should be part of the same top K percentage bucket.
+    double topKPercentage = (prevData != nullptr) && (prevData->GetCount() == count)
+      ? prevData->GetTopKUsedPercentage()
+      : 100 * static_cast<double>(curTotalCount) / static_cast<double>(total_count);
+
+    // Add it to the profile map.
+    ProfileData curData = ProfileData(methodname, count, size, usedPercent, topKPercentage);
+    profileMap[methodname] = curData;
+    prevData = &curData;
+  }
+  return true;
+}
+
+bool ProfileHelper::LoadTopKSamples(std::set<std::string>& topKSamples, const std::string& fileName,
+                                    double topKPercentage) {
+  ProfileMap profileMap;
+  bool loadOk = LoadProfileMap(profileMap, fileName);
+  if (!loadOk) {
+    return false;
+  }
+  ProfileMap::iterator end = profileMap.end();
+  for (ProfileMap::iterator it = profileMap.begin(); it != end; it++) {
+    if (it->second.GetTopKUsedPercentage() < topKPercentage) {
+      topKSamples.insert(it->first);
+    }
+  }
+  return true;
+}
+
+}  // namespace art
diff --git a/runtime/profiler.h b/runtime/profiler.h
index b03b170..31fdc79 100644
--- a/runtime/profiler.h
+++ b/runtime/profiler.h
@@ -39,7 +39,6 @@
 }  // namespace mirror
 class Thread;
 
-
 //
 // This class holds all the results for all runs of the profiler.  It also
 // counts the number of null methods (where we can't determine the method) and
@@ -63,7 +62,7 @@
  private:
   uint32_t Hash(mirror::ArtMethod* method);
   static constexpr int kHashSize = 17;
-  Mutex& lock_;         // Reference to the main profiler lock - we don't need two of them.
+  Mutex& lock_;                   // Reference to the main profiler lock - we don't need two of them.
   uint32_t num_samples_;          // Total number of samples taken.
   uint32_t num_null_methods_;     // Number of samples where can don't know the method.
   uint32_t num_boot_methods_;     // Number of samples in the boot path.
@@ -189,6 +188,54 @@
   DISALLOW_COPY_AND_ASSIGN(BackgroundMethodSamplingProfiler);
 };
 
+// TODO: incorporate in ProfileSampleResults
+
+// Profile data.  This is generated from previous runs of the program and stored
+// in a file.  It is used to determine whether to compile a particular method or not.
+class ProfileData {
+ public:
+  ProfileData() : count_(0), method_size_(0), usedPercent_(0) {}
+  ProfileData(const std::string& method_name, uint32_t count, uint32_t method_size,
+    double usedPercent, double topKUsedPercentage) :
+    method_name_(method_name), count_(count), method_size_(method_size),
+    usedPercent_(usedPercent), topKUsedPercentage_(topKUsedPercentage) {
+    // TODO: currently method_size_ and count_ are unused.
+    UNUSED(method_size_);
+    UNUSED(count_);
+  }
+
+  bool IsAbove(double v) const { return usedPercent_ >= v; }
+  double GetUsedPercent() const { return usedPercent_; }
+  uint32_t GetCount() const { return count_; }
+  double GetTopKUsedPercentage() const { return topKUsedPercentage_; }
+
+ private:
+  std::string method_name_;    // Method name.
+  uint32_t count_;             // Number of times it has been called.
+  uint32_t method_size_;       // Size of the method on dex instructions.
+  double usedPercent_;         // Percentage of how many times this method was called.
+  double topKUsedPercentage_;  // The percentage of the group that comprise K% of the total used
+                               // methods this methods belongs to.
+};
+
+// Profile data is stored in a map, indexed by the full method name.
+typedef std::map<std::string, ProfileData> ProfileMap;
+
+class ProfileHelper {
+ private:
+  ProfileHelper();
+
+ public:
+  // Read the profile data from the given file.  Calculates the percentage for each method.
+  // Returns false if there was no profile file or it was malformed.
+  static bool LoadProfileMap(ProfileMap& profileMap, const std::string& fileName);
+
+  // Read the profile data from the given file and computes the group that comprise
+  // topKPercentage of the total used methods.
+  static bool LoadTopKSamples(std::set<std::string>& topKMethods, const std::string& fileName,
+                              double topKPercentage);
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_PROFILER_H_