Add support for aggregation counters in the profile

The profile can be configured to keep track of aggregation counters for
methods and classes.

On device, this is particularly useful for the boot image profile. When
aggregating multiple profiles into one and keeping track of counters, we
can tell:
1. how many times we aggregated a method or a class. This tells us how
popular a particular method or class is among the apps.
2. how many times we aggregated the profile. This tells us how many apps
contributed to the profile (on a single device).

NOTE: This expands the memory footprint and makes the aggregation slower.
As such, it is disabled by default and is not intended to be used for the
regular profiling path of apps in prod.

Test: profile_compilation_info_test, profile_assistant_test
Bug: 112617266

Change-Id: I731abf31f65b12bf405c77fb7803bb0bda3b9908
diff --git a/libprofile/profile/profile_compilation_info.cc b/libprofile/profile/profile_compilation_info.cc
index 02f6344..9b32b9e 100644
--- a/libprofile/profile/profile_compilation_info.cc
+++ b/libprofile/profile/profile_compilation_info.cc
@@ -58,6 +58,12 @@
 // profile_compilation_info object. All the profile line headers are now placed together
 // before corresponding method_encodings and class_ids.
 const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '1', '0', '\0' };
+const uint8_t ProfileCompilationInfo::kProfileVersionWithCounters[] = { '5', '0', '0', '\0' };
+
+static_assert(sizeof(ProfileCompilationInfo::kProfileVersion) == 4,
+              "Invalid profile version size");
+static_assert(sizeof(ProfileCompilationInfo::kProfileVersionWithCounters) == 4,
+              "Invalid profile version size");
 
 // The name of the profile entry in the dex metadata file.
 // DO NOT CHANGE THIS! (it's similar to classes.dex in the apk files).
@@ -84,18 +90,31 @@
   return kDebugIgnoreChecksum || dex_file_checksum == checksum;
 }
 
+// For storage efficiency we store aggregation counts of at most 2^16 - 1; additions saturate.
+static uint16_t IncrementAggregationCounter(uint16_t counter, uint16_t value) {
+  if (counter < (std::numeric_limits<uint16_t>::max() - value)) {
+    return counter + value;
+  } else {
+    return std::numeric_limits<uint16_t>::max();
+  }
+}
+
 ProfileCompilationInfo::ProfileCompilationInfo(ArenaPool* custom_arena_pool)
     : default_arena_pool_(),
       allocator_(custom_arena_pool),
       info_(allocator_.Adapter(kArenaAllocProfile)),
-      profile_key_map_(std::less<const std::string>(), allocator_.Adapter(kArenaAllocProfile)) {
+      profile_key_map_(std::less<const std::string>(), allocator_.Adapter(kArenaAllocProfile)),
+      aggregation_count_(0) {
+  InitProfileVersionInternal(kProfileVersion);
 }
 
 ProfileCompilationInfo::ProfileCompilationInfo()
     : default_arena_pool_(),
       allocator_(&default_arena_pool_),
       info_(allocator_.Adapter(kArenaAllocProfile)),
-      profile_key_map_(std::less<const std::string>(), allocator_.Adapter(kArenaAllocProfile)) {
+      profile_key_map_(std::less<const std::string>(), allocator_.Adapter(kArenaAllocProfile)),
+      aggregation_count_(0) {
+  InitProfileVersionInternal(kProfileVersion);
 }
 
 ProfileCompilationInfo::~ProfileCompilationInfo() {
@@ -326,13 +345,15 @@
 /**
  * Serialization format:
  * [profile_header, zipped[[profile_line_header1, profile_line_header2...],[profile_line_data1,
- *    profile_line_data2...]]]
+ *    profile_line_data2...]],global_aggregation_counter]
  * profile_header:
  *   magic,version,number_of_dex_files,uncompressed_size_of_zipped_data,compressed_data_size
  * profile_line_header:
  *   dex_location,number_of_classes,methods_region_size,dex_location_checksum,num_method_ids
  * profile_line_data:
- *   method_encoding_1,method_encoding_2...,class_id1,class_id2...,startup/post startup bitmap
+ *   method_encoding_1,method_encoding_2...,class_id1,class_id2...,startup/post startup bitmap,
+ *   num_classes,class_counters,num_methods,method_counters
+ * The aggregation counters are only stored if the profile version is kProfileVersionWithCounters.
  * The method_encoding is:
  *    method_id,number_of_inline_caches,inline_cache1,inline_cache2...
  * The inline_cache is:
@@ -355,7 +376,7 @@
   if (!WriteBuffer(fd, kProfileMagic, sizeof(kProfileMagic))) {
     return false;
   }
-  if (!WriteBuffer(fd, kProfileVersion, sizeof(kProfileVersion))) {
+  if (!WriteBuffer(fd, version_, sizeof(version_))) {
     return false;
   }
   DCHECK_LE(info_.size(), std::numeric_limits<uint8_t>::max());
@@ -370,7 +391,17 @@
         sizeof(uint16_t) * dex_data.class_set.size() +
         methods_region_size +
         dex_data.bitmap_storage.size();
+    if (StoresAggregationCounters()) {
+      required_capacity += sizeof(uint16_t) +  // num class counters
+          sizeof(uint16_t) * dex_data.class_set.size() +
+          sizeof(uint16_t) +  // num method counter
+          sizeof(uint16_t) * dex_data_ptr->GetNumMethodCounters();
+    }
   }
+  if (StoresAggregationCounters()) {
+    required_capacity += sizeof(uint16_t);  // global counter
+  }
+
   // Allow large profiles for non target builds for the case where we are merging many profiles
   // to generate a boot image profile.
   if (kIsTargetBuild && required_capacity > kProfileSizeErrorThresholdInBytes) {
@@ -443,6 +474,24 @@
     buffer.insert(buffer.end(),
                   dex_data.bitmap_storage.begin(),
                   dex_data.bitmap_storage.end());
+
+    if (StoresAggregationCounters()) {
+      AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_data.class_set.size()));
+      for (const auto& class_id : dex_data.class_set) {
+        uint16_t type_idx = class_id.index_;
+        AddUintToBuffer(&buffer, dex_data.class_counters[type_idx]);
+      }
+      AddUintToBuffer(&buffer, dex_data.GetNumMethodCounters());
+      for (uint16_t method_idx = 0; method_idx < dex_data.num_method_ids; method_idx++) {
+        if (dex_data.GetHotnessInfo(method_idx).IsInProfile()) {
+          AddUintToBuffer(&buffer, dex_data.method_counters[method_idx]);
+        }
+      }
+    }
+  }
+
+  if (StoresAggregationCounters()) {
+    AddUintToBuffer(&buffer, aggregation_count_);
   }
 
   uint32_t output_size = 0;
@@ -583,7 +632,8 @@
         profile_key,
         checksum,
         profile_index,
-        num_method_ids);
+        num_method_ids,
+        StoresAggregationCounters());
     info_.push_back(dex_file_data);
   }
   DexFileData* result = info_[profile_index];
@@ -943,7 +993,7 @@
   // Read magic and version
   const size_t kMagicVersionSize =
     sizeof(kProfileMagic) +
-    sizeof(kProfileVersion) +
+    kProfileVersionSize +
     sizeof(uint8_t) +  // number of dex files
     sizeof(uint32_t) +  // size of uncompressed profile data
     sizeof(uint32_t);  // size of compressed profile data
@@ -959,10 +1009,18 @@
     *error = "Profile missing magic";
     return kProfileLoadVersionMismatch;
   }
-  if (!safe_buffer.CompareAndAdvance(kProfileVersion, sizeof(kProfileVersion))) {
+  if (safe_buffer.CountUnreadBytes() < kProfileVersionSize) {
+     *error = "Cannot read profile version";
+     return kProfileLoadBadData;
+  }
+  memcpy(version_, safe_buffer.GetCurrentPtr(), kProfileVersionSize);
+  safe_buffer.Advance(kProfileVersionSize);
+  if ((memcmp(version_, kProfileVersion, kProfileVersionSize) != 0) &&
+      (memcmp(version_, kProfileVersionWithCounters, kProfileVersionSize) != 0)) {
     *error = "Profile version mismatch";
     return kProfileLoadVersionMismatch;
   }
+
   if (!safe_buffer.ReadUintAndAdvance<uint8_t>(number_of_dex_files)) {
     *error = "Cannot read the number of dex files";
     return kProfileLoadBadData;
@@ -1047,6 +1105,7 @@
     }
   }
 
+  // Read method bitmap.
   const size_t bytes = data->bitmap_storage.size();
   if (buffer.CountUnreadBytes() < bytes) {
     *error += "Profile EOF reached prematurely for ReadProfileHeaderDexLocation";
@@ -1055,10 +1114,51 @@
   const uint8_t* base_ptr = buffer.GetCurrentPtr();
   std::copy_n(base_ptr, bytes, data->bitmap_storage.data());
   buffer.Advance(bytes);
-  // Read method bitmap.
+
+  if (StoresAggregationCounters()) {
+    ReadAggregationCounters(buffer, *data, error);
+  }
+
   return kProfileLoadSuccess;
 }
 
+// Reads the aggregation counters of dex_data; returns false and sets error on invalid data.
+bool ProfileCompilationInfo::ReadAggregationCounters(
+      SafeBuffer& buffer, DexFileData& dex_data, /*out*/std::string* error) {
+  const uint16_t expected_method_counters = dex_data.GetNumMethodCounters();
+  // Two count fields plus one uint16_t per profiled class and per in-profile method.
+  const size_t expected_byte_count = sizeof(uint16_t) *
+      (dex_data.class_set.size() + expected_method_counters + 2);
+  if (buffer.CountUnreadBytes() < expected_byte_count) {
+    *error += "Profile EOF reached prematurely for ReadAggregationCounters";
+    return false;
+  }
+
+  uint16_t num_class_counters;
+  READ_UINT(uint16_t, buffer, num_class_counters, error);
+  if (num_class_counters != dex_data.class_set.size()) {
+    *error = "Invalid class size when reading counters";
+    return false;
+  }
+  for (const auto& class_it : dex_data.class_set) {
+    READ_UINT(uint16_t, buffer, dex_data.class_counters[class_it.index_], error);
+  }
+
+  uint16_t num_method_counters;
+  READ_UINT(uint16_t, buffer, num_method_counters, error);
+  if (num_method_counters != expected_method_counters) {
+    *error = "Invalid method size when reading counters";
+    return false;
+  }
+  // 32-bit index: num_method_ids is uint32_t; a uint16_t index could never reach 2^16 ids.
+  for (uint32_t method_idx = 0; method_idx < dex_data.num_method_ids; method_idx++) {
+    if (dex_data.GetHotnessInfo(method_idx).IsInProfile()) {
+      READ_UINT(uint16_t, buffer, dex_data.method_counters[method_idx], error);
+    }
+  }
+  return true;
+}
+
 // TODO(calin): Fix this API. ProfileCompilationInfo::Load should be static and
 // return a unique pointer to a ProfileCompilationInfo upon success.
 bool ProfileCompilationInfo::Load(
@@ -1370,9 +1470,17 @@
     }
   }
 
+  if (StoresAggregationCounters()) {
+    if (!uncompressed_data.ReadUintAndAdvance<uint16_t>(&aggregation_count_)) {
+      *error = "Cannot read the global aggregation count";
+      return kProfileLoadBadData;
+    }
+  }
+
   // Check that we read everything and that profiles don't contain junk data.
   if (uncompressed_data.CountUnreadBytes() > 0) {
-    *error = "Unexpected content in the profile file";
+    *error = "Unexpected content in the profile file: " +
+        std::to_string(uncompressed_data.CountUnreadBytes()) + " extra bytes";
     return kProfileLoadBadData;
   } else {
     return kProfileLoadSuccess;
@@ -1518,6 +1626,33 @@
                                                                  other_dex_data->checksum));
     DCHECK(dex_data != nullptr);
 
+    // Merge counters for methods and class. Must be done before we merge the bitmaps so that
+    // we can tell if the data is new or not.
+    if (StoresAggregationCounters()) {
+      // Class aggregation counters.
+      if (merge_classes) {
+        for (const dex::TypeIndex& type_idx : other_dex_data->class_set) {
+          uint16_t amount = other.StoresAggregationCounters()
+              ? other_dex_data->class_counters[type_idx.index_]
+              : (dex_data->ContainsClass(type_idx) ? 1 : 0);
+
+          dex_data->class_counters[type_idx.index_] =
+              IncrementAggregationCounter(dex_data->class_counters[type_idx.index_], amount);
+        }
+      }
+
+      // Method aggregation counters.
+      for (uint16_t method_idx = 0; method_idx < other_dex_data->num_method_ids; method_idx++) {
+        if (other_dex_data->GetHotnessInfo(method_idx).IsInProfile()) {
+          uint16_t amount = other.StoresAggregationCounters()
+              ? other_dex_data->method_counters[method_idx]
+              : (dex_data->GetHotnessInfo(method_idx).IsInProfile() ? 1 : 0);
+          dex_data->method_counters[method_idx] =
+              IncrementAggregationCounter(dex_data->method_counters[method_idx], amount);
+        }
+      }
+    }
+
     // Merge the classes.
     if (merge_classes) {
       dex_data->class_set.insert(other_dex_data->class_set.begin(),
@@ -1552,6 +1687,13 @@
     // Merge the method bitmaps.
     dex_data->MergeBitmap(*other_dex_data);
   }
+
+  // Global aggregation counter.
+  if (StoresAggregationCounters()) {
+    uint16_t amount = other.StoresAggregationCounters() ? other.aggregation_count_ : 1;
+    aggregation_count_ = IncrementAggregationCounter(aggregation_count_, amount);
+  }
+
   return true;
 }
 
@@ -1614,11 +1756,7 @@
 
 bool ProfileCompilationInfo::ContainsClass(const DexFile& dex_file, dex::TypeIndex type_idx) const {
   const DexFileData* dex_data = FindDexData(&dex_file);
-  if (dex_data != nullptr) {
-    const ArenaSet<dex::TypeIndex>& classes = dex_data->class_set;
-    return classes.find(type_idx) != classes.end();
-  }
-  return false;
+  return (dex_data != nullptr) && dex_data->ContainsClass(type_idx);
 }
 
 uint32_t ProfileCompilationInfo::GetNumberOfMethods() const {
@@ -1753,6 +1891,9 @@
 bool ProfileCompilationInfo::Equals(const ProfileCompilationInfo& other) {
   // No need to compare profile_key_map_. That's only a cache for fast search.
   // All the information is already in the info_ vector.
+  if (memcmp(version_, other.version_, kProfileVersionSize) != 0) {
+    return false;
+  }
   if (info_.size() != other.info_.size()) {
     return false;
   }
@@ -1763,6 +1904,9 @@
       return false;
     }
   }
+  if (aggregation_count_ != other.aggregation_count_) {
+    return false;
+  }
   return true;
 }
 
@@ -1965,9 +2109,8 @@
   SetMethodHotness(index, flags);
 
   if ((flags & MethodHotness::kFlagHot) != 0) {
-    method_map.FindOrAdd(
-        index,
-        InlineCacheMap(std::less<uint16_t>(), allocator_->Adapter(kArenaAllocProfile)));
+    ProfileCompilationInfo::InlineCacheMap* result = FindOrAddMethod(index);
+    DCHECK(result != nullptr);
   }
   return true;
 }
@@ -2000,6 +2143,43 @@
   return ret;
 }
 
+int32_t ProfileCompilationInfo::DexFileData::GetMethodAggregationCounter(
+      uint16_t method_idx) const {
+  CHECK_GT(method_counters.size(), method_idx) << "Profile not prepared for aggregation counters";
+  if (!GetHotnessInfo(method_idx).IsInProfile()) {
+    return -1;
+  }
+
+  return method_counters[method_idx];
+}
+
+int32_t ProfileCompilationInfo::DexFileData::GetClassAggregationCounter(uint16_t type_idx) const {
+  CHECK_GT(class_counters.size(), type_idx) << "Profile not prepared for aggregation counters";
+  if (!ContainsClass(dex::TypeIndex(type_idx))) {
+    return -1;
+  }
+
+  return class_counters[type_idx];
+}
+
+int32_t ProfileCompilationInfo::GetMethodAggregationCounter(
+      const MethodReference& method_ref) const {
+  CHECK(StoresAggregationCounters()) << "Profile not prepared for aggregation counters";
+  const DexFileData* dex_data = FindDexData(method_ref.dex_file);
+  return dex_data == nullptr ? -1 : dex_data->GetMethodAggregationCounter(method_ref.index);
+}
+
+int32_t ProfileCompilationInfo::GetClassAggregationCounter(const TypeReference& type_ref) const {
+  CHECK(StoresAggregationCounters()) << "Profile not prepared for aggregation counters";
+  const DexFileData* dex_data = FindDexData(type_ref.dex_file);
+  return dex_data == nullptr ? -1 : dex_data->GetClassAggregationCounter(type_ref.index);
+}
+
+uint16_t ProfileCompilationInfo::GetAggregationCounter() const {
+  CHECK(StoresAggregationCounters()) << "Profile not prepared for aggregation counters";
+  return aggregation_count_;
+}
+
 ProfileCompilationInfo::DexPcData*
 ProfileCompilationInfo::FindOrAddDexPc(InlineCacheMap* inline_cache, uint32_t dex_pc) {
   return &(inline_cache->FindOrAdd(dex_pc, DexPcData(&allocator_))->second);
@@ -2096,4 +2276,46 @@
   profile_key_map_.clear();
 }
 
+bool ProfileCompilationInfo::StoresAggregationCounters() const {
+  return memcmp(version_, kProfileVersionWithCounters, sizeof(kProfileVersionWithCounters)) == 0;
+}
+
+void ProfileCompilationInfo::PrepareForAggregationCounters() {
+  InitProfileVersionInternal(kProfileVersionWithCounters);
+  for (DexFileData* dex_data : info_) {
+    dex_data->PrepareForAggregationCounters();
+  }
+}
+
+void ProfileCompilationInfo::DexFileData::PrepareForAggregationCounters() {
+  method_counters.resize(num_method_ids);
+  // TODO(calin): we should store the maximum number of types in the profile.
+  // It will simplify quite a few things and make this storage allocation
+  // more efficient.
+  size_t max_elems = 1 << (kBitsPerByte * sizeof(uint16_t));
+  class_counters.resize(max_elems);
+}
+
+const uint8_t* ProfileCompilationInfo::GetVersion() const {
+  return version_;
+}
+
+void ProfileCompilationInfo::InitProfileVersionInternal(const uint8_t version[]) {
+  CHECK(
+      (memcmp(version, kProfileVersion, kProfileVersionSize) == 0) ||
+      (memcmp(version, kProfileVersionWithCounters, kProfileVersionSize) == 0));
+  memcpy(version_, version, kProfileVersionSize);
+}
+
+uint16_t ProfileCompilationInfo::DexFileData::GetNumMethodCounters() const {
+  uint16_t num_method_counters = 0;  // Counts the methods that are in the profile.
+  for (uint32_t method_idx = 0; method_idx < num_method_ids; method_idx++) {  // 32-bit: no wrap.
+    num_method_counters += GetHotnessInfo(method_idx).IsInProfile() ? 1 : 0;
+  }
+  return num_method_counters;
+}
+
+bool ProfileCompilationInfo::DexFileData::ContainsClass(const dex::TypeIndex type_index) const {
+  return class_set.find(type_index) != class_set.end();
+}
 }  // namespace art
diff --git a/libprofile/profile/profile_compilation_info.h b/libprofile/profile/profile_compilation_info.h
index 92fa098..fa4615b 100644
--- a/libprofile/profile/profile_compilation_info.h
+++ b/libprofile/profile/profile_compilation_info.h
@@ -73,9 +73,10 @@
  public:
   static const uint8_t kProfileMagic[];
   static const uint8_t kProfileVersion[];
-
+  static const uint8_t kProfileVersionWithCounters[];
   static const char kDexMetadataProfileEntry[];
 
+  static constexpr size_t kProfileVersionSize = 4;
   static constexpr uint8_t kIndividualInlineCacheSize = 5;
 
   // Data structures for encoding the offline representation of inline caches.
@@ -447,6 +448,30 @@
   // Clears all the data from the profile.
   void ClearData();
 
+  // Prepare the profile to store aggregation counters.
+  // This will change the profile version and allocate extra storage for the counters.
+  // It allocates 2 bytes for every possible method and class, so do not use in performance
+  // critical code which needs to be memory efficient.
+  void PrepareForAggregationCounters();
+
+  // Returns true if the profile is configured to store aggregation counters.
+  bool StoresAggregationCounters() const;
+
+  // Returns the aggregation counter for the given method.
+  // Returns -1 if the method is not in the profile.
+  // CHECKs that the profile is configured to store aggregation counters.
+  int32_t GetMethodAggregationCounter(const MethodReference& method_ref) const;
+  // Returns the aggregation counter for the given class.
+  // Returns -1 if the class is not in the profile.
+  // CHECKs that the profile is configured to store aggregation counters.
+  int32_t GetClassAggregationCounter(const TypeReference& type_ref) const;
+  // Returns the number of times the profile was merged.
+  // CHECKs that the profile is configured to store aggregation counters.
+  uint16_t GetAggregationCounter() const;
+
+  // Return the version of this profile.
+  const uint8_t* GetVersion() const;
+
  private:
   enum ProfileLoadStatus {
     kProfileLoadWouldOverwiteData,
@@ -470,7 +495,8 @@
                 const std::string& key,
                 uint32_t location_checksum,
                 uint16_t index,
-                uint32_t num_methods)
+                uint32_t num_methods,
+                bool store_aggregation_counters)
         : allocator_(allocator),
           profile_key(key),
           profile_index(index),
@@ -478,13 +504,18 @@
           method_map(std::less<uint16_t>(), allocator->Adapter(kArenaAllocProfile)),
           class_set(std::less<dex::TypeIndex>(), allocator->Adapter(kArenaAllocProfile)),
           num_method_ids(num_methods),
-          bitmap_storage(allocator->Adapter(kArenaAllocProfile)) {
+          bitmap_storage(allocator->Adapter(kArenaAllocProfile)),
+          method_counters(allocator->Adapter(kArenaAllocProfile)),
+          class_counters(allocator->Adapter(kArenaAllocProfile)) {
       bitmap_storage.resize(ComputeBitmapStorage(num_method_ids));
       if (!bitmap_storage.empty()) {
         method_bitmap =
             BitMemoryRegion(MemoryRegion(
                 &bitmap_storage[0], bitmap_storage.size()), 0, ComputeBitmapBits(num_method_ids));
       }
+      if (store_aggregation_counters) {
+        PrepareForAggregationCounters();
+      }
     }
 
     static size_t ComputeBitmapBits(uint32_t num_method_ids) {
@@ -495,7 +526,13 @@
     }
 
     bool operator==(const DexFileData& other) const {
-      return checksum == other.checksum && method_map == other.method_map;
+      return checksum == other.checksum &&
+          num_method_ids == other.num_method_ids &&
+          method_map == other.method_map &&
+          class_set == other.class_set &&
+          (BitMemoryRegion::Compare(method_bitmap, other.method_bitmap) == 0) &&
+          class_counters == other.class_counters &&
+          method_counters == other.method_counters;
     }
 
     // Mark a method as executed at least once.
@@ -510,6 +547,14 @@
 
     void SetMethodHotness(size_t index, MethodHotness::Flag flags);
     MethodHotness GetHotnessInfo(uint32_t dex_method_index) const;
+    void PrepareForAggregationCounters();
+
+    int32_t GetMethodAggregationCounter(uint16_t method_index) const;
+    int32_t GetClassAggregationCounter(uint16_t type_index) const;
+
+    uint16_t GetNumMethodCounters() const;
+
+    bool ContainsClass(const dex::TypeIndex type_index) const;
 
     // The allocator used to allocate new inline cache maps.
     ArenaAllocator* const allocator_;
@@ -519,7 +564,7 @@
     uint8_t profile_index;
     // The dex checksum.
     uint32_t checksum;
-    // The methonds' profile information.
+    // The methods' profile information.
     MethodMap method_map;
     // The classes which have been profiled. Note that these don't necessarily include
     // all the classes that can be found in the inline caches reference.
@@ -531,6 +576,8 @@
     uint32_t num_method_ids;
     ArenaVector<uint8_t> bitmap_storage;
     BitMemoryRegion method_bitmap;
+    ArenaVector<uint16_t> method_counters;
+    ArenaVector<uint16_t> class_counters;
 
    private:
     enum BitmapIndex {
@@ -761,6 +808,11 @@
                    const SafeMap<uint8_t, uint8_t>& dex_profile_index_remap,
                    /*out*/std::string* error);
 
+  // Read the aggregation counters from the buffer.
+  bool ReadAggregationCounters(SafeBuffer& buffer,
+                               DexFileData& dex_data,
+                               /*out*/std::string* error);
+
   // The method generates mapping of profile indices while merging a new profile
   // data into current data. It returns true, if the mapping was successful.
   bool RemapProfileIndex(const std::vector<ProfileLineHeader>& profile_line_headers,
@@ -792,6 +844,9 @@
   // if no previous data exists.
   DexPcData* FindOrAddDexPc(InlineCacheMap* inline_cache, uint32_t dex_pc);
 
+  // Initializes the profile version to the desired one.
+  void InitProfileVersionInternal(const uint8_t version[]);
+
   friend class ProfileCompilationInfoTest;
   friend class CompilerDriverProfileTest;
   friend class ProfileAssistantTest;
@@ -809,6 +864,14 @@
   // This is used to speed up searches since it avoids iterating
   // over the info_ vector when searching by profile key.
   ArenaSafeMap<const std::string, uint8_t> profile_key_map_;
+
+  // The version of the profile.
+  // This may change if a "normal" profile is transformed to keep track
+  // of aggregation counters.
+  uint8_t version_[kProfileVersionSize];
+
+  // Stored only when the profile is configured to keep track of aggregation counters.
+  uint16_t aggregation_count_;
 };
 
 }  // namespace art
diff --git a/libprofile/profile/profile_compilation_info_test.cc b/libprofile/profile/profile_compilation_info_test.cc
index a2bfe50..47019c4 100644
--- a/libprofile/profile/profile_compilation_info_test.cc
+++ b/libprofile/profile/profile_compilation_info_test.cc
@@ -1141,4 +1141,180 @@
   ASSERT_TRUE(loaded_info.Equals(info));
 }
 
+TEST_F(ProfileCompilationInfoTest, PrepareForAggregationCounters) {
+  ProfileCompilationInfo info;
+  ASSERT_EQ(
+      memcmp(info.GetVersion(),
+             ProfileCompilationInfo::kProfileVersion,
+             ProfileCompilationInfo::kProfileVersionSize),
+      0);
+
+  info.PrepareForAggregationCounters();
+
+  ASSERT_EQ(
+      memcmp(info.GetVersion(),
+             ProfileCompilationInfo::kProfileVersionWithCounters,
+             ProfileCompilationInfo::kProfileVersionSize),
+      0);
+  ASSERT_TRUE(info.StoresAggregationCounters());
+  ASSERT_EQ(info.GetAggregationCounter(), 0);
+}
+
+TEST_F(ProfileCompilationInfoTest, MergeWithAggregationCounters) {
+  ProfileCompilationInfo info1;
+  info1.PrepareForAggregationCounters();
+
+  ProfileCompilationInfo info2;
+  ProfileCompilationInfo info3;
+
+  std::unique_ptr<const DexFile> dex(OpenTestDexFile("ManyMethods"));
+  std::string location = dex->GetLocation();
+  int checksum = dex->GetLocationChecksum();
+
+  AddMethod(location, checksum, /* method_idx= */ 1, &info1);
+
+  AddMethod(location, checksum, /* method_idx= */ 2, &info1);
+  AddMethod(location, checksum, /* method_idx= */ 2, &info2);
+
+  info1.AddMethodIndex(Hotness::kFlagStartup, location, checksum, 3, kMaxMethodIds);
+  info2.AddMethodIndex(Hotness::kFlagPostStartup, location, checksum, 3, kMaxMethodIds);
+  info3.AddMethodIndex(Hotness::kFlagStartup, location, checksum, 3, kMaxMethodIds);
+
+  AddMethod(location, checksum, /* method_idx= */ 6, &info2);
+  AddMethod(location, checksum, /* method_idx= */ 6, &info3);
+
+  AddClass(location, checksum, dex::TypeIndex(10), &info1);
+
+  AddClass(location, checksum, dex::TypeIndex(20), &info1);
+  AddClass(location, checksum, dex::TypeIndex(20), &info2);
+
+  AddClass(location, checksum, dex::TypeIndex(30), &info1);
+  AddClass(location, checksum, dex::TypeIndex(30), &info2);
+  AddClass(location, checksum, dex::TypeIndex(30), &info3);
+
+  ASSERT_EQ(info1.GetAggregationCounter(), 0);
+  info1.MergeWith(info2);
+  ASSERT_EQ(info1.GetAggregationCounter(), 1);
+  info1.MergeWith(info3);
+  ASSERT_EQ(info1.GetAggregationCounter(), 2);
+
+  ASSERT_EQ(0, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 1)));
+  ASSERT_EQ(1, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 2)));
+  ASSERT_EQ(2, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 3)));
+  ASSERT_EQ(1, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 6)));
+
+  ASSERT_EQ(0, info1.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(10))));
+  ASSERT_EQ(1, info1.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(20))));
+  ASSERT_EQ(2, info1.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(30))));
+
+  // Check a method and a class that do not exist.
+  ASSERT_EQ(-1, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 4)));
+  ASSERT_EQ(-1, info1.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(40))));
+}
+
+TEST_F(ProfileCompilationInfoTest, SaveAndLoadAggregationCounters) {
+  ProfileCompilationInfo info1;
+  info1.PrepareForAggregationCounters();
+
+  ProfileCompilationInfo info2;
+  ProfileCompilationInfo info3;
+
+  std::unique_ptr<const DexFile> dex(OpenTestDexFile("ManyMethods"));
+  std::string location = dex->GetLocation();
+  int checksum = dex->GetLocationChecksum();
+
+  AddMethod(location, checksum, /* method_idx= */ 1, &info1);
+
+  AddMethod(location, checksum, /* method_idx= */ 2, &info1);
+  AddMethod(location, checksum, /* method_idx= */ 2, &info2);
+
+  info1.AddMethodIndex(Hotness::kFlagStartup, location, checksum, 3, kMaxMethodIds);
+  info2.AddMethodIndex(Hotness::kFlagPostStartup, location, checksum, 3, kMaxMethodIds);
+  info3.AddMethodIndex(Hotness::kFlagStartup, location, checksum, 3, kMaxMethodIds);
+
+  AddMethod(location, checksum, /* method_idx= */ 6, &info2);
+  AddMethod(location, checksum, /* method_idx= */ 6, &info3);
+
+  AddClass(location, checksum, dex::TypeIndex(10), &info1);
+
+  AddClass(location, checksum, dex::TypeIndex(20), &info1);
+  AddClass(location, checksum, dex::TypeIndex(20), &info2);
+
+  AddClass(location, checksum, dex::TypeIndex(30), &info1);
+  AddClass(location, checksum, dex::TypeIndex(30), &info2);
+  AddClass(location, checksum, dex::TypeIndex(30), &info3);
+
+  info1.MergeWith(info2);
+  info1.MergeWith(info3);
+
+  ScratchFile profile;
+
+  ASSERT_TRUE(info1.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back what we saved.
+  ProfileCompilationInfo loaded_info;
+  loaded_info.PrepareForAggregationCounters();
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info.Equals(info1));
+
+  ASSERT_EQ(2, loaded_info.GetAggregationCounter());
+
+  ASSERT_EQ(0, loaded_info.GetMethodAggregationCounter(MethodReference(dex.get(), 1)));
+  ASSERT_EQ(1, loaded_info.GetMethodAggregationCounter(MethodReference(dex.get(), 2)));
+  ASSERT_EQ(2, loaded_info.GetMethodAggregationCounter(MethodReference(dex.get(), 3)));
+  ASSERT_EQ(1, loaded_info.GetMethodAggregationCounter(MethodReference(dex.get(), 6)));
+
+  ASSERT_EQ(0, loaded_info.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(10))));
+  ASSERT_EQ(1, loaded_info.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(20))));
+  ASSERT_EQ(2, loaded_info.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(30))));
+}
+
+TEST_F(ProfileCompilationInfoTest, MergeTwoWithAggregationCounters) {
+  ProfileCompilationInfo info1;
+  info1.PrepareForAggregationCounters();
+
+  ProfileCompilationInfo info2;
+
+  std::unique_ptr<const DexFile> dex(OpenTestDexFile("ManyMethods"));
+  std::string location = dex->GetLocation();
+  int checksum = dex->GetLocationChecksum();
+  // Method 1 and class 20 live only in info1; method 2 and class 10 are shared with info2.
+  AddMethod(location, checksum, /* method_idx= */ 1, &info1);
+
+  AddMethod(location, checksum, /* method_idx= */ 2, &info1);
+  AddMethod(location, checksum, /* method_idx= */ 2, &info2);
+
+  AddClass(location, checksum, dex::TypeIndex(20), &info1);
+
+  AddClass(location, checksum, dex::TypeIndex(10), &info1);
+  AddClass(location, checksum, dex::TypeIndex(10), &info2);
+
+  info1.MergeWith(info2);
+  info1.MergeWith(info2);
+  ASSERT_EQ(2, info1.GetAggregationCounter());
+
+  // Save and load the profile to create a copy of the data (checked, so I/O failures show here).
+  ScratchFile profile;
+  ASSERT_TRUE(info1.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  ProfileCompilationInfo loaded_info;
+  loaded_info.PrepareForAggregationCounters();
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+
+  // Merge the copy: it stores counters, so its counter values (2) are added instead of 1.
+  info1.MergeWith(loaded_info);
+
+  ASSERT_EQ(4, info1.GetAggregationCounter());
+
+  ASSERT_EQ(0, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 1)));
+  ASSERT_EQ(4, info1.GetMethodAggregationCounter(MethodReference(dex.get(), 2)));
+
+  ASSERT_EQ(4, info1.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(10))));
+  ASSERT_EQ(0, info1.GetClassAggregationCounter(TypeReference(dex.get(), dex::TypeIndex(20))));
+}
+
 }  // namespace art
diff --git a/profman/profile_assistant.cc b/profman/profile_assistant.cc
index 4dc5262..b65bb43 100644
--- a/profman/profile_assistant.cc
+++ b/profman/profile_assistant.cc
@@ -32,7 +32,8 @@
 ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfilesInternal(
         const std::vector<ScopedFlock>& profile_files,
         const ScopedFlock& reference_profile_file,
-        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn) {
+        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn,
+        bool store_aggregation_counters) {
   DCHECK(!profile_files.empty());
 
   ProfileCompilationInfo info;
@@ -42,6 +43,12 @@
     return kErrorBadProfiles;
   }
 
+  // If we need to store aggregation counters (e.g. for the boot image profile),
+  // prepare the reference profile now.
+  if (store_aggregation_counters) {
+    info.PrepareForAggregationCounters();
+  }
+
   // Store the current state of the reference profile before merging with the current profiles.
   uint32_t number_of_methods = info.GetNumberOfMethods();
   uint32_t number_of_classes = info.GetNumberOfResolvedClasses();
@@ -124,7 +131,8 @@
 ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfiles(
         const std::vector<int>& profile_files_fd,
         int reference_profile_file_fd,
-        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn) {
+        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn,
+        bool store_aggregation_counters) {
   DCHECK_GE(reference_profile_file_fd, 0);
 
   std::string error;
@@ -147,13 +155,15 @@
 
   return ProcessProfilesInternal(profile_files.Get(),
                                  reference_profile_file,
-                                 filter_fn);
+                                 filter_fn,
+                                 store_aggregation_counters);
 }
 
 ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfiles(
         const std::vector<std::string>& profile_files,
         const std::string& reference_profile_file,
-        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn) {
+        const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn,
+        bool store_aggregation_counters) {
   std::string error;
 
   ScopedFlockList profile_files_list(profile_files.size());
@@ -171,7 +181,8 @@
 
   return ProcessProfilesInternal(profile_files_list.Get(),
                                  locked_reference_profile_file,
-                                 filter_fn);
+                                 filter_fn,
+                                 store_aggregation_counters);
 }
 
 }  // namespace art
diff --git a/profman/profile_assistant.h b/profman/profile_assistant.h
index c1d6f8e..45d4e38 100644
--- a/profman/profile_assistant.h
+++ b/profman/profile_assistant.h
@@ -55,19 +55,22 @@
       const std::vector<std::string>& profile_files,
       const std::string& reference_profile_file,
       const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn
-          = ProfileCompilationInfo::ProfileFilterFnAcceptAll);
+          = ProfileCompilationInfo::ProfileFilterFnAcceptAll,
+      bool store_aggregation_counters = false);
 
   static ProcessingResult ProcessProfiles(
       const std::vector<int>& profile_files_fd_,
       int reference_profile_file_fd,
       const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn
-          = ProfileCompilationInfo::ProfileFilterFnAcceptAll);
+          = ProfileCompilationInfo::ProfileFilterFnAcceptAll,
+      bool store_aggregation_counters = false);
 
  private:
   static ProcessingResult ProcessProfilesInternal(
       const std::vector<ScopedFlock>& profile_files,
       const ScopedFlock& reference_profile_file,
-      const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn);
+      const ProfileCompilationInfo::ProfileLoadFilterFn& filter_fn,
+      bool store_aggregation_counters);
 
   DISALLOW_COPY_AND_ASSIGN(ProfileAssistant);
 };
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
index e9d3290..e906151 100644
--- a/profman/profile_assistant_test.cc
+++ b/profman/profile_assistant_test.cc
@@ -102,7 +102,7 @@
       }
     }
     for (uint16_t i = 0; i < number_of_classes; i++) {
-      ASSERT_TRUE(info->AddClassIndex(dex_location1,
+      ASSERT_TRUE(info->AddClassIndex(ProfileCompilationInfo::GetProfileDexFileKey(dex_location1),
                                       dex_location_checksum1,
                                       dex::TypeIndex(i),
                                       number_of_methods1));
@@ -1300,4 +1300,57 @@
   }
 }
 
+TEST_F(ProfileAssistantTest, MergeProfilesWithCounters) {
+  ScratchFile profile1;
+  ScratchFile profile2;
+  ScratchFile reference_profile;
+  // Runs profman with --store-aggregation-counters on two profiles and checks the result.
+  // Each new profile info will contain methods with indices 0-99 and classes 0-49.
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  const uint16_t kNumberOfClasses = 50;
+
+  std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles("ProfileTestMultiDex");
+  const DexFile& d1 = *dex_files[0];
+  const DexFile& d2 = *dex_files[1];
+  ProfileCompilationInfo info1;
+  SetupProfile(
+      d1.GetLocation(), d1.GetLocationChecksum(),
+      d2.GetLocation(), d2.GetLocationChecksum(),
+      kNumberOfMethodsToEnableCompilation, kNumberOfClasses, profile1, &info1);
+  ProfileCompilationInfo info2;
+  SetupProfile(
+      d1.GetLocation(), d1.GetLocationChecksum(),
+      d2.GetLocation(), d2.GetLocationChecksum(),
+      kNumberOfMethodsToEnableCompilation, kNumberOfClasses, profile2, &info2);
+
+  std::string profman_cmd = GetProfmanCmd();
+  std::vector<std::string> argv_str;
+  argv_str.push_back(profman_cmd);
+  argv_str.push_back("--profile-file-fd=" + std::to_string(profile1.GetFd()));
+  argv_str.push_back("--profile-file-fd=" + std::to_string(profile2.GetFd()));
+  argv_str.push_back("--reference-profile-file-fd=" + std::to_string(reference_profile.GetFd()));
+  argv_str.push_back("--store-aggregation-counters");
+  std::string error;
+
+  EXPECT_EQ(ExecAndReturnCode(argv_str, &error), 0) << error;
+
+  // Verify that we can load the result and that the counters are in place.
+
+  ProfileCompilationInfo result;
+  result.PrepareForAggregationCounters();
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(result.Load(reference_profile.GetFd()));
+
+  ASSERT_TRUE(result.StoresAggregationCounters());
+  ASSERT_EQ(2, result.GetAggregationCounter());  // Presumably one per input profile - confirm.
+
+  for (uint16_t i = 0; i < kNumberOfMethodsToEnableCompilation; i++) {
+    ASSERT_EQ(1, result.GetMethodAggregationCounter(MethodReference(&d1, i)));
+    ASSERT_EQ(1, result.GetMethodAggregationCounter(MethodReference(&d2, i)));
+  }
+  for (uint16_t i = 0; i < kNumberOfClasses; i++) {
+    ASSERT_EQ(1, result.GetClassAggregationCounter(TypeReference(&d1, dex::TypeIndex(i))));
+  }
+}
+
 }  // namespace art
diff --git a/profman/profman.cc b/profman/profman.cc
index 2935a05..a0c387d 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -157,6 +157,9 @@
   UsageError("      the file passed with --profile-fd(file) to the profile passed with");
   UsageError("      --reference-profile-fd(file) and update at the same time the profile-key");
   UsageError("      of entries corresponding to the apks passed with --apk(-fd).");
+  UsageError("  --store-aggregation-counters: if present, profman will compute and store");
+  UsageError("      the aggregation counters of classes and methods in the output profile.");
+  UsageError("      In this case the profile will have a different version.");
   UsageError("");
 
   exit(EXIT_FAILURE);
@@ -200,7 +203,8 @@
       test_profile_class_percentage_(kDefaultTestProfileClassPercentage),
       test_profile_seed_(NanoTime()),
       start_ns_(NanoTime()),
-      copy_and_update_profile_key_(false) {}
+      copy_and_update_profile_key_(false),
+      store_aggregation_counters_(false) {}
 
   ~ProfMan() {
     LogCompletionTime();
@@ -287,6 +291,8 @@
         ParseUintOption(option, "--generate-test-profile-seed", &test_profile_seed_, Usage);
       } else if (option.starts_with("--copy-and-update-profile-key")) {
         copy_and_update_profile_key_ = true;
+      } else if (option.starts_with("--store-aggregation-counters")) {
+        store_aggregation_counters_ = true;
       } else {
         Usage("Unknown argument '%s'", option.data());
       }
@@ -363,12 +369,14 @@
       File file(reference_profile_file_fd_, false);
       result = ProfileAssistant::ProcessProfiles(profile_files_fd_,
                                                  reference_profile_file_fd_,
-                                                 filter_fn);
+                                                 filter_fn,
+                                                 store_aggregation_counters_);
       CloseAllFds(profile_files_fd_, "profile_files_fd_");
     } else {
       result = ProfileAssistant::ProcessProfiles(profile_files_,
                                                  reference_profile_file_,
-                                                 filter_fn);
+                                                 filter_fn,
+                                                 store_aggregation_counters_);
     }
     return result;
   }
@@ -1279,6 +1287,7 @@
   uint32_t test_profile_seed_;
   uint64_t start_ns_;
   bool copy_and_update_profile_key_;
+  bool store_aggregation_counters_;
 };
 
 // See ProfileAssistant::ProcessingResult for return codes.