Merge "Refactor use of __ANDROID__ macro"
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 836bcfa..ff4b9a7 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -795,6 +795,14 @@
     return false;
   }
 
+  // IsCompilable() is false for methods flagged kAccCompileDontBother; such a method must
+  // not be inlined, so bail out here instead of only logging and falling through.
+  if (!method->IsCompilable()) {
+    VLOG(compiler) << "Method " << PrettyMethod(method)
+                   << " has soft failures un-handled by the compiler, so it cannot be inlined";
+    return false;
+  }
+
   if (!method->GetDeclaringClass()->IsVerified()) {
     uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex();
     if (Runtime::Current()->UseJit() ||
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 12cb826..00cd2f6 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -215,6 +215,20 @@
   }
 }
 
+// Loads the reference held in dex register `register_index` and returns a null-checked
+// view of it: the loaded instruction itself when it reports that it cannot be null,
+// otherwise a newly appended HNullCheck (recorded at `dex_pc`) that wraps it.
+HInstruction* HInstructionBuilder::LoadNullCheckedLocal(uint32_t register_index, uint32_t dex_pc) {
+  HInstruction* const loaded = LoadLocal(register_index, Primitive::kPrimNot);
+  if (loaded->CanBeNull()) {
+    HNullCheck* const check = new (arena_) HNullCheck(loaded, dex_pc);
+    AppendInstruction(check);
+    return check;
+  }
+  // Nothing to guard against; hand the value back without emitting a check.
+  return loaded;
+}
+
 void HInstructionBuilder::SetLoopHeaderPhiInputs() {
   for (size_t i = loop_headers_.size(); i > 0; --i) {
     HBasicBlock* block = loop_headers_[i - 1];
@@ -1084,10 +1095,9 @@
   size_t start_index = 0;
   size_t argument_index = 0;
   if (invoke->GetOriginalInvokeType() != InvokeType::kStatic) {  // Instance call.
-    HInstruction* arg = LoadLocal(is_range ? register_index : args[0], Primitive::kPrimNot);
-    HNullCheck* null_check = new (arena_) HNullCheck(arg, invoke->GetDexPc());
-    AppendInstruction(null_check);
-    invoke->SetArgumentAt(0, null_check);
+    HInstruction* arg = LoadNullCheckedLocal(is_range ? register_index : args[0],
+                                             invoke->GetDexPc());
+    invoke->SetArgumentAt(0, arg);
     start_index = 1;
     argument_index = 1;
   }
@@ -1193,9 +1203,7 @@
       compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa);
 
 
-  HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot);
-  HInstruction* null_check = new (arena_) HNullCheck(object, dex_pc);
-  AppendInstruction(null_check);
+  HInstruction* object = LoadNullCheckedLocal(obj_reg, dex_pc);
 
   Primitive::Type field_type = (resolved_field == nullptr)
       ? GetFieldAccessType(*dex_file_, field_index)
@@ -1205,14 +1213,14 @@
     HInstruction* field_set = nullptr;
     if (resolved_field == nullptr) {
       MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
-      field_set = new (arena_) HUnresolvedInstanceFieldSet(null_check,
+      field_set = new (arena_) HUnresolvedInstanceFieldSet(object,
                                                            value,
                                                            field_type,
                                                            field_index,
                                                            dex_pc);
     } else {
       uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
-      field_set = new (arena_) HInstanceFieldSet(null_check,
+      field_set = new (arena_) HInstanceFieldSet(object,
                                                  value,
                                                  field_type,
                                                  resolved_field->GetOffset(),
@@ -1228,13 +1236,13 @@
     HInstruction* field_get = nullptr;
     if (resolved_field == nullptr) {
       MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
-      field_get = new (arena_) HUnresolvedInstanceFieldGet(null_check,
+      field_get = new (arena_) HUnresolvedInstanceFieldGet(object,
                                                            field_type,
                                                            field_index,
                                                            dex_pc);
     } else {
       uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
-      field_get = new (arena_) HInstanceFieldGet(null_check,
+      field_get = new (arena_) HInstanceFieldGet(object,
                                                  field_type,
                                                  resolved_field->GetOffset(),
                                                  resolved_field->IsVolatile(),
@@ -1449,10 +1457,7 @@
   uint8_t array_reg = instruction.VRegB_23x();
   uint8_t index_reg = instruction.VRegC_23x();
 
-  HInstruction* object = LoadLocal(array_reg, Primitive::kPrimNot);
-  object = new (arena_) HNullCheck(object, dex_pc);
-  AppendInstruction(object);
-
+  HInstruction* object = LoadNullCheckedLocal(array_reg, dex_pc);
   HInstruction* length = new (arena_) HArrayLength(object, dex_pc);
   AppendInstruction(length);
   HInstruction* index = LoadLocal(index_reg, Primitive::kPrimInt);
@@ -1527,11 +1532,8 @@
 }
 
 void HInstructionBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc) {
-  HInstruction* array = LoadLocal(instruction.VRegA_31t(), Primitive::kPrimNot);
-  HNullCheck* null_check = new (arena_) HNullCheck(array, dex_pc);
-  AppendInstruction(null_check);
-
-  HInstruction* length = new (arena_) HArrayLength(null_check, dex_pc);
+  HInstruction* array = LoadNullCheckedLocal(instruction.VRegA_31t(), dex_pc);
+  HInstruction* length = new (arena_) HArrayLength(array, dex_pc);
   AppendInstruction(length);
 
   int32_t payload_offset = instruction.VRegB_31t() + dex_pc;
@@ -1547,28 +1549,28 @@
 
   switch (payload->element_width) {
     case 1:
-      BuildFillArrayData(null_check,
+      BuildFillArrayData(array,
                          reinterpret_cast<const int8_t*>(data),
                          element_count,
                          Primitive::kPrimByte,
                          dex_pc);
       break;
     case 2:
-      BuildFillArrayData(null_check,
+      BuildFillArrayData(array,
                          reinterpret_cast<const int16_t*>(data),
                          element_count,
                          Primitive::kPrimShort,
                          dex_pc);
       break;
     case 4:
-      BuildFillArrayData(null_check,
+      BuildFillArrayData(array,
                          reinterpret_cast<const int32_t*>(data),
                          element_count,
                          Primitive::kPrimInt,
                          dex_pc);
       break;
     case 8:
-      BuildFillWideArrayData(null_check,
+      BuildFillWideArrayData(array,
                              reinterpret_cast<const int64_t*>(data),
                              element_count,
                              dex_pc);
@@ -2575,9 +2577,7 @@
     ARRAY_XX(_SHORT, Primitive::kPrimShort);
 
     case Instruction::ARRAY_LENGTH: {
-      HInstruction* object = LoadLocal(instruction.VRegB_12x(), Primitive::kPrimNot);
-      object = new (arena_) HNullCheck(object, dex_pc);
-      AppendInstruction(object);
+      HInstruction* object = LoadNullCheckedLocal(instruction.VRegB_12x(), dex_pc);
       AppendInstruction(new (arena_) HArrayLength(object, dex_pc));
       UpdateLocal(instruction.VRegA_12x(), current_block_->GetLastInstruction());
       break;
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 070f7da..0e3e5a7 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -87,6 +87,7 @@
   ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block);
   HInstruction* ValueOfLocalAt(HBasicBlock* block, size_t local);
   HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type) const;
+  HInstruction* LoadNullCheckedLocal(uint32_t register_index, uint32_t dex_pc);
   void UpdateLocal(uint32_t register_index, HInstruction* instruction);
 
   void AppendInstruction(HInstruction* instruction);
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index cf973aa..1524e1e 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -385,6 +385,92 @@
   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
 }
 
+static void GenBitCount(LocationSummary* locations,  // Emits a population count.
+                        const Primitive::Type type,  // kPrimInt or kPrimLong: operand width.
+                        Mips64Assembler* assembler) {
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();  // Receives the bit count.
+  GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();  // Value whose bits are counted.
+
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);  // Else nothing is emitted.
+
+  // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+  //
+  // A generalization of the best bit counting method to integers of
+  // bit-widths up to 128 (parameterized by type T) is this:
+  //
+  // v = v - ((v >> 1) & (T)~(T)0/3);                           // temp
+  // v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3);      // temp
+  // v = (v + (v >> 4)) & (T)~(T)0/255*15;                      // temp
+  // c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * BITS_PER_BYTE; // count
+  //
+  // For comparison, for 32-bit quantities, this algorithm can be executed
+  // using 20 MIPS instructions (the calls to LoadConst32() generate two
+  // machine instructions each for the values being used in this algorithm).
+  // A(n unrolled) loop-based algorithm requires 25 instructions.
+  //
+  // For a 64-bit operand this can be performed in 24 instructions compared
+  // to a(n unrolled) loop based algorithm which requires 38 instructions.
+  //
+  // There are algorithms which are faster in the cases where very few
+  // bits are set but the algorithm here attempts to minimize the total
+  // number of instructions executed even when a large number of bits
+  // are set.
+
+  if (type == Primitive::kPrimInt) {
+    __ Srl(TMP, in, 1);  // TMP = v >> 1
+    __ LoadConst32(AT, 0x55555555);  // AT = mask (T)~(T)0/3
+    __ And(TMP, TMP, AT);  // TMP = (v >> 1) & mask
+    __ Subu(TMP, in, TMP);  // TMP = v - TMP (step 1); last read of `in`
+    __ LoadConst32(AT, 0x33333333);  // AT = mask (T)~(T)0/15*3
+    __ And(out, TMP, AT);  // out = v & mask; first write to `out`
+    __ Srl(TMP, TMP, 2);  // TMP = v >> 2
+    __ And(TMP, TMP, AT);  // TMP = (v >> 2) & mask
+    __ Addu(TMP, out, TMP);  // TMP = sum of both halves (step 2)
+    __ Srl(out, TMP, 4);  // out = v >> 4
+    __ Addu(out, out, TMP);  // out = v + (v >> 4)
+    __ LoadConst32(AT, 0x0F0F0F0F);  // AT = mask (T)~(T)0/255*15
+    __ And(out, out, AT);  // out = (v + (v >> 4)) & mask (step 3)
+    __ LoadConst32(TMP, 0x01010101);  // TMP = multiplier (T)~(T)0/255
+    __ MulR6(out, out, TMP);  // out = v * multiplier
+    __ Srl(out, out, 24);  // shift by (sizeof(T) - 1) * 8 = 24 (step 4): the count
+  } else if (type == Primitive::kPrimLong) {
+    __ Dsrl(TMP, in, 1);  // TMP = v >> 1
+    __ LoadConst64(AT, 0x5555555555555555L);  // AT = mask (T)~(T)0/3
+    __ And(TMP, TMP, AT);  // TMP = (v >> 1) & mask
+    __ Dsubu(TMP, in, TMP);  // TMP = v - TMP (step 1); last read of `in`
+    __ LoadConst64(AT, 0x3333333333333333L);  // AT = mask (T)~(T)0/15*3
+    __ And(out, TMP, AT);  // out = v & mask; first write to `out`
+    __ Dsrl(TMP, TMP, 2);  // TMP = v >> 2
+    __ And(TMP, TMP, AT);  // TMP = (v >> 2) & mask
+    __ Daddu(TMP, out, TMP);  // TMP = sum of both halves (step 2)
+    __ Dsrl(out, TMP, 4);  // out = v >> 4
+    __ Daddu(out, out, TMP);  // out = v + (v >> 4)
+    __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL);  // AT = mask (T)~(T)0/255*15
+    __ And(out, out, AT);  // out = (v + (v >> 4)) & mask (step 3)
+    __ LoadConst64(TMP, 0x0101010101010101L);  // TMP = multiplier (T)~(T)0/255
+    __ Dmul(out, out, TMP);  // out = v * multiplier
+    __ Dsrl32(out, out, 24);  // presumably shifts by 32 + 24 = 56 = (sizeof(T) - 1) * 8 (step 4)
+  }
+}
+
+// int java.lang.Integer.bitCount(int)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);  // Delegates location setup to the shared helper.
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());  // 32-bit variant.
+}
+
+// int java.lang.Long.bitCount(long)
+void IntrinsicLocationsBuilderMIPS64::VisitLongBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);  // NOTE(review): int helper, long input; confirm.
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());  // 64-bit variant.
+}
+
 static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
   FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
   FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
@@ -1693,9 +1779,6 @@
   GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
-UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerBitCount)
-UNIMPLEMENTED_INTRINSIC(MIPS64, LongBitCount)
-
 UNIMPLEMENTED_INTRINSIC(MIPS64, MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(MIPS64, MathRoundFloat)
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 3670ce2..37197af 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -769,15 +769,6 @@
   return codegen.release();
 }
 
-static bool CanHandleVerificationFailure(const VerifiedMethod* verified_method) {
-  // For access errors the compiler will use the unresolved helpers (e.g. HInvokeUnresolved).
-  uint32_t unresolved_mask = verifier::VerifyError::VERIFY_ERROR_NO_CLASS
-      | verifier::VerifyError::VERIFY_ERROR_ACCESS_CLASS
-      | verifier::VerifyError::VERIFY_ERROR_ACCESS_FIELD
-      | verifier::VerifyError::VERIFY_ERROR_ACCESS_METHOD;
-  return (verified_method->GetEncounteredVerificationFailures() & (~unresolved_mask)) == 0;
-}
-
 CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
                                             uint32_t access_flags,
                                             InvokeType invoke_type,
@@ -792,7 +783,8 @@
   const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx);
   DCHECK(!verified_method->HasRuntimeThrow());
   if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file)
-      || CanHandleVerificationFailure(verified_method)) {
+      || verifier::MethodVerifier::CanCompilerHandleVerificationFailure(
+            verified_method->GetEncounteredVerificationFailures())) {
     ArenaAllocator arena(Runtime::Current()->GetArenaPool());
     CodeVectorAllocator code_allocator(&arena);
     std::unique_ptr<CodeGenerator> codegen(
@@ -865,6 +857,7 @@
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       method->GetDeclaringClass()->GetClassLoader()));
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
+  DCHECK(method->IsCompilable());
 
   jobject jclass_loader = class_loader.ToJObject();
   const DexFile* dex_file = method->GetDexFile();
diff --git a/profman/profile_assistant.cc b/profman/profile_assistant.cc
index 58e8a3a..a25460e 100644
--- a/profman/profile_assistant.cc
+++ b/profman/profile_assistant.cc
@@ -21,44 +21,41 @@
 
 namespace art {
 
-// Minimum number of new methods that profiles must contain to enable recompilation.
+// Minimum number of new methods/classes that profiles
+// must contain to enable recompilation.
 static constexpr const uint32_t kMinNewMethodsForCompilation = 10;
+static constexpr const uint32_t kMinNewClassesForCompilation = 10;
 
 ProfileAssistant::ProcessingResult ProfileAssistant::ProcessProfilesInternal(
         const std::vector<ScopedFlock>& profile_files,
         const ScopedFlock& reference_profile_file) {
   DCHECK(!profile_files.empty());
 
-  std::vector<ProfileCompilationInfo> new_info(profile_files.size());
-  bool should_compile = false;
-  // Read the main profile files.
-  for (size_t i = 0; i < new_info.size(); i++) {
-    if (!new_info[i].Load(profile_files[i].GetFile()->Fd())) {
-      LOG(WARNING) << "Could not load profile file at index " << i;
-      return kErrorBadProfiles;
-    }
-    // Do we have enough new profiled methods that will make the compilation worthwhile?
-    should_compile |= (new_info[i].GetNumberOfMethods() > kMinNewMethodsForCompilation);
-  }
-
-  if (!should_compile) {
-    return kSkipCompilation;
-  }
-
-  // Merge information.
   ProfileCompilationInfo info;
+  // Load the reference profile.
   if (!info.Load(reference_profile_file.GetFile()->Fd())) {
     LOG(WARNING) << "Could not load reference profile file";
     return kErrorBadProfiles;
   }
 
-  for (size_t i = 0; i < new_info.size(); i++) {
-    // Merge all data into a single object.
-    if (!info.Load(new_info[i])) {
-      LOG(WARNING) << "Could not merge profile data at index " << i;
+  // Store the current state of the reference profile before merging with the current profiles.
+  uint32_t number_of_methods = info.GetNumberOfMethods();
+  uint32_t number_of_classes = info.GetNumberOfResolvedClasses();
+
+  // Merge all current profiles.
+  for (size_t i = 0; i < profile_files.size(); i++) {
+    if (!info.Load(profile_files[i].GetFile()->Fd())) {
+      LOG(WARNING) << "Could not load profile file at index " << i;
       return kErrorBadProfiles;
     }
   }
+
+  // Check if there is enough new information added by the current profiles.
+  if (((info.GetNumberOfMethods() - number_of_methods) < kMinNewMethodsForCompilation) &&
+      ((info.GetNumberOfResolvedClasses() - number_of_classes) < kMinNewClassesForCompilation)) {
+    return kSkipCompilation;
+  }
+
   // We were successful in merging all profile information. Update the reference profile.
   if (!reference_profile_file.GetFile()->ClearContent()) {
     PLOG(WARNING) << "Could not clear reference profile file";
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
index b0d5df2..462c397 100644
--- a/profman/profile_assistant_test.cc
+++ b/profman/profile_assistant_test.cc
@@ -29,6 +29,7 @@
   void SetupProfile(const std::string& id,
                     uint32_t checksum,
                     uint16_t number_of_methods,
+                    uint16_t number_of_classes,
                     const ScratchFile& profile,
                     ProfileCompilationInfo* info,
                     uint16_t start_method_index = 0) {
@@ -40,6 +41,10 @@
       ASSERT_TRUE(info->AddMethodIndex(dex_location1, dex_location_checksum1, i));
       ASSERT_TRUE(info->AddMethodIndex(dex_location2, dex_location_checksum2, i));
     }
+    for (uint16_t i = 0; i < number_of_classes; i++) {
+      ASSERT_TRUE(info->AddClassIndex(dex_location1, dex_location_checksum1, i));
+    }
+
     ASSERT_TRUE(info->Save(GetFd(profile)));
     ASSERT_EQ(0, profile.GetFile()->Flush());
     ASSERT_TRUE(profile.GetFile()->ResetOffset());
@@ -89,9 +94,9 @@
 
   const uint16_t kNumberOfMethodsToEnableCompilation = 100;
   ProfileCompilationInfo info1;
-  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, 0, profile1, &info1);
   ProfileCompilationInfo info2;
-  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, 0, profile2, &info2);
 
   // We should advise compilation.
   ASSERT_EQ(ProfileAssistant::kCompile,
@@ -102,8 +107,8 @@
   ASSERT_TRUE(result.Load(reference_profile_fd));
 
   ProfileCompilationInfo expected;
-  ASSERT_TRUE(expected.Load(info1));
-  ASSERT_TRUE(expected.Load(info2));
+  ASSERT_TRUE(expected.MergeWith(info1));
+  ASSERT_TRUE(expected.MergeWith(info2));
   ASSERT_TRUE(expected.Equals(result));
 
   // The information from profiles must remain the same.
@@ -111,6 +116,35 @@
   CheckProfileInfo(profile2, info2);
 }
 
+// TODO(calin): Add more tests for classes.
+TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferencesBecauseOfClasses) {
+  // One input profile that records classes only; nothing is written to the reference profile.
+  ScratchFile class_only_profile;
+  ScratchFile reference;
+  std::vector<int> input_fds{GetFd(class_only_profile)};
+  int reference_fd = GetFd(reference);
+
+  const uint16_t kClassCount = 100;
+  ProfileCompilationInfo class_only_info;
+  SetupProfile("p1", 1, /* number_of_methods */ 0, kClassCount, class_only_profile,
+               &class_only_info);
+
+  // The classes alone must be enough to request compilation.
+  ASSERT_EQ(ProfileAssistant::kCompile, ProcessProfiles(input_fds, reference_fd));
+
+  // Re-read the reference profile from its start: it must now be equal to the
+  // merge of the inputs, which here is just the single class-only profile.
+  ASSERT_TRUE(reference.GetFile()->ResetOffset());
+  ProfileCompilationInfo merged;
+  ASSERT_TRUE(merged.Load(reference_fd));
+  ProfileCompilationInfo wanted;
+  ASSERT_TRUE(wanted.MergeWith(class_only_info));
+  ASSERT_TRUE(wanted.Equals(merged));
+
+  // Processing must leave the input profile's contents unchanged.
+  CheckProfileInfo(class_only_profile, class_only_info);
+}
+
 TEST_F(ProfileAssistantTest, AdviseCompilationNonEmptyReferences) {
   ScratchFile profile1;
   ScratchFile profile2;
@@ -124,15 +158,15 @@
   // The new profile info will contain the methods with indices 0-100.
   const uint16_t kNumberOfMethodsToEnableCompilation = 100;
   ProfileCompilationInfo info1;
-  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, 0, profile1, &info1);
   ProfileCompilationInfo info2;
-  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+  SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, 0, profile2, &info2);
 
 
   // The reference profile info will contain the methods with indices 50-150.
   const uint16_t kNumberOfMethodsAlreadyCompiled = 100;
   ProfileCompilationInfo reference_info;
-  SetupProfile("p1", 1, kNumberOfMethodsAlreadyCompiled, reference_profile,
+  SetupProfile("p1", 1, kNumberOfMethodsAlreadyCompiled, 0, reference_profile,
       &reference_info, kNumberOfMethodsToEnableCompilation / 2);
 
   // We should advise compilation.
@@ -145,9 +179,9 @@
   ASSERT_TRUE(result.Load(reference_profile_fd));
 
   ProfileCompilationInfo expected;
-  ASSERT_TRUE(expected.Load(info1));
-  ASSERT_TRUE(expected.Load(info2));
-  ASSERT_TRUE(expected.Load(reference_info));
+  ASSERT_TRUE(expected.MergeWith(info1));
+  ASSERT_TRUE(expected.MergeWith(info2));
+  ASSERT_TRUE(expected.MergeWith(reference_info));
   ASSERT_TRUE(expected.Equals(result));
 
   // The information from profiles must remain the same.
@@ -167,9 +201,9 @@
 
   const uint16_t kNumberOfMethodsToSkipCompilation = 1;
   ProfileCompilationInfo info1;
-  SetupProfile("p1", 1, kNumberOfMethodsToSkipCompilation, profile1, &info1);
+  SetupProfile("p1", 1, kNumberOfMethodsToSkipCompilation, 0, profile1, &info1);
   ProfileCompilationInfo info2;
-  SetupProfile("p2", 2, kNumberOfMethodsToSkipCompilation, profile2, &info2);
+  SetupProfile("p2", 2, kNumberOfMethodsToSkipCompilation, 0, profile2, &info2);
 
   // We should not advise compilation.
   ASSERT_EQ(ProfileAssistant::kSkipCompilation,
@@ -207,9 +241,9 @@
   const uint16_t kNumberOfMethodsToEnableCompilation = 100;
   // Assign different hashes for the same dex file. This will make merging of information to fail.
   ProfileCompilationInfo info1;
-  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, 0, profile1, &info1);
   ProfileCompilationInfo info2;
-  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, 0, profile2, &info2);
 
   // We should fail processing.
   ASSERT_EQ(ProfileAssistant::kErrorBadProfiles,
@@ -234,9 +268,9 @@
   const uint16_t kNumberOfMethodsToEnableCompilation = 100;
   // Assign different hashes for the same dex file. This will make merging of information to fail.
   ProfileCompilationInfo info1;
-  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, 0, profile1, &info1);
   ProfileCompilationInfo reference_info;
-  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, reference_profile, &reference_info);
+  SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, 0, reference_profile, &reference_info);
 
   // We should not advise compilation.
   ASSERT_TRUE(profile1.GetFile()->ResetOffset());
diff --git a/profman/profman.cc b/profman/profman.cc
index 7c9e449..3e632bc 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -14,12 +14,14 @@
  * limitations under the License.
  */
 
+#include "errno.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/file.h>
 #include <sys/stat.h>
 #include <unistd.h>
 
+#include <iostream>
 #include <string>
 #include <vector>
 
@@ -68,6 +70,9 @@
   UsageError("Command: %s", CommandLine().c_str());
   UsageError("Usage: profman [options]...");
   UsageError("");
+  UsageError("  --dump-info-for=<filename>: dumps the content of the profile file");
+  UsageError("      to standard output in a human readable form.");
+  UsageError("");
   UsageError("  --profile-file=<filename>: specify profiler output file to use for compilation.");
   UsageError("      Can be specified multiple time, in which case the data from the different");
   UsageError("      profiles will be aggregated.");
@@ -117,9 +122,11 @@
       const StringPiece option(argv[i]);
       const bool log_options = false;
       if (log_options) {
-        LOG(INFO) << "patchoat: option[" << i << "]=" << argv[i];
+        LOG(INFO) << "profman: option[" << i << "]=" << argv[i];
       }
-      if (option.starts_with("--profile-file=")) {
+      if (option.starts_with("--dump-info-for=")) {
+        dump_info_for_ = option.substr(strlen("--dump-info-for=")).ToString();
+      } else if (option.starts_with("--profile-file=")) {
         profile_files_.push_back(option.substr(strlen("--profile-file=")).ToString());
       } else if (option.starts_with("--profile-file-fd=")) {
         ParseFdForCollection(option, "--profile-file-fd", &profile_files_fd_);
@@ -132,13 +139,23 @@
       }
     }
 
-    if (profile_files_.empty() && profile_files_fd_.empty()) {
+    bool has_profiles = !profile_files_.empty() || !profile_files_fd_.empty();
+    bool has_reference_profile = !reference_profile_file_.empty() ||
+        (reference_profile_file_fd_ != -1);
+
+    if (!dump_info_for_.empty()) {
+      if (has_profiles || has_reference_profile) {
+        Usage("dump-info-for cannot be specified together with other options");
+      }
+      return;
+    }
+    if (!has_profiles) {
       Usage("No profile files specified.");
     }
     if (!profile_files_.empty() && !profile_files_fd_.empty()) {
       Usage("Profile files should not be specified with both --profile-file-fd and --profile-file");
     }
-    if (!reference_profile_file_.empty() && (reference_profile_file_fd_ != -1)) {
+    if (!has_reference_profile) {
       Usage("--reference-profile-file-fd should only be supplied with --profile-file-fd");
     }
     if (reference_profile_file_.empty() && (reference_profile_file_fd_ == -1)) {
@@ -160,6 +177,34 @@
     return result;
   }
 
+  // Dumps the profile stored in the file named by --dump-info-for to standard output
+  // in human readable form. Returns 0 on success and -1 if the file cannot be opened
+  // or its contents cannot be loaded as profile data.
+  int DumpProfileInfo() {
+    int fd = open(dump_info_for_.c_str(), O_RDWR);
+    if (fd < 0) {
+      std::cerr << "Cannot open " << dump_info_for_ << ": " << strerror(errno) << "\n";
+      return -1;
+    }
+    ProfileCompilationInfo info;
+    if (!info.Load(fd)) {
+      std::cerr << "Cannot load profile info from " << dump_info_for_ << "\n";
+      close(fd);  // Do not leak the descriptor on the error path.
+      return -1;
+    }
+    std::string dump = info.DumpInfo(/*dex_files*/ nullptr);
+    // NOTE(review): a dump-only command writing back to the file it just read looks
+    // unintended (and the result of Save() is ignored); kept as-is pending confirmation.
+    info.Save(fd);
+    close(fd);
+    std::cout << dump << "\n";
+    return 0;
+  }
+
+  bool ShouldOnlyDumpProfile() const {  // True when a --dump-info-for file name was given.
+    return !dump_info_for_.empty();
+  }
+
  private:
   static void ParseFdForCollection(const StringPiece& option,
                                    const char* arg_name,
@@ -186,6 +224,7 @@
   std::string reference_profile_file_;
   int reference_profile_file_fd_;
   uint64_t start_ns_;
+  std::string dump_info_for_;
 };
 
 // See ProfileAssistant::ProcessingResult for return codes.
@@ -195,6 +234,9 @@
   // Parse arguments. Argument mistakes will lead to exit(EXIT_FAILURE) in UsageError.
   profman.ParseArgs(argc, argv);
 
+  if (profman.ShouldOnlyDumpProfile()) {
+    return profman.DumpProfileInfo();
+  }
   // Process profile information and assess if we need to do a profile guided compilation.
   // This operation involves I/O.
   return profman.ProcessProfiles();
diff --git a/runtime/art_method.h b/runtime/art_method.h
index d1ef019..08f0285 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -220,6 +220,13 @@
     return !IsAbstract() && !IsDefaultConflicting();
   }
 
+  // Tells whether this method may be compiled: true unless its access flags
+  // carry kAccCompileDontBother.
+  bool IsCompilable() {
+    const bool dont_bother = (GetAccessFlags() & kAccCompileDontBother) != 0;
+    return !dont_bother;
+  }
+
   // A default conflict method is a special sentinel method that stands for a conflict between
   // multiple default methods. It cannot be invoked, throwing an IncompatibleClassChangeError if one
   // attempts to do so.
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 1e7ee65..fa0107a 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -4442,7 +4442,20 @@
         // We failed to verify, expect either the klass to be erroneous or verification failed at
         // compile time.
         if (klass->IsErroneous()) {
-          CHECK(self->IsExceptionPending());
+          // The class is erroneous. This may be a verifier error, or another thread attempted
+          // verification and/or initialization and failed. We can distinguish those cases by
+          // whether an exception is already pending.
+          if (self->IsExceptionPending()) {
+            // Check that it's a VerifyError.
+            DCHECK_EQ("java.lang.Class<java.lang.VerifyError>",
+                      PrettyClass(self->GetException()->GetClass()));
+          } else {
+            // Check that another thread attempted initialization.
+            DCHECK_NE(0, klass->GetClinitThreadId());
+            DCHECK_NE(self->GetTid(), klass->GetClinitThreadId());
+            // Need to rethrow the previous failure now.
+            ThrowEarlierClassFailure(klass.Get(), true);
+          }
           VlogClassInitializationFailure(klass);
         } else {
           CHECK(Runtime::Current()->IsAotCompiler());
@@ -4452,6 +4465,14 @@
       } else {
         self->AssertNoPendingException();
       }
+
+      // A separate thread could have moved us all the way to initialized. A "simple" example
+      // involves a subclass of the current class being initialized at the same time (which
+      // will implicitly initialize the superclass, if scheduled that way). b/28254258
+      DCHECK_NE(mirror::Class::kStatusError, klass->GetStatus());
+      if (klass->IsInitialized()) {
+        return true;
+      }
     }
 
     // If the class is kStatusInitializing, either this thread is
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 2a66847..c36543f 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -119,6 +119,11 @@
   memory_use_.PrintMemoryUse(os);
 }
 
+void Jit::DumpForSigQuit(std::ostream& os) {
+  DumpInfo(os);  // First this Jit's own report...
+  ProfileSaver::DumpInstanceInfo(os);  // ...then the profile saver's, on the same stream.
+}
+
 void Jit::AddTimingLogger(const TimingLogger& logger) {
   cumulative_timings_.AddLogger(logger);
 }
@@ -297,7 +302,7 @@
 
 void Jit::StopProfileSaver() {
   if (save_profiling_info_ && ProfileSaver::IsStarted()) {
-    ProfileSaver::Stop();
+    ProfileSaver::Stop(dump_info_on_shutdown_);
   }
 }
 
@@ -562,7 +567,7 @@
     return;
   }
 
-  if (method->IsClassInitializer() || method->IsNative()) {
+  if (method->IsClassInitializer() || method->IsNative() || !method->IsCompilable()) {
     // We do not want to compile such methods.
     return;
   }
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index ff3acf6..8198c18 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -127,9 +127,7 @@
                          const std::string& app_dir);
   void StopProfileSaver();
 
-  void DumpForSigQuit(std::ostream& os) REQUIRES(!lock_) {
-    DumpInfo(os);
-  }
+  void DumpForSigQuit(std::ostream& os) REQUIRES(!lock_);
 
   static void NewTypeLoadedIfUsingJit(mirror::Class* type)
       SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 752d4ba..6b6f5a5 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -887,13 +887,15 @@
   }
 }
 
-void JitCodeCache::GetCompiledArtMethods(const std::set<std::string>& dex_base_locations,
-                                         std::vector<ArtMethod*>& methods) {
+void JitCodeCache::GetProfiledMethods(const std::set<std::string>& dex_base_locations,
+                                      std::vector<MethodReference>& methods) {
   ScopedTrace trace(__FUNCTION__);
   MutexLock mu(Thread::Current(), lock_);
-  for (auto it : method_code_map_) {
-    if (ContainsElement(dex_base_locations, it.second->GetDexFile()->GetBaseLocation())) {
-      methods.push_back(it.second);
+  for (const ProfilingInfo* info : profiling_infos_) {
+    ArtMethod* method = info->GetMethod();
+    const DexFile* dex_file = method->GetDexFile();
+    if (ContainsElement(dex_base_locations, dex_file->GetBaseLocation())) {
+      methods.emplace_back(dex_file,  method->GetDexMethodIndex());
     }
   }
 }
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index f31cc51..4df6762 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -26,6 +26,7 @@
 #include "gc/accounting/bitmap.h"
 #include "gc_root.h"
 #include "jni.h"
+#include "method_reference.h"
 #include "oat_file.h"
 #include "object_callbacks.h"
 #include "safe_map.h"
@@ -165,9 +166,9 @@
 
   void* MoreCore(const void* mspace, intptr_t increment);
 
-  // Adds to `methods` all the compiled ArtMethods which are part of any of the given dex locations.
-  void GetCompiledArtMethods(const std::set<std::string>& dex_base_locations,
-                             std::vector<ArtMethod*>& methods)
+  // Adds to `methods` all profiled methods which are part of any of the given dex locations.
+  void GetProfiledMethods(const std::set<std::string>& dex_base_locations,
+                          std::vector<MethodReference>& methods)
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index f181ca3..a79bcf0 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -16,7 +16,8 @@
 
 #include "offline_profiling_info.h"
 
-#include <fstream>
+#include "errno.h"
+#include <limits.h>
 #include <vector>
 #include <sys/file.h>
 #include <sys/stat.h>
@@ -34,6 +35,11 @@
 
 namespace art {
 
+const uint8_t ProfileCompilationInfo::kProfileMagic[] = { 'p', 'r', 'o', '\0' };
+const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '0', '1', '\0' };
+
+static constexpr uint16_t kMaxDexFileKeyLength = PATH_MAX;
+
 // Transform the actual dex location into relative paths.
 // Note: this is OK because we don't store profiles of different apps into the same file.
 // Apps with split apks don't cause trouble because each split has a different name and will not
@@ -49,15 +55,27 @@
   }
 }
 
-bool ProfileCompilationInfo::SaveProfilingInfo(
-    const std::string& filename,
-    const std::vector<ArtMethod*>& methods,
+bool ProfileCompilationInfo::AddMethodsAndClasses(
+    const std::vector<MethodReference>& methods,
     const std::set<DexCacheResolvedClasses>& resolved_classes) {
-  if (methods.empty() && resolved_classes.empty()) {
-    VLOG(profiler) << "No info to save to " << filename;
-    return true;
+  for (const MethodReference& method : methods) {
+    if (!AddMethodIndex(GetProfileDexFileKey(method.dex_file->GetLocation()),
+                        method.dex_file->GetLocationChecksum(),
+                        method.dex_method_index)) {
+      return false;
+    }
   }
+  for (const DexCacheResolvedClasses& dex_cache : resolved_classes) {
+    if (!AddResolvedClasses(dex_cache)) {
+      return false;
+    }
+  }
+  return true;
+}
 
+bool ProfileCompilationInfo::MergeAndSave(const std::string& filename,
+                                          uint64_t* bytes_written,
+                                          bool force) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   ScopedFlock flock;
   std::string error;
@@ -68,26 +86,37 @@
 
   int fd = flock.GetFile()->Fd();
 
-  ProfileCompilationInfo info;
-  if (!info.Load(fd)) {
-    LOG(WARNING) << "Could not load previous profile data from file " << filename;
-    return false;
-  }
-  {
-    ScopedObjectAccess soa(Thread::Current());
-    for (ArtMethod* method : methods) {
-      const DexFile* dex_file = method->GetDexFile();
-      if (!info.AddMethodIndex(GetProfileDexFileKey(dex_file->GetLocation()),
-                               dex_file->GetLocationChecksum(),
-                               method->GetDexMethodIndex())) {
+  // Load the file but keep a copy around to be able to infer if the content has changed.
+  ProfileCompilationInfo fileInfo;
+  ProfileLoadSatus status = fileInfo.LoadInternal(fd, &error);
+  if (status == kProfileLoadSuccess) {
+    // Merge the content of file into the current object.
+    if (MergeWith(fileInfo)) {
+      // If after the merge we have the same data as what is in the file there's no point
+      // in actually doing the write. The file will be exactly the same as before.
+      if (Equals(fileInfo)) {
+        if (bytes_written != nullptr) {
+          *bytes_written = 0;
+        }
+        return true;
+      }
+    } else {
+      LOG(WARNING) << "Could not merge previous profile data from file " << filename;
+      if (!force) {
         return false;
       }
     }
-    for (const DexCacheResolvedClasses& dex_cache : resolved_classes) {
-      info.AddResolvedClasses(dex_cache);
-    }
+  } else if (force &&
+        ((status == kProfileLoadVersionMismatch) || (status == kProfileLoadBadData))) {
+      // Log a warning but don't return false. We will clear the profile anyway.
+      LOG(WARNING) << "Clearing bad or obsolete profile data from file "
+          << filename << ": " << error;
+  } else {
+    LOG(WARNING) << "Could not load profile data from file " << filename << ": " << error;
+    return false;
   }
 
+  // We need to clear the data because we don't support appending to the profiles yet.
   if (!flock.GetFile()->ClearContent()) {
     PLOG(WARNING) << "Could not clear profile file: " << filename;
     return false;
@@ -95,95 +124,118 @@
 
   // This doesn't need locking because we are trying to lock the file for exclusive
   // access and fail immediately if we can't.
-  bool result = info.Save(fd);
+  bool result = Save(fd);
   if (result) {
     VLOG(profiler) << "Successfully saved profile info to " << filename
         << " Size: " << GetFileSizeBytes(filename);
+    if (bytes_written != nullptr) {
+      *bytes_written = GetFileSizeBytes(filename);
+    }
   } else {
     VLOG(profiler) << "Failed to save profile info to " << filename;
   }
   return result;
 }
 
-static bool WriteToFile(int fd, const std::ostringstream& os) {
-  std::string data(os.str());
-  const char *p = data.c_str();
-  size_t length = data.length();
-  do {
-    int n = TEMP_FAILURE_RETRY(write(fd, p, length));
-    if (n < 0) {
-      PLOG(WARNING) << "Failed to write to descriptor: " << fd;
+// Returns true if all the bytes were successfully written to the file descriptor.
+static bool WriteBuffer(int fd, const uint8_t* buffer, size_t byte_count) {
+  while (byte_count > 0) {
+    int bytes_written = TEMP_FAILURE_RETRY(write(fd, buffer, byte_count));
+    if (bytes_written == -1) {
       return false;
     }
-    p += n;
-    length -= n;
-  } while (length > 0);
+    byte_count -= bytes_written;  // Reduce the number of remaining bytes.
+    buffer += bytes_written;  // Move the buffer forward.
+  }
   return true;
 }
 
-static constexpr const char kFieldSeparator = ',';
-static constexpr const char kLineSeparator = '\n';
-static constexpr const char* kClassesMarker = "classes";
+// Add the string bytes to the buffer.
+static void AddStringToBuffer(std::vector<uint8_t>* buffer, const std::string& value) {
+  buffer->insert(buffer->end(), value.begin(), value.end());
+}
+
+// Insert each byte, from low to high into the buffer.
+template <typename T>
+static void AddUintToBuffer(std::vector<uint8_t>* buffer, T value) {
+  for (size_t i = 0; i < sizeof(T); i++) {
+    buffer->push_back((value >> (i * kBitsPerByte)) & 0xff);
+  }
+}
+
+static constexpr size_t kLineHeaderSize =
+    3 * sizeof(uint16_t) +  // method_set.size + class_set.size + dex_location.size
+    sizeof(uint32_t);       // checksum
 
 /**
  * Serialization format:
- *    dex_location1,dex_location_checksum1,method_id11,method_id12...,classes,class_id1,class_id2...
- *    dex_location2,dex_location_checksum2,method_id21,method_id22...,classes,class_id1,class_id2...
- * e.g.
- *    app.apk,131232145,11,23,454,54,classes,1,2,4,1234
- *    app.apk:classes5.dex,218490184,39,13,49,1
+ *    magic,version,number_of_lines
+ *    dex_location_size1,number_of_methods1,number_of_classes1,dex_location_checksum1, \
+ *        dex_location1,method_id11,method_id12...,class_id1,class_id2...
+ *    dex_location_size2,number_of_methods2,number_of_classes2,dex_location_checksum2, \
+ *        dex_location2,method_id21,method_id22...,class_id1,class_id2...
+ *    .....
  **/
 bool ProfileCompilationInfo::Save(int fd) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   DCHECK_GE(fd, 0);
-  // TODO(calin): Profile this and see how much memory it takes. If too much,
-  // write to file directly.
-  std::ostringstream os;
+
+  // Cache at most 5KB before writing.
+  static constexpr size_t kMaxSizeToKeepBeforeWriting = 5 * KB;
+  // Use a vector wrapper to avoid keeping track of offsets when we add elements.
+  std::vector<uint8_t> buffer;
+  WriteBuffer(fd, kProfileMagic, sizeof(kProfileMagic));
+  WriteBuffer(fd, kProfileVersion, sizeof(kProfileVersion));
+  AddUintToBuffer(&buffer, static_cast<uint16_t>(info_.size()));
+
   for (const auto& it : info_) {
+    if (buffer.size() > kMaxSizeToKeepBeforeWriting) {
+      if (!WriteBuffer(fd, buffer.data(), buffer.size())) {
+        return false;
+      }
+      buffer.clear();
+    }
     const std::string& dex_location = it.first;
     const DexFileData& dex_data = it.second;
     if (dex_data.method_set.empty() && dex_data.class_set.empty()) {
       continue;
     }
 
-    os << dex_location << kFieldSeparator << dex_data.checksum;
+    if (dex_location.size() >= kMaxDexFileKeyLength) {
+      LOG(WARNING) << "DexFileKey exceeds allocated limit";
+      return false;
+    }
+
+    // Make sure that the buffer has enough capacity to avoid repeated resizings
+    // while we add data.
+    size_t required_capacity = buffer.size() +
+        kLineHeaderSize +
+        dex_location.size() +
+        sizeof(uint16_t) * (dex_data.class_set.size() + dex_data.method_set.size());
+
+    buffer.reserve(required_capacity);
+
+    DCHECK_LE(dex_location.size(), std::numeric_limits<uint16_t>::max());
+    DCHECK_LE(dex_data.method_set.size(), std::numeric_limits<uint16_t>::max());
+    DCHECK_LE(dex_data.class_set.size(), std::numeric_limits<uint16_t>::max());
+    AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_location.size()));
+    AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_data.method_set.size()));
+    AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_data.class_set.size()));
+    AddUintToBuffer(&buffer, dex_data.checksum);  // uint32_t
+
+    AddStringToBuffer(&buffer, dex_location);
+
     for (auto method_it : dex_data.method_set) {
-      os << kFieldSeparator << method_it;
+      AddUintToBuffer(&buffer, method_it);
     }
-    if (!dex_data.class_set.empty()) {
-      os << kFieldSeparator << kClassesMarker;
-      for (auto class_id : dex_data.class_set) {
-        os << kFieldSeparator << class_id;
-      }
+    for (auto class_id : dex_data.class_set) {
+      AddUintToBuffer(&buffer, class_id);
     }
-    os << kLineSeparator;
+    DCHECK_EQ(required_capacity, buffer.size())
+        << "Failed to add the expected number of bytes in the buffer";
   }
 
-  return WriteToFile(fd, os);
-}
-
-// TODO(calin): This a duplicate of Utils::Split fixing the case where the first character
-// is the separator. Merge the fix into Utils::Split once verified that it doesn't break its users.
-static void SplitString(const std::string& s, char separator, std::vector<std::string>* result) {
-  const char* p = s.data();
-  const char* end = p + s.size();
-  // Check if the first character is the separator.
-  if (p != end && *p ==separator) {
-    result->push_back("");
-    ++p;
-  }
-  // Process the rest of the characters.
-  while (p != end) {
-    if (*p == separator) {
-      ++p;
-    } else {
-      const char* start = p;
-      while (++p != end && *p != separator) {
-        // Skip to the next occurrence of the separator.
-      }
-      result->push_back(std::string(start, p - start));
-    }
-  }
+  return WriteBuffer(fd, buffer.data(), buffer.size());
 }
 
 ProfileCompilationInfo::DexFileData* ProfileCompilationInfo::GetOrAddDexFileData(
@@ -233,120 +285,259 @@
   return true;
 }
 
-bool ProfileCompilationInfo::ProcessLine(const std::string& line) {
-  std::vector<std::string> parts;
-  SplitString(line, kFieldSeparator, &parts);
-  if (parts.size() < 3) {
-    LOG(WARNING) << "Invalid line: " << line;
-    return false;
-  }
-
-  const std::string& dex_location = parts[0];
-  uint32_t checksum;
-  if (!ParseInt(parts[1].c_str(), &checksum)) {
-    return false;
-  }
-
-  for (size_t i = 2; i < parts.size(); i++) {
-    if (parts[i] == kClassesMarker) {
-      ++i;
-      // All of the remaining idx are class def indexes.
-      for (++i; i < parts.size(); ++i) {
-        uint32_t class_def_idx;
-        if (!ParseInt(parts[i].c_str(), &class_def_idx)) {
-          LOG(WARNING) << "Cannot parse class_def_idx " << parts[i];
-          return false;
-        } else if (class_def_idx >= std::numeric_limits<uint16_t>::max()) {
-          LOG(WARNING) << "Class def idx " << class_def_idx << " is larger than uint16_t max";
-          return false;
-        }
-        if (!AddClassIndex(dex_location, checksum, class_def_idx)) {
-          return false;
-        }
-      }
-      break;
-    }
-    uint32_t method_idx;
-    if (!ParseInt(parts[i].c_str(), &method_idx)) {
-      LOG(WARNING) << "Cannot parse method_idx " << parts[i];
-      return false;
-    }
+bool ProfileCompilationInfo::ProcessLine(SafeBuffer& line_buffer,
+                                         uint16_t method_set_size,
+                                         uint16_t class_set_size,
+                                         uint32_t checksum,
+                                         const std::string& dex_location) {
+  for (uint16_t i = 0; i < method_set_size; i++) {
+    uint16_t method_idx = line_buffer.ReadUintAndAdvance<uint16_t>();
     if (!AddMethodIndex(dex_location, checksum, method_idx)) {
       return false;
     }
   }
+
+  for (uint16_t i = 0; i < class_set_size; i++) {
+    uint16_t class_def_idx = line_buffer.ReadUintAndAdvance<uint16_t>();
+    if (!AddClassIndex(dex_location, checksum, class_def_idx)) {
+      return false;
+    }
+  }
   return true;
 }
 
-// Parses the buffer (of length n) starting from start_from and identify new lines
-// based on kLineSeparator marker.
-// Returns the first position after kLineSeparator in the buffer (starting from start_from),
-// or -1 if the marker doesn't appear.
-// The processed characters are appended to the given line.
-static int GetLineFromBuffer(char* buffer, int n, int start_from, std::string& line) {
-  if (start_from >= n) {
-    return -1;
+// Tests for EOF by trying to read 1 byte from the descriptor.
+// Returns:
+//   0 if the descriptor is at the EOF,
+//  -1 if there was an IO error
+//   1 if the descriptor has more content to read
+static int testEOF(int fd) {
+  uint8_t buffer[1];
+  return TEMP_FAILURE_RETRY(read(fd, buffer, 1));
+}
+
+// Reads an uint value previously written with AddUintToBuffer.
+template <typename T>
+T ProfileCompilationInfo::SafeBuffer::ReadUintAndAdvance() {
+  static_assert(std::is_unsigned<T>::value, "Type is not unsigned");
+  CHECK_LE(ptr_current_ + sizeof(T), ptr_end_);
+  T value = 0;
+  for (size_t i = 0; i < sizeof(T); i++) {
+    value += ptr_current_[i] << (i * kBitsPerByte);
   }
-  int new_line_pos = -1;
-  for (int i = start_from; i < n; i++) {
-    if (buffer[i] == kLineSeparator) {
-      new_line_pos = i;
-      break;
+  ptr_current_ += sizeof(T);
+  return value;
+}
+
+bool ProfileCompilationInfo::SafeBuffer::CompareAndAdvance(const uint8_t* data, size_t data_size) {
+  if (ptr_current_ + data_size > ptr_end_) {
+    return false;
+  }
+  if (memcmp(ptr_current_, data, data_size) == 0) {
+    ptr_current_ += data_size;
+    return true;
+  }
+  return false;
+}
+
+ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::SafeBuffer::FillFromFd(
+      int fd,
+      const std::string& source,
+      /*out*/std::string* error) {
+  size_t byte_count = ptr_end_ - ptr_current_;
+  uint8_t* buffer = ptr_current_;
+  while (byte_count > 0) {
+    int bytes_read = TEMP_FAILURE_RETRY(read(fd, buffer, byte_count));
+    if (bytes_read == 0) {
+      *error += "Profile EOF reached prematurely for " + source;
+      return kProfileLoadBadData;
+    } else if (bytes_read < 0) {
+      *error += "Profile IO error for " + source + strerror(errno);
+      return kProfileLoadIOError;
     }
+    byte_count -= bytes_read;
+    buffer += bytes_read;
   }
-  int append_limit = new_line_pos == -1 ? n : new_line_pos;
-  line.append(buffer + start_from, append_limit - start_from);
-  // Jump over kLineSeparator and return the position of the next character.
-  return new_line_pos == -1 ? new_line_pos : new_line_pos + 1;
+  return kProfileLoadSuccess;
+}
+
+ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::ReadProfileHeader(
+      int fd,
+      /*out*/uint16_t* number_of_lines,
+      /*out*/std::string* error) {
+  // Read magic, version and number of lines.
+  const size_t kMagicVersionSize =
+    sizeof(kProfileMagic) +
+    sizeof(kProfileVersion) +
+    sizeof(uint16_t);  // number of lines
+
+  SafeBuffer safe_buffer(kMagicVersionSize);
+
+  ProfileLoadSatus status = safe_buffer.FillFromFd(fd, "ReadProfileHeader", error);
+  if (status != kProfileLoadSuccess) {
+    return status;
+  }
+
+  if (!safe_buffer.CompareAndAdvance(kProfileMagic, sizeof(kProfileMagic))) {
+    *error = "Profile missing magic";
+    return kProfileLoadVersionMismatch;
+  }
+  if (!safe_buffer.CompareAndAdvance(kProfileVersion, sizeof(kProfileVersion))) {
+    *error = "Profile version mismatch";
+    return kProfileLoadVersionMismatch;
+  }
+  *number_of_lines = safe_buffer.ReadUintAndAdvance<uint16_t>();
+  return kProfileLoadSuccess;
+}
+
+ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::ReadProfileLineHeader(
+      int fd,
+      /*out*/ProfileLineHeader* line_header,
+      /*out*/std::string* error) {
+  SafeBuffer header_buffer(kLineHeaderSize);
+  ProfileLoadSatus status = header_buffer.FillFromFd(fd, "ReadProfileHeader", error);
+  if (status != kProfileLoadSuccess) {
+    return status;
+  }
+
+  uint16_t dex_location_size = header_buffer.ReadUintAndAdvance<uint16_t>();
+  line_header->method_set_size = header_buffer.ReadUintAndAdvance<uint16_t>();
+  line_header->class_set_size = header_buffer.ReadUintAndAdvance<uint16_t>();
+  line_header->checksum = header_buffer.ReadUintAndAdvance<uint32_t>();
+
+  if (dex_location_size == 0 || dex_location_size > kMaxDexFileKeyLength) {
+    *error = "DexFileKey has an invalid size: " + std::to_string(dex_location_size);
+    return kProfileLoadBadData;
+  }
+
+  SafeBuffer location_buffer(dex_location_size);
+  status = location_buffer.FillFromFd(fd, "ReadProfileHeaderDexLocation", error);
+  if (status != kProfileLoadSuccess) {
+    return status;
+  }
+  line_header->dex_location.assign(
+      reinterpret_cast<char*>(location_buffer.Get()), dex_location_size);
+  return kProfileLoadSuccess;
+}
+
+ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::ReadProfileLine(
+      int fd,
+      const ProfileLineHeader& line_header,
+      /*out*/std::string* error) {
+  // Make sure that we don't try to read everything in memory (in case the profile is full).
+  // Split readings in chunks of at most 10kb.
+  static constexpr uint16_t kMaxNumberOfEntriesToRead = 5120;
+  uint16_t methods_left_to_read = line_header.method_set_size;
+  uint16_t classes_left_to_read = line_header.class_set_size;
+
+  while ((methods_left_to_read > 0) || (classes_left_to_read > 0)) {
+    uint16_t methods_to_read = std::min(kMaxNumberOfEntriesToRead, methods_left_to_read);
+    uint16_t max_classes_to_read = kMaxNumberOfEntriesToRead - methods_to_read;
+    uint16_t classes_to_read = std::min(max_classes_to_read, classes_left_to_read);
+
+    size_t line_size = sizeof(uint16_t) * (methods_to_read + classes_to_read);
+    SafeBuffer line_buffer(line_size);
+
+    ProfileLoadSatus status = line_buffer.FillFromFd(fd, "ReadProfileLine", error);
+    if (status != kProfileLoadSuccess) {
+      return status;
+    }
+    if (!ProcessLine(line_buffer,
+                     methods_to_read,
+                     classes_to_read,
+                     line_header.checksum,
+                     line_header.dex_location)) {
+      *error = "Error when reading profile file line";
+      return kProfileLoadBadData;
+    }
+    methods_left_to_read -= methods_to_read;
+    classes_left_to_read -= classes_to_read;
+  }
+  return kProfileLoadSuccess;
 }
 
 bool ProfileCompilationInfo::Load(int fd) {
+  std::string error;
+  ProfileLoadSatus status = LoadInternal(fd, &error);
+
+  if (status == kProfileLoadSuccess) {
+    return true;
+  } else {
+    PLOG(WARNING) << "Error when reading profile " << error;
+    return false;
+  }
+}
+
+ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::LoadInternal(
+      int fd, std::string* error) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   DCHECK_GE(fd, 0);
 
-  std::string current_line;
-  const int kBufferSize = 1024;
-  char buffer[kBufferSize];
-
-  while (true) {
-    int n = TEMP_FAILURE_RETRY(read(fd, buffer, kBufferSize));
-    if (n < 0) {
-      PLOG(WARNING) << "Error when reading profile file";
-      return false;
-    } else if (n == 0) {
-      break;
-    }
-    // Detect the new lines from the buffer. If we manage to complete a line,
-    // process it. Otherwise append to the current line.
-    int current_start_pos = 0;
-    while (current_start_pos < n) {
-      current_start_pos = GetLineFromBuffer(buffer, n, current_start_pos, current_line);
-      if (current_start_pos == -1) {
-        break;
-      }
-      if (!ProcessLine(current_line)) {
-        return false;
-      }
-      // Reset the current line (we just processed it).
-      current_line.clear();
-    }
+  struct stat stat_buffer;
+  if (fstat(fd, &stat_buffer) != 0) {
+    return kProfileLoadIOError;
   }
-  return true;
+  // We allow empty profile files.
+  // Profiles may be created by ActivityManager or installd before we manage to
+  // process them in the runtime or profman.
+  if (stat_buffer.st_size == 0) {
+    return kProfileLoadSuccess;
+  }
+  // Read profile header: magic + version + number_of_lines.
+  uint16_t number_of_lines;
+  ProfileLoadSatus status = ReadProfileHeader(fd, &number_of_lines, error);
+  if (status != kProfileLoadSuccess) {
+    return status;
+  }
+
+  while (number_of_lines > 0) {
+    ProfileLineHeader line_header;
+    // First, read the line header to get the amount of data we need to read.
+    status = ReadProfileLineHeader(fd, &line_header, error);
+    if (status != kProfileLoadSuccess) {
+      return status;
+    }
+
+    // Now read the actual profile line.
+    status = ReadProfileLine(fd, line_header, error);
+    if (status != kProfileLoadSuccess) {
+      return status;
+    }
+    number_of_lines--;
+  }
+
+  // Check that we read everything and that profiles don't contain junk data.
+  int result = testEOF(fd);
+  if (result == 0) {
+    return kProfileLoadSuccess;
+  } else if (result < 0) {
+    return kProfileLoadIOError;
+  } else {
+    *error = "Unexpected content in the profile file";
+    return kProfileLoadBadData;
+  }
 }
 
-bool ProfileCompilationInfo::Load(const ProfileCompilationInfo& other) {
+bool ProfileCompilationInfo::MergeWith(const ProfileCompilationInfo& other) {
+  // First verify that all checksums match. This will avoid adding garbage to
+  // the current profile info.
+  // Note that the number of elements should be very small, so this should not
+  // be a performance issue.
+  for (const auto& other_it : other.info_) {
+    auto info_it = info_.find(other_it.first);
+    if ((info_it != info_.end()) && (info_it->second.checksum != other_it.second.checksum)) {
+      LOG(WARNING) << "Checksum mismatch for dex " << other_it.first;
+      return false;
+    }
+  }
+  // All checksums match. Import the data.
   for (const auto& other_it : other.info_) {
     const std::string& other_dex_location = other_it.first;
     const DexFileData& other_dex_data = other_it.second;
-
     auto info_it = info_.find(other_dex_location);
     if (info_it == info_.end()) {
       info_it = info_.Put(other_dex_location, DexFileData(other_dex_data.checksum));
     }
-    if (info_it->second.checksum != other_dex_data.checksum) {
-      LOG(WARNING) << "Checksum mismatch for dex " << other_dex_location;
-      return false;
-    }
     info_it->second.method_set.insert(other_dex_data.method_set.begin(),
                                       other_dex_data.method_set.end());
     info_it->second.class_set.insert(other_dex_data.class_set.begin(),
@@ -387,6 +578,14 @@
   return total;
 }
 
+uint32_t ProfileCompilationInfo::GetNumberOfResolvedClasses() const {
+  uint32_t total = 0;
+  for (const auto& it : info_) {
+    total += it.second.class_set.size();
+  }
+  return total;
+}
+
 std::string ProfileCompilationInfo::DumpInfo(const std::vector<const DexFile*>* dex_files,
                                              bool print_full_dex_location) const {
   std::ostringstream os;
@@ -408,19 +607,29 @@
       std::string multidex_suffix = DexFile::GetMultiDexSuffix(location);
       os << (multidex_suffix.empty() ? kFirstDexFileKeySubstitute : multidex_suffix);
     }
-    for (const auto method_it : dex_data.method_set) {
-      if (dex_files != nullptr) {
-        const DexFile* dex_file = nullptr;
-        for (size_t i = 0; i < dex_files->size(); i++) {
-          if (location == (*dex_files)[i]->GetLocation()) {
-            dex_file = (*dex_files)[i];
-          }
-        }
-        if (dex_file != nullptr) {
-          os << "\n  " << PrettyMethod(method_it, *dex_file, true);
+    const DexFile* dex_file = nullptr;
+    if (dex_files != nullptr) {
+      for (size_t i = 0; i < dex_files->size(); i++) {
+        if (location == (*dex_files)[i]->GetLocation()) {
+          dex_file = (*dex_files)[i];
         }
       }
-      os << "\n  " << method_it;
+    }
+    os << "\n\tmethods: ";
+    for (const auto method_it : dex_data.method_set) {
+      if (dex_file != nullptr) {
+        os << "\n\t\t" << PrettyMethod(method_it, *dex_file, true);
+      } else {
+        os << method_it << ",";
+      }
+    }
+    os << "\n\tclasses: ";
+    for (const auto class_it : dex_data.class_set) {
+      if (dex_file != nullptr) {
+        os << "\n\t\t" << PrettyType(class_it, *dex_file);
+      } else {
+        os << class_it << ",";
+      }
     }
   }
   return os.str();
@@ -442,4 +651,10 @@
   return ret;
 }
 
+void ProfileCompilationInfo::ClearResolvedClasses() {
+  for (auto& pair : info_) {
+    pair.second.class_set.clear();
+  }
+}
+
 }  // namespace art
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index df03244..5a07da7 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -28,9 +28,6 @@
 
 namespace art {
 
-class ArtMethod;
-class DexCacheProfileData;
-
 // TODO: rename file.
 /**
  * Profile information in a format suitable to be queried by the compiler and
@@ -41,21 +38,29 @@
  */
 class ProfileCompilationInfo {
  public:
-  // Saves profile information about the given methods in the given file.
-  // Note that the saving proceeds only if the file can be locked for exclusive access.
-  // If not (the locking is not blocking), the function does not save and returns false.
-  static bool SaveProfilingInfo(const std::string& filename,
-                                const std::vector<ArtMethod*>& methods,
-                                const std::set<DexCacheResolvedClasses>& resolved_classes);
+  static const uint8_t kProfileMagic[];
+  static const uint8_t kProfileVersion[];
 
+  // Add the given methods and classes to the current profile object.
+  bool AddMethodsAndClasses(const std::vector<MethodReference>& methods,
+                            const std::set<DexCacheResolvedClasses>& resolved_classes);
   // Loads profile information from the given file descriptor.
   bool Load(int fd);
-  // Loads the data from another ProfileCompilationInfo object.
-  bool Load(const ProfileCompilationInfo& info);
+  // Merge the data from another ProfileCompilationInfo into the current object.
+  bool MergeWith(const ProfileCompilationInfo& info);
   // Saves the profile data to the given file descriptor.
   bool Save(int fd);
+  // Loads and merges profile information from the given file into the current
+  // object and tries to save it back to disk.
+  // If `force` is true then the save will go through even if the given file
+  // has bad data or its version does not match. In this case the profile content
+  // is ignored.
+  bool MergeAndSave(const std::string& filename, uint64_t* bytes_written, bool force);
+
   // Returns the number of methods that were profiled.
   uint32_t GetNumberOfMethods() const;
+  // Returns the number of resolved classes that were profiled.
+  uint32_t GetNumberOfResolvedClasses() const;
 
   // Returns true if the method reference is present in the profiling info.
   bool ContainsMethod(const MethodReference& method_ref) const;
@@ -70,8 +75,8 @@
   std::string DumpInfo(const std::vector<const DexFile*>* dex_files,
                        bool print_full_dex_location = true) const;
 
-  // For testing purposes.
   bool Equals(const ProfileCompilationInfo& other);
+
   static std::string GetProfileDexFileKey(const std::string& dex_location);
 
   // Returns the class descriptors for all of the classes in the profiles' class sets.
@@ -79,7 +84,17 @@
   // profile info stuff to generate a map back to the dex location.
   std::set<DexCacheResolvedClasses> GetResolvedClasses() const;
 
+  // Clears the resolved classes from the current object.
+  void ClearResolvedClasses();
+
  private:
+  enum ProfileLoadSatus {
+    kProfileLoadIOError,
+    kProfileLoadVersionMismatch,
+    kProfileLoadBadData,
+    kProfileLoadSuccess
+  };
+
   struct DexFileData {
     explicit DexFileData(uint32_t location_checksum) : checksum(location_checksum) {}
     uint32_t checksum;
@@ -96,9 +111,65 @@
   DexFileData* GetOrAddDexFileData(const std::string& dex_location, uint32_t checksum);
   bool AddMethodIndex(const std::string& dex_location, uint32_t checksum, uint16_t method_idx);
   bool AddClassIndex(const std::string& dex_location, uint32_t checksum, uint16_t class_idx);
-  bool AddResolvedClasses(const DexCacheResolvedClasses& classes)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  bool ProcessLine(const std::string& line);
+  bool AddResolvedClasses(const DexCacheResolvedClasses& classes);
+
+  // Parsing functionality.
+
+  struct ProfileLineHeader {
+    std::string dex_location;
+    uint16_t method_set_size;
+    uint16_t class_set_size;
+    uint32_t checksum;
+  };
+
+  // A helper structure to make sure we don't read past our buffers in the loops.
+  struct SafeBuffer {
+   public:
+    explicit SafeBuffer(size_t size) : storage_(new uint8_t[size]) {
+      ptr_current_ = storage_.get();
+      ptr_end_ = ptr_current_ + size;
+    }
+
+    // Reads the content of the descriptor at the current position.
+    ProfileLoadSatus FillFromFd(int fd,
+                                const std::string& source,
+                                /*out*/std::string* error);
+
+    // Reads an uint value (high bits to low bits) and advances the current pointer
+    // with the number of bits read.
+    template <typename T> T ReadUintAndAdvance();
+
+    // Compares the given data with the content at the current pointer. If the
+    // contents are equal it advances the current pointer by data_size.
+    bool CompareAndAdvance(const uint8_t* data, size_t data_size);
+
+    // Get the underlying raw buffer.
+    uint8_t* Get() { return storage_.get(); }
+
+   private:
+    std::unique_ptr<uint8_t[]> storage_;  // Array form so that delete[] matches new[].
+    uint8_t* ptr_current_;
+    uint8_t* ptr_end_;
+  };
+
+  ProfileLoadSatus LoadInternal(int fd, std::string* error);
+
+  ProfileLoadSatus ReadProfileHeader(int fd,
+                                     /*out*/uint16_t* number_of_lines,
+                                     /*out*/std::string* error);
+
+  ProfileLoadSatus ReadProfileLineHeader(int fd,
+                                         /*out*/ProfileLineHeader* line_header,
+                                         /*out*/std::string* error);
+  ProfileLoadSatus ReadProfileLine(int fd,
+                                   const ProfileLineHeader& line_header,
+                                   /*out*/std::string* error);
+
+  bool ProcessLine(SafeBuffer& line_buffer,
+                   uint16_t method_set_size,
+                   uint16_t class_set_size,
+                   uint32_t checksum,
+                   const std::string& dex_location);
 
   friend class ProfileCompilationInfoTest;
   friend class CompilerDriverProfileTest;
diff --git a/runtime/jit/profile_compilation_info_test.cc b/runtime/jit/profile_compilation_info_test.cc
index fdd8c6e..c8f4d94 100644
--- a/runtime/jit/profile_compilation_info_test.cc
+++ b/runtime/jit/profile_compilation_info_test.cc
@@ -21,6 +21,7 @@
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
+#include "method_reference.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "handle_scope-inl.h"
@@ -49,16 +50,44 @@
     return methods;
   }
 
-  bool AddData(const std::string& dex_location,
-               uint32_t checksum,
-               uint16_t method_index,
-               ProfileCompilationInfo* info) {
+  bool AddMethod(const std::string& dex_location,
+                 uint32_t checksum,
+                 uint16_t method_index,
+                 ProfileCompilationInfo* info) {
     return info->AddMethodIndex(dex_location, checksum, method_index);
   }
 
+  bool AddClass(const std::string& dex_location,
+                uint32_t checksum,
+                uint16_t class_index,
+                ProfileCompilationInfo* info) {
+    return info->AddClassIndex(dex_location, checksum, class_index);
+  }
+
   uint32_t GetFd(const ScratchFile& file) {
     return static_cast<uint32_t>(file.GetFd());
   }
+
+  bool SaveProfilingInfo(
+      const std::string& filename,
+      const std::vector<ArtMethod*>& methods,
+      const std::set<DexCacheResolvedClasses>& resolved_classes) {
+    ProfileCompilationInfo info;
+    std::vector<MethodReference> method_refs;
+    ScopedObjectAccess soa(Thread::Current());
+    for (ArtMethod* method : methods) {
+      method_refs.emplace_back(method->GetDexFile(), method->GetDexMethodIndex());
+    }
+    if (!info.AddMethodsAndClasses(method_refs, resolved_classes)) {
+      return false;
+    }
+    return info.MergeAndSave(filename, nullptr, false);
+  }
+
+  // Cannot sizeof the actual arrays so hardcode the values here.
+  // They should not change anyway.
+  static constexpr int kProfileMagicSize = 4;
+  static constexpr int kProfileVersionSize = 4;
 };
 
 TEST_F(ProfileCompilationInfoTest, SaveArtMethods) {
@@ -75,9 +104,7 @@
   // Save virtual methods from Main.
   std::set<DexCacheResolvedClasses> resolved_classes;
   std::vector<ArtMethod*> main_methods = GetVirtualMethods(class_loader, "LMain;");
-  ASSERT_TRUE(ProfileCompilationInfo::SaveProfilingInfo(profile.GetFilename(),
-                                                        main_methods,
-                                                        resolved_classes));
+  ASSERT_TRUE(SaveProfilingInfo(profile.GetFilename(), main_methods, resolved_classes));
 
   // Check that what we saved is in the profile.
   ProfileCompilationInfo info1;
@@ -92,9 +119,7 @@
 
   // Save virtual methods from Second.
   std::vector<ArtMethod*> second_methods = GetVirtualMethods(class_loader, "LSecond;");
-  ASSERT_TRUE(ProfileCompilationInfo::SaveProfilingInfo(profile.GetFilename(),
-                                                        second_methods,
-                                                        resolved_classes));
+  ASSERT_TRUE(SaveProfilingInfo(profile.GetFilename(), second_methods, resolved_classes));
 
   // Check that what we saved is in the profile (methods form Main and Second).
   ProfileCompilationInfo info2;
@@ -118,8 +143,8 @@
   ProfileCompilationInfo saved_info;
   // Save a few methods.
   for (uint16_t i = 0; i < 10; i++) {
-    ASSERT_TRUE(AddData("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
-    ASSERT_TRUE(AddData("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddMethod("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
   }
   ASSERT_TRUE(saved_info.Save(GetFd(profile)));
   ASSERT_EQ(0, profile.GetFile()->Flush());
@@ -132,9 +157,9 @@
 
   // Save more methods.
   for (uint16_t i = 0; i < 100; i++) {
-    ASSERT_TRUE(AddData("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
-    ASSERT_TRUE(AddData("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
-    ASSERT_TRUE(AddData("dex_location3", /* checksum */ 3, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddMethod("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddMethod("dex_location3", /* checksum */ 3, /* method_idx */ i, &saved_info));
   }
   ASSERT_TRUE(profile.GetFile()->ResetOffset());
   ASSERT_TRUE(saved_info.Save(GetFd(profile)));
@@ -147,25 +172,156 @@
   ASSERT_TRUE(loaded_info2.Equals(saved_info));
 }
 
-TEST_F(ProfileCompilationInfoTest, AddDataFail) {
+TEST_F(ProfileCompilationInfoTest, AddMethodsAndClassesFail) {
   ScratchFile profile;
 
   ProfileCompilationInfo info;
-  ASSERT_TRUE(AddData("dex_location", /* checksum */ 1, /* method_idx */ 1, &info));
+  ASSERT_TRUE(AddMethod("dex_location", /* checksum */ 1, /* method_idx */ 1, &info));
   // Trying to add info for an existing file but with a different checksum.
-  ASSERT_FALSE(AddData("dex_location", /* checksum */ 2, /* method_idx */ 2, &info));
+  ASSERT_FALSE(AddMethod("dex_location", /* checksum */ 2, /* method_idx */ 2, &info));
 }
 
-TEST_F(ProfileCompilationInfoTest, LoadFail) {
+TEST_F(ProfileCompilationInfoTest, MergeFail) {
   ScratchFile profile;
 
   ProfileCompilationInfo info1;
-  ASSERT_TRUE(AddData("dex_location", /* checksum */ 1, /* method_idx */ 1, &info1));
+  ASSERT_TRUE(AddMethod("dex_location", /* checksum */ 1, /* method_idx */ 1, &info1));
   // Use the same file, change the checksum.
   ProfileCompilationInfo info2;
-  ASSERT_TRUE(AddData("dex_location", /* checksum */ 2, /* method_idx */ 2, &info2));
+  ASSERT_TRUE(AddMethod("dex_location", /* checksum */ 2, /* method_idx */ 2, &info2));
 
-  ASSERT_FALSE(info1.Load(info2));
+  ASSERT_FALSE(info1.MergeWith(info2));
+}
+
+TEST_F(ProfileCompilationInfoTest, SaveMaxMethods) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo saved_info;
+  // Save the maximum number of methods
+  for (uint16_t i = 0; i < std::numeric_limits<uint16_t>::max(); i++) {
+    ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+    ASSERT_TRUE(AddMethod("dex_location2", /* checksum */ 2, /* method_idx */ i, &saved_info));
+  }
+  // Save the maximum number of classes
+  for (uint16_t i = 0; i < std::numeric_limits<uint16_t>::max(); i++) {
+    ASSERT_TRUE(AddClass("dex_location1", /* checksum */ 1, /* class_idx */ i, &saved_info));
+    ASSERT_TRUE(AddClass("dex_location2", /* checksum */ 2, /* class_idx */ i, &saved_info));
+  }
+
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back what we saved.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info.Equals(saved_info));
+}
+
+TEST_F(ProfileCompilationInfoTest, SaveEmpty) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo saved_info;
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we get back what we saved.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info.Equals(saved_info));
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadEmpty) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo empty_info;
+
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  ASSERT_TRUE(loaded_info.Equals(empty_info));
+}
+
+TEST_F(ProfileCompilationInfoTest, BadMagic) {
+  ScratchFile profile;
+  uint8_t buffer[] = { 1, 2, 3, 4 };
+  ASSERT_TRUE(profile.GetFile()->WriteFully(buffer, sizeof(buffer)));
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(loaded_info.Load(GetFd(profile)));
+}
+
+TEST_F(ProfileCompilationInfoTest, BadVersion) {
+  ScratchFile profile;
+
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileMagic, kProfileMagicSize));
+  uint8_t version[] = { 'v', 'e', 'r', 's', 'i', 'o', 'n' };
+  ASSERT_TRUE(profile.GetFile()->WriteFully(version, sizeof(version)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(loaded_info.Load(GetFd(profile)));
+}
+
+TEST_F(ProfileCompilationInfoTest, Incomplete) {
+  ScratchFile profile;
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileMagic, kProfileMagicSize));
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileVersion, kProfileVersionSize));
+  // Write that we have at least one line.
+  uint8_t line_number[] = { 0, 1 };
+  ASSERT_TRUE(profile.GetFile()->WriteFully(line_number, sizeof(line_number)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(loaded_info.Load(GetFd(profile)));
+}
+
+TEST_F(ProfileCompilationInfoTest, TooLongDexLocation) {
+  ScratchFile profile;
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileMagic, kProfileMagicSize));
+  ASSERT_TRUE(profile.GetFile()->WriteFully(
+      ProfileCompilationInfo::kProfileVersion, kProfileVersionSize));
+  // Write that we have at least one line.
+  uint8_t line_number[] = { 0, 1 };
+  ASSERT_TRUE(profile.GetFile()->WriteFully(line_number, sizeof(line_number)));
+
+  // dex_location_size, methods_size, classes_size, checksum.
+  // Dex location size is too big and should be rejected.
+  uint8_t line[] = { 255, 255, 0, 1, 0, 1, 0, 0, 0, 0 };
+  ASSERT_TRUE(profile.GetFile()->WriteFully(line, sizeof(line)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(loaded_info.Load(GetFd(profile)));
+}
+
+TEST_F(ProfileCompilationInfoTest, UnexpectedContent) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo saved_info;
+  // Save a few methods.
+  for (uint16_t i = 0; i < 10; i++) {
+    ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+  }
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+
+  uint8_t random_data[] = { 1, 2, 3};
+  ASSERT_TRUE(profile.GetFile()->WriteFully(random_data, sizeof(random_data)));
+
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Check that we fail because of unexpected data at the end of the file.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_FALSE(loaded_info.Load(GetFd(profile)));
 }
 
 }  // namespace art
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index 6fe17db..7a9d250 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -22,25 +22,25 @@
 
 #include "art_method-inl.h"
 #include "base/systrace.h"
-#include "scoped_thread_state_change.h"
+#include "base/time_utils.h"
+#include "compiler_filter.h"
 #include "oat_file_manager.h"
+#include "scoped_thread_state_change.h"
+
 
 namespace art {
 
-// An arbitrary value to throttle save requests. Set to 2s for now.
-static constexpr const uint64_t kMilisecondsToNano = 1000000;
-static constexpr const uint64_t kMinimumTimeBetweenCodeCacheUpdatesNs = 2000 * kMilisecondsToNano;
-
 // TODO: read the constants from ProfileOptions,
 // Add a random delay each time we go to sleep so that we don't hammer the CPU
 // with all profile savers running at the same time.
-static constexpr const uint64_t kRandomDelayMaxMs = 20 * 1000;  // 20 seconds
-static constexpr const uint64_t kMaxBackoffMs = 5 * 60 * 1000;  // 5 minutes
-static constexpr const uint64_t kSavePeriodMs = 10 * 1000;  // 10 seconds
-static constexpr const uint64_t kInitialDelayMs = 2 * 1000;  // 2 seconds
-static constexpr const double kBackoffCoef = 1.5;
+static constexpr const uint64_t kRandomDelayMaxMs = 30 * 1000;  // 30 seconds
+static constexpr const uint64_t kMaxBackoffMs = 10 * 60 * 1000;  // 10 minutes
+static constexpr const uint64_t kSavePeriodMs = 20 * 1000;  // 20 seconds
+static constexpr const uint64_t kSaveResolvedClassesDelayMs = 2 * 1000;  // 2 seconds
+static constexpr const double kBackoffCoef = 2.0;
 
-static constexpr const uint32_t kMinimumNrOrMethodsToSave = 10;
+static constexpr const uint32_t kMinimumNumberOfMethodsToSave = 10;
+static constexpr const uint32_t kMinimumNumberOfClassesToSave = 10;
 
 ProfileSaver* ProfileSaver::instance_ = nullptr;
 pthread_t ProfileSaver::profiler_pthread_ = 0U;
@@ -52,13 +52,21 @@
                            const std::string& app_data_dir)
     : jit_code_cache_(jit_code_cache),
       foreign_dex_profile_path_(foreign_dex_profile_path),
-      code_cache_last_update_time_ns_(0),
       shutting_down_(false),
-      first_profile_(true),
+      last_save_number_of_methods_(0),
+      last_save_number_of_classes_(0),
       wait_lock_("ProfileSaver wait lock"),
-      period_condition_("ProfileSaver period condition", wait_lock_) {
-  AddTrackedLocations(output_filename, code_paths);
-  app_data_dir_ = "";
+      period_condition_("ProfileSaver period condition", wait_lock_),
+      total_bytes_written_(0),
+      total_number_of_writes_(0),
+      total_number_of_code_cache_queries_(0),
+      total_number_of_skipped_writes_(0),
+      total_number_of_failed_writes_(0),
+      total_ms_of_sleep_(0),
+      total_ns_of_work_(0),
+      total_number_of_foreign_dex_marks_(0),
+      max_number_of_profile_entries_cached_(0) {
+  AddTrackedLocations(output_filename, app_data_dir, code_paths);
   if (!app_data_dir.empty()) {
     // The application directory is used to determine which dex files are owned by app.
     // Since it could be a symlink (e.g. /data/data instead of /data/user/0), and we
@@ -66,9 +74,9 @@
     // store it's canonical form to be sure we use the same base when comparing.
     UniqueCPtr<const char[]> app_data_dir_real_path(realpath(app_data_dir.c_str(), nullptr));
     if (app_data_dir_real_path != nullptr) {
-      app_data_dir_.assign(app_data_dir_real_path.get());
+      app_data_dirs_.emplace(app_data_dir_real_path.get());
     } else {
-      LOG(WARNING) << "Failed to get the real path for app dir: " << app_data_dir_
+      LOG(WARNING) << "Failed to get the real path for app dir: " << app_data_dir
           << ". The app dir will not be used to determine which dex files belong to the app";
     }
   }
@@ -80,14 +88,13 @@
 
   uint64_t save_period_ms = kSavePeriodMs;
   VLOG(profiler) << "Save profiling information every " << save_period_ms << " ms";
-
-  bool first_iteration = true;
+  bool cache_resolved_classes = true;
   while (!ShuttingDown(self)) {
     uint64_t sleep_time_ms;
-    if (first_iteration) {
+    if (cache_resolved_classes) {
       // Sleep less long for the first iteration since we want to record loaded classes shortly
       // after app launch.
-      sleep_time_ms = kInitialDelayMs;
+      sleep_time_ms = kSaveResolvedClassesDelayMs;
     } else {
       const uint64_t random_sleep_delay_ms = rand() % kRandomDelayMaxMs;
       sleep_time_ms = save_period_ms + random_sleep_delay_ms;
@@ -96,76 +103,146 @@
       MutexLock mu(self, wait_lock_);
       period_condition_.TimedWait(self, sleep_time_ms, 0);
     }
-
+    total_ms_of_sleep_ += sleep_time_ms;
     if (ShuttingDown(self)) {
       break;
     }
 
-    if (!ProcessProfilingInfo() && save_period_ms < kMaxBackoffMs) {
-      // If we don't need to save now it is less likely that we will need to do
-      // so in the future. Increase the time between saves according to the
-      // kBackoffCoef, but make it no larger than kMaxBackoffMs.
-      save_period_ms = static_cast<uint64_t>(kBackoffCoef * save_period_ms);
+    uint64_t start = NanoTime();
+    if (cache_resolved_classes) {
+      // TODO(calin) This only considers the case of the primary profile file.
+      // Anything that gets loaded in the same VM will not have their resolved
+      // classes saved (unless they started before the initial saving was done).
+      FetchAndCacheResolvedClasses();
     } else {
-      // Reset the period to the initial value as it's highly likely to JIT again.
-      save_period_ms = kSavePeriodMs;
+      bool profile_saved_to_disk = ProcessProfilingInfo();
+      if (profile_saved_to_disk) {
+        // Reset the period to the initial value as it's highly likely to JIT again.
+        save_period_ms = kSavePeriodMs;
+        VLOG(profiler) << "Profile saver: saved something, period reset to: " << save_period_ms;
+      } else {
+        // If we don't need to save now it is less likely that we will need to do
+        // so in the future. Increase the time between saves according to the
+        // kBackoffCoef, but make it no larger than kMaxBackoffMs.
+        save_period_ms = std::min(kMaxBackoffMs,
+                                  static_cast<uint64_t>(kBackoffCoef * save_period_ms));
+        VLOG(profiler) << "Profile saver: nothing to save, delaying period to: " << save_period_ms;
+      }
     }
-    first_iteration = false;
+    cache_resolved_classes = false;
+
+    total_ns_of_work_ += (NanoTime() - start);
   }
 }
 
+ProfileCompilationInfo* ProfileSaver::GetCachedProfiledInfo(const std::string& filename) {
+  auto info_it = profile_cache_.find(filename);
+  if (info_it == profile_cache_.end()) {
+    info_it = profile_cache_.Put(filename, ProfileCompilationInfo());
+  }
+  return &info_it->second;
+}
+
+void ProfileSaver::FetchAndCacheResolvedClasses() {
+  ScopedTrace trace(__PRETTY_FUNCTION__);
+
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  std::set<DexCacheResolvedClasses> resolved_classes =
+      class_linker->GetResolvedClasses(/*ignore boot classes*/ true);
+  MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+  uint64_t total_number_of_profile_entries_cached = 0;
+  for (const auto& it : tracked_dex_base_locations_) {
+    std::set<DexCacheResolvedClasses> resolved_classes_for_location;
+    const std::string& filename = it.first;
+    const std::set<std::string>& locations = it.second;
+
+    for (const DexCacheResolvedClasses& classes : resolved_classes) {
+      if (locations.find(classes.GetDexLocation()) != locations.end()) {
+        resolved_classes_for_location.insert(classes);
+      }
+    }
+    ProfileCompilationInfo* info = GetCachedProfiledInfo(filename);
+    info->AddMethodsAndClasses(std::vector<MethodReference>(), resolved_classes_for_location);
+    total_number_of_profile_entries_cached += resolved_classes_for_location.size();
+  }
+  max_number_of_profile_entries_cached_ = std::max(
+      max_number_of_profile_entries_cached_,
+      total_number_of_profile_entries_cached);
+}
+
 bool ProfileSaver::ProcessProfilingInfo() {
   ScopedTrace trace(__PRETTY_FUNCTION__);
-  uint64_t last_update_time_ns = jit_code_cache_->GetLastUpdateTimeNs();
-  if (!first_profile_ && last_update_time_ns - code_cache_last_update_time_ns_
-          < kMinimumTimeBetweenCodeCacheUpdatesNs) {
-    VLOG(profiler) << "Not enough time has passed since the last code cache update."
-        << "Last update: " << last_update_time_ns
-        << " Last save: " << code_cache_last_update_time_ns_;
-    return false;
-  }
-
-  uint64_t start = NanoTime();
-  code_cache_last_update_time_ns_ = last_update_time_ns;
   SafeMap<std::string, std::set<std::string>> tracked_locations;
   {
     // Make a copy so that we don't hold the lock while doing I/O.
     MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
     tracked_locations = tracked_dex_base_locations_;
   }
+
+  bool profile_file_saved = false;
+  uint64_t total_number_of_profile_entries_cached = 0;
   for (const auto& it : tracked_locations) {
     if (ShuttingDown(Thread::Current())) {
       return true;
     }
     const std::string& filename = it.first;
     const std::set<std::string>& locations = it.second;
-    std::vector<ArtMethod*> methods;
+    std::vector<MethodReference> methods;
     {
       ScopedObjectAccess soa(Thread::Current());
-      jit_code_cache_->GetCompiledArtMethods(locations, methods);
+      jit_code_cache_->GetProfiledMethods(locations, methods);
+      total_number_of_code_cache_queries_++;
     }
-    // Always save for the first one for loaded classes profile.
-    if (methods.size() < kMinimumNrOrMethodsToSave && !first_profile_) {
+
+    ProfileCompilationInfo* cached_info = GetCachedProfiledInfo(filename);
+    cached_info->AddMethodsAndClasses(methods, std::set<DexCacheResolvedClasses>());
+    int64_t delta_number_of_methods =
+        cached_info->GetNumberOfMethods() -
+        static_cast<int64_t>(last_save_number_of_methods_);
+    int64_t delta_number_of_classes =
+        cached_info->GetNumberOfResolvedClasses() -
+        static_cast<int64_t>(last_save_number_of_classes_);
+
+    if (delta_number_of_methods < kMinimumNumberOfMethodsToSave &&
+        delta_number_of_classes < kMinimumNumberOfClassesToSave) {
       VLOG(profiler) << "Not enough information to save to: " << filename
-          <<" Nr of methods: " << methods.size();
-      return false;
+          << " Nr of methods: " << delta_number_of_methods
+          << " Nr of classes: " << delta_number_of_classes;
+      total_number_of_skipped_writes_++;
+      continue;
     }
-
-    std::set<DexCacheResolvedClasses> resolved_classes;
-    if (first_profile_) {
-      ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-      resolved_classes = class_linker->GetResolvedClasses(/*ignore boot classes*/true);
-    }
-
-    if (!ProfileCompilationInfo::SaveProfilingInfo(filename, methods, resolved_classes)) {
+    uint64_t bytes_written;
+    // Force the save. In case the profile data is corrupted or the profile
+    // has the wrong version this will "fix" the file to the correct format.
+    if (cached_info->MergeAndSave(filename, &bytes_written, /*force*/ true)) {
+      last_save_number_of_methods_ = cached_info->GetNumberOfMethods();
+      last_save_number_of_classes_ = cached_info->GetNumberOfResolvedClasses();
+      // Clear resolved classes. No need to store them around as
+      // they don't change after the first write.
+      cached_info->ClearResolvedClasses();
+      if (bytes_written > 0) {
+        total_number_of_writes_++;
+        total_bytes_written_ += bytes_written;
+        profile_file_saved = true;
+      } else {
+        // At this point we could still have avoided the write.
+        // We load and merge the data from the file lazily at its first ever
+        // save attempt. So, whatever we are trying to save could already be
+        // in the file.
+        total_number_of_skipped_writes_++;
+      }
+    } else {
       LOG(WARNING) << "Could not save profiling info to " << filename;
-      return false;
+      total_number_of_failed_writes_++;
     }
-
-    VLOG(profiler) << "Profile process time: " << PrettyDuration(NanoTime() - start);
+    total_number_of_profile_entries_cached +=
+        cached_info->GetNumberOfMethods() +
+        cached_info->GetNumberOfResolvedClasses();
   }
-  first_profile_ = false;
-  return true;
+  max_number_of_profile_entries_cached_ = std::max(
+      max_number_of_profile_entries_cached_,
+      total_number_of_profile_entries_cached);
+  return profile_file_saved;
 }
 
 void* ProfileSaver::RunProfileSaverThread(void* arg) {
@@ -183,6 +260,26 @@
   return nullptr;
 }
 
+static bool ShouldProfileLocation(const std::string& location) {
+  OatFileManager& oat_manager = Runtime::Current()->GetOatFileManager();
+  const OatFile* oat_file = oat_manager.FindOpenedOatFileFromDexLocation(location);
+  if (oat_file == nullptr) {
+    // This can happen if we fallback to run code directly from the APK.
+    // Profile it with the hope that the background dexopt will get us back into
+    // a good state.
+    VLOG(profiler) << "Asked to profile a location without an oat file:" << location;
+    return true;
+  }
+  CompilerFilter::Filter filter = oat_file->GetCompilerFilter();
+  if ((filter == CompilerFilter::kSpeed) || (filter == CompilerFilter::kEverything)) {
+    VLOG(profiler)
+        << "Skip profiling oat file because it's already speed|everything compiled: "
+        << location << " oat location: " << oat_file->GetLocation();
+    return false;
+  }
+  return true;
+}
+
 void ProfileSaver::Start(const std::string& output_filename,
                          jit::JitCodeCache* jit_code_cache,
                          const std::vector<std::string>& code_paths,
@@ -192,6 +289,18 @@
   DCHECK(!output_filename.empty());
   DCHECK(jit_code_cache != nullptr);
 
+  std::vector<std::string> code_paths_to_profile;
+
+  for (const std::string& location : code_paths) {
+    if (ShouldProfileLocation(location))  {
+      code_paths_to_profile.push_back(location);
+    }
+  }
+  if (code_paths_to_profile.empty()) {
+    VLOG(profiler) << "No code paths should be profiled.";
+    return;
+  }
+
   MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
   if (instance_ != nullptr) {
     // If we already have an instance, make sure it uses the same jit_code_cache.
@@ -199,16 +308,16 @@
     // apps which share the same runtime).
     DCHECK_EQ(instance_->jit_code_cache_, jit_code_cache);
     // Add the code_paths to the tracked locations.
-    instance_->AddTrackedLocations(output_filename, code_paths);
+    instance_->AddTrackedLocations(output_filename, app_data_dir, code_paths_to_profile);
     return;
   }
 
   VLOG(profiler) << "Starting profile saver using output file: " << output_filename
-      << ". Tracking: " << Join(code_paths, ':');
+      << ". Tracking: " << Join(code_paths_to_profile, ':');
 
   instance_ = new ProfileSaver(output_filename,
                                jit_code_cache,
-                               code_paths,
+                               code_paths_to_profile,
                                foreign_dex_profile_path,
                                app_data_dir);
 
@@ -219,7 +328,7 @@
       "Profile saver thread");
 }
 
-void ProfileSaver::Stop() {
+void ProfileSaver::Stop(bool dump_info) {
   ProfileSaver* profile_saver = nullptr;
   pthread_t profiler_pthread = 0U;
 
@@ -237,6 +346,9 @@
       return;
     }
     instance_->shutting_down_ = true;
+    if (dump_info) {
+      instance_->DumpInfo(LOG(INFO));
+    }
   }
 
   {
@@ -267,49 +379,62 @@
 }
 
 void ProfileSaver::AddTrackedLocations(const std::string& output_filename,
+                                       const std::string& app_data_dir,
                                        const std::vector<std::string>& code_paths) {
   auto it = tracked_dex_base_locations_.find(output_filename);
   if (it == tracked_dex_base_locations_.end()) {
     tracked_dex_base_locations_.Put(output_filename,
                                     std::set<std::string>(code_paths.begin(), code_paths.end()));
+    app_data_dirs_.insert(app_data_dir);
   } else {
     it->second.insert(code_paths.begin(), code_paths.end());
   }
 }
 
 void ProfileSaver::NotifyDexUse(const std::string& dex_location) {
+  if (!ShouldProfileLocation(dex_location)) {
+    return;
+  }
   std::set<std::string> app_code_paths;
   std::string foreign_dex_profile_path;
-  std::string app_data_dir;
+  std::set<std::string> app_data_dirs;
   {
     MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
-    DCHECK(instance_ != nullptr);
+    if (instance_ == nullptr) {
+      return;
+    }
     // Make a copy so that we don't hold the lock while doing I/O.
     for (const auto& it : instance_->tracked_dex_base_locations_) {
       app_code_paths.insert(it.second.begin(), it.second.end());
     }
     foreign_dex_profile_path = instance_->foreign_dex_profile_path_;
-    app_data_dir = instance_->app_data_dir_;
+    app_data_dirs.insert(instance_->app_data_dirs_.begin(), instance_->app_data_dirs_.end());
   }
 
-  MaybeRecordDexUseInternal(dex_location,
-                            app_code_paths,
-                            foreign_dex_profile_path,
-                            app_data_dir);
+  bool mark_created = MaybeRecordDexUseInternal(dex_location,
+                                                app_code_paths,
+                                                foreign_dex_profile_path,
+                                                app_data_dirs);
+  if (mark_created) {
+    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+    if (instance_ != nullptr) {
+      instance_->total_number_of_foreign_dex_marks_++;
+    }
+  }
 }
 
-void ProfileSaver::MaybeRecordDexUseInternal(
+bool ProfileSaver::MaybeRecordDexUseInternal(
       const std::string& dex_location,
       const std::set<std::string>& app_code_paths,
       const std::string& foreign_dex_profile_path,
-      const std::string& app_data_dir) {
+      const std::set<std::string>& app_data_dirs) {
   if (dex_location.empty()) {
     LOG(WARNING) << "Asked to record foreign dex use with an empty dex location.";
-    return;
+    return false;
   }
   if (foreign_dex_profile_path.empty()) {
     LOG(WARNING) << "Asked to record foreign dex use without a valid profile path ";
-    return;
+    return false;
   }
 
   UniqueCPtr<const char[]> dex_location_real_path(realpath(dex_location.c_str(), nullptr));
@@ -320,14 +445,14 @@
     ? dex_location.c_str()
     : dex_location_real_path.get());
 
-  if (dex_location_real_path_str.compare(0, app_data_dir.length(), app_data_dir) == 0) {
+  if (app_data_dirs.find(dex_location_real_path_str) != app_data_dirs.end()) {
     // The dex location is under the application folder. Nothing to record.
-    return;
+    return false;
   }
 
   if (app_code_paths.find(dex_location) != app_code_paths.end()) {
     // The dex location belongs to the application code paths. Nothing to record.
-    return;
+    return false;
   }
   // Do another round of checks with the real paths.
   // Note that we could cache all the real locations in the saver (since it's an expensive
@@ -344,7 +469,7 @@
         : real_app_code_location.get());
     if (real_app_code_location_str == dex_location_real_path_str) {
       // The dex location belongs to the application code paths. Nothing to record.
-      return;
+      return false;
     }
   }
 
@@ -362,12 +487,37 @@
     if (close(fd) != 0) {
       PLOG(WARNING) << "Could not close file after flagging foreign dex use " << flag_path;
     }
+    return true;
   } else {
     if (errno != EEXIST) {
       // Another app could have already created the file.
       PLOG(WARNING) << "Could not create foreign dex use mark " << flag_path;
+      return false;
     }
+    return true;
   }
 }
 
+void ProfileSaver::DumpInstanceInfo(std::ostream& os) {
+  MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+  if (instance_ != nullptr) {
+    instance_->DumpInfo(os);
+  }
+}
+
+void ProfileSaver::DumpInfo(std::ostream& os) {
+  os << "ProfileSaver total_bytes_written=" << total_bytes_written_ << '\n'
+     << "ProfileSaver total_number_of_writes=" << total_number_of_writes_ << '\n'
+     << "ProfileSaver total_number_of_code_cache_queries="
+     << total_number_of_code_cache_queries_ << '\n'
+     << "ProfileSaver total_number_of_skipped_writes=" << total_number_of_skipped_writes_ << '\n'
+     << "ProfileSaver total_number_of_failed_writes=" << total_number_of_failed_writes_ << '\n'
+     << "ProfileSaver total_ms_of_sleep=" << total_ms_of_sleep_ << '\n'
+     << "ProfileSaver total_ms_of_work=" << NsToMs(total_ns_of_work_) << '\n'
+     << "ProfileSaver total_number_of_foreign_dex_marks="
+     << total_number_of_foreign_dex_marks_ << '\n'
+     << "ProfileSaver max_number_profile_entries_cached="
+    << max_number_of_profile_entries_cached_ << '\n';
+}
+
 }   // namespace art
diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h
index e7eab95..0a222bf 100644
--- a/runtime/jit/profile_saver.h
+++ b/runtime/jit/profile_saver.h
@@ -37,7 +37,7 @@
 
   // Stops the profile saver thread.
   // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
-  static void Stop()
+  static void Stop(bool dump_info_)
       REQUIRES(!Locks::profiler_lock_, !wait_lock_)
       NO_THREAD_SAFETY_ANALYSIS;
 
@@ -46,6 +46,9 @@
 
   static void NotifyDexUse(const std::string& dex_location);
 
+  // If the profile saver is running, dumps statistics to the `os`. Otherwise it does nothing.
+  static void DumpInstanceInfo(std::ostream& os);
+
  private:
   ProfileSaver(const std::string& output_filename,
                jit::JitCodeCache* jit_code_cache,
@@ -67,14 +70,25 @@
   bool ShuttingDown(Thread* self) REQUIRES(!Locks::profiler_lock_);
 
   void AddTrackedLocations(const std::string& output_filename,
+                           const std::string& app_data_dir,
                            const std::vector<std::string>& code_paths)
       REQUIRES(Locks::profiler_lock_);
 
-  static void MaybeRecordDexUseInternal(
+  // Retrieves the cached profile compilation info for the given profile file.
+  // If no entry exists, a new empty one will be created, added to the cache and
+  // then returned.
+  ProfileCompilationInfo* GetCachedProfiledInfo(const std::string& filename);
+  // Fetches the current resolved classes from the ClassLinker and stores them
+  // in the profile_cache_ for later save.
+  void FetchAndCacheResolvedClasses();
+
+  static bool MaybeRecordDexUseInternal(
       const std::string& dex_location,
       const std::set<std::string>& tracked_locations,
       const std::string& foreign_dex_profile_path,
-      const std::string& app_data_dir);
+      const std::set<std::string>& app_data_dirs);
+
+  void DumpInfo(std::ostream& os);
 
   // The only instance of the saver.
   static ProfileSaver* instance_ GUARDED_BY(Locks::profiler_lock_);
@@ -82,18 +96,44 @@
   static pthread_t profiler_pthread_ GUARDED_BY(Locks::profiler_lock_);
 
   jit::JitCodeCache* jit_code_cache_;
+
+  // Collection of code paths that the profiles track.
+  // It maps profile locations to code paths (dex base locations).
   SafeMap<std::string, std::set<std::string>> tracked_dex_base_locations_
       GUARDED_BY(Locks::profiler_lock_);
+  // The directory where we should store the code paths.
   std::string foreign_dex_profile_path_;
-  std::string app_data_dir_;
-  uint64_t code_cache_last_update_time_ns_;
+
+  // A list of application directories, used to infer if a loaded dex belongs
+  // to the application or not. Multiple application data directories are possible when
+  // different apps share the same runtime.
+  std::set<std::string> app_data_dirs_ GUARDED_BY(Locks::profiler_lock_);
+
   bool shutting_down_ GUARDED_BY(Locks::profiler_lock_);
-  bool first_profile_ = true;
+  uint32_t last_save_number_of_methods_;
+  uint32_t last_save_number_of_classes_;
+
+  // A local cache for the profile information. Maps each tracked file to its
+  // profile information. The size of this cache is usually very small and tops
+  // out at just a few hundred entries in the ProfileCompilationInfo objects.
+  // It helps avoid unnecessary writes to disk.
+  SafeMap<std::string, ProfileCompilationInfo> profile_cache_;
 
   // Save period condition support.
   Mutex wait_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   ConditionVariable period_condition_ GUARDED_BY(wait_lock_);
 
+  uint64_t total_bytes_written_;
+  uint64_t total_number_of_writes_;
+  uint64_t total_number_of_code_cache_queries_;
+  uint64_t total_number_of_skipped_writes_;
+  uint64_t total_number_of_failed_writes_;
+  uint64_t total_ms_of_sleep_;
+  uint64_t total_ns_of_work_;
+  uint64_t total_number_of_foreign_dex_marks_;
+  // TODO(calin): replace with an actual size.
+  uint64_t max_number_of_profile_entries_cached_;
+
   DISALLOW_COPY_AND_ASSIGN(ProfileSaver);
 };
 
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index c31b22e..6dd182a 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -62,6 +62,9 @@
 // invoking this method will throw an exception.
 static constexpr uint32_t kAccDefaultConflict =      0x00800000;  // method (runtime)
 
+// Set by the verifier for a method we do not want the compiler to compile.
+static constexpr uint32_t kAccCompileDontBother =    0x01000000;  // method (runtime)
+
 // Special runtime-only flags.
 // Interface and all its super-interfaces with default methods have been recursively initialized.
 static constexpr uint32_t kAccRecursivelyInitialized    = 0x20000000;
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 9894353..9ab0072 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -74,6 +74,20 @@
   compare.release();
 }
 
+const OatFile* OatFileManager::FindOpenedOatFileFromDexLocation(
+    const std::string& dex_base_location) const {
+  ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
+  for (const std::unique_ptr<const OatFile>& oat_file : oat_files_) {
+    const std::vector<const OatDexFile*>& oat_dex_files = oat_file->GetOatDexFiles();
+    for (const OatDexFile* oat_dex_file : oat_dex_files) {
+      if (DexFile::GetBaseLocation(oat_dex_file->GetDexFileLocation()) == dex_base_location) {
+        return oat_file.get();
+      }
+    }
+  }
+  return nullptr;
+}
+
 const OatFile* OatFileManager::FindOpenedOatFileFromOatLocation(const std::string& oat_location)
     const {
   ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
diff --git a/runtime/oat_file_manager.h b/runtime/oat_file_manager.h
index 574d0e2..f98102e 100644
--- a/runtime/oat_file_manager.h
+++ b/runtime/oat_file_manager.h
@@ -60,6 +60,11 @@
   const OatFile* FindOpenedOatFileFromOatLocation(const std::string& oat_location) const
       REQUIRES(!Locks::oat_file_manager_lock_);
 
+  // Find the oat file which contains a dex file with the given dex base location,
+  // returns null if there are none.
+  const OatFile* FindOpenedOatFileFromDexLocation(const std::string& dex_base_location) const
+      REQUIRES(!Locks::oat_file_manager_lock_);
+
   // Attempt to reserve a location, returns false if it is already reserved or already in used by
   // an oat file.
   bool RegisterOatFileLocation(const std::string& oat_location)
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 472a85c..6a50b8e 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1459,6 +1459,14 @@
   return stat(filename.c_str(), &buffer) == 0;
 }
 
+bool FileExistsAndNotEmpty(const std::string& filename) {
+  struct stat buffer;
+  if (stat(filename.c_str(), &buffer) != 0) {
+    return false;
+  }
+  return buffer.st_size > 0;
+}
+
 std::string PrettyDescriptor(Primitive::Type type) {
   return PrettyDescriptor(Primitive::Descriptor(type));
 }
diff --git a/runtime/utils.h b/runtime/utils.h
index 83ac0b8..c1e88a4 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -296,6 +296,7 @@
 
 // Returns true if the file exists.
 bool FileExists(const std::string& filename);
+bool FileExistsAndNotEmpty(const std::string& filename);
 
 class VoidFunctor {
  public:
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 647578e..8802e62 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -396,6 +396,13 @@
                                                     << PrettyMethod(method_idx, *dex_file) << "\n");
       }
       result.kind = kSoftFailure;
+      if (method != nullptr &&
+          !CanCompilerHandleVerificationFailure(verifier.encountered_failure_types_)) {
+        method->SetAccessFlags(method->GetAccessFlags() | kAccCompileDontBother);
+      }
+    }
+    if (method != nullptr && verifier.HasInstructionThatWillThrow()) {
+      method->SetAccessFlags(method->GetAccessFlags() | kAccCompileDontBother);
     }
   } else {
     // Bad method data.
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index ebb0b8c..2592a21 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -142,6 +142,14 @@
     kHardFailure,
   };
 
+  static bool CanCompilerHandleVerificationFailure(uint32_t encountered_failure_types) {
+    constexpr uint32_t unresolved_mask = verifier::VerifyError::VERIFY_ERROR_NO_CLASS
+        | verifier::VerifyError::VERIFY_ERROR_ACCESS_CLASS
+        | verifier::VerifyError::VERIFY_ERROR_ACCESS_FIELD
+        | verifier::VerifyError::VERIFY_ERROR_ACCESS_METHOD;
+    return (encountered_failure_types & (~unresolved_mask)) == 0;
+  }
+
   // Verify a class. Returns "kNoFailure" on success.
   static FailureKind VerifyClass(Thread* self,
                                  mirror::Class* klass,
diff --git a/test/444-checker-nce/src/Main.java b/test/444-checker-nce/src/Main.java
index c96b18c..ddc2f77 100644
--- a/test/444-checker-nce/src/Main.java
+++ b/test/444-checker-nce/src/Main.java
@@ -28,10 +28,6 @@
   }
 
   /// CHECK-START: Main Main.thisTest() builder (after)
-  /// CHECK:         NullCheck
-  /// CHECK:         InvokeStaticOrDirect
-
-  /// CHECK-START: Main Main.thisTest() instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   /// CHECK:         InvokeStaticOrDirect
   public Main thisTest() {
@@ -40,12 +36,10 @@
 
   /// CHECK-START: Main Main.newInstanceRemoveTest() builder (after)
   /// CHECK:         NewInstance
-  /// CHECK:         NullCheck
   /// CHECK:         InvokeStaticOrDirect
-  /// CHECK:         NullCheck
   /// CHECK:         InvokeStaticOrDirect
 
-  /// CHECK-START: Main Main.newInstanceRemoveTest() instruction_simplifier (after)
+  /// CHECK-START: Main Main.newInstanceRemoveTest() builder (after)
   /// CHECK-NOT:     NullCheck
   public Main newInstanceRemoveTest() {
     Main m = new Main();
@@ -54,13 +48,10 @@
 
   /// CHECK-START: Main Main.newArrayRemoveTest() builder (after)
   /// CHECK:         NewArray
-  /// CHECK:         NullCheck
   /// CHECK:         ArrayGet
 
-  /// CHECK-START: Main Main.newArrayRemoveTest() instruction_simplifier (after)
-  /// CHECK:         NewArray
+  /// CHECK-START: Main Main.newArrayRemoveTest() builder (after)
   /// CHECK-NOT:     NullCheck
-  /// CHECK:         ArrayGet
   public Main newArrayRemoveTest() {
     Main[] ms = new Main[1];
     return ms[0];
@@ -179,9 +170,6 @@
   }
 
   /// CHECK-START: Main Main.scopeRemoveTest(int, Main) builder (after)
-  /// CHECK:         NullCheck
-
-  /// CHECK-START: Main Main.scopeRemoveTest(int, Main) instruction_simplifier (after)
   /// CHECK-NOT:     NullCheck
   public Main scopeRemoveTest(int count, Main a) {
     Main m = null;
diff --git a/test/572-checker-array-get-regression/src/Main.java b/test/572-checker-array-get-regression/src/Main.java
index b55be70..89b97ed 100644
--- a/test/572-checker-array-get-regression/src/Main.java
+++ b/test/572-checker-array-get-regression/src/Main.java
@@ -25,13 +25,11 @@
   /// CHECK-DAG:     <<Const2P19:i\d+>>    IntConstant 524288
   /// CHECK-DAG:     <<ConstM1:i\d+>>      IntConstant -1
   /// CHECK-DAG:     <<Array:l\d+>>        NewArray [<<Const2P19>>,<<Method>>]
-  /// CHECK-DAG:     <<NullCheck1:l\d+>>   NullCheck [<<Array>>]
-  /// CHECK-DAG:     <<Length1:i\d+>>      ArrayLength [<<NullCheck1>>]
+  /// CHECK-DAG:     <<Length1:i\d+>>      ArrayLength [<<Array>>]
   /// CHECK-DAG:     <<Index:i\d+>>        Add [<<Length1>>,<<ConstM1>>]
-  /// CHECK-DAG:     <<NullCheck2:l\d+>>   NullCheck [<<Array>>]
-  /// CHECK-DAG:     <<Length2:i\d+>>      ArrayLength [<<NullCheck2>>]
+  /// CHECK-DAG:     <<Length2:i\d+>>      ArrayLength [<<Array>>]
   /// CHECK-DAG:     <<BoundsCheck:i\d+>>  BoundsCheck [<<Index>>,<<Length2>>]
-  /// CHECK-DAG:     <<LastElement:l\d+>>  ArrayGet [<<NullCheck2>>,<<BoundsCheck>>]
+  /// CHECK-DAG:     <<LastElement:l\d+>>  ArrayGet [<<Array>>,<<BoundsCheck>>]
   /// CHECK-DAG:                           Return [<<LastElement>>]
 
 
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index d9b26bc..0cd77ab 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -77,7 +77,12 @@
  * Instance.isRoot and Instance.getRootTypes.
 
 Release History:
- 0.4 Pending
+ 0.6 Pending
+
+ 0.5 Apr 19, 2016
+   Update perflib to perflib-25.0.0 to improve processing performance.
+
+ 0.4 Feb 23, 2016
    Annotate char[] objects with their string values.
    Show registered native allocations for heap dumps that support it.
 
diff --git a/tools/ahat/src/AhatSnapshot.java b/tools/ahat/src/AhatSnapshot.java
index 2adec6f..d088e8c 100644
--- a/tools/ahat/src/AhatSnapshot.java
+++ b/tools/ahat/src/AhatSnapshot.java
@@ -25,8 +25,8 @@
 import com.android.tools.perflib.heap.StackFrame;
 import com.android.tools.perflib.heap.StackTrace;
 import com.android.tools.perflib.captures.MemoryMappedFileBuffer;
-import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
+import gnu.trove.TObjectProcedure;
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -85,49 +85,59 @@
 
     ClassObj javaLangClass = mSnapshot.findClass("java.lang.Class");
     for (Heap heap : mHeaps) {
-      long total = 0;
-      for (Instance inst : Iterables.concat(heap.getClasses(), heap.getInstances())) {
-        Instance dominator = inst.getImmediateDominator();
-        if (dominator != null) {
-          total += inst.getSize();
+      // Use a single element array for the total to act as a reference to a
+      // long.
+      final long[] total = new long[]{0};
+      TObjectProcedure<Instance> processInstance = new TObjectProcedure<Instance>() {
+        @Override
+        public boolean execute(Instance inst) {
+          Instance dominator = inst.getImmediateDominator();
+          if (dominator != null) {
+            total[0] += inst.getSize();
 
-          if (dominator == Snapshot.SENTINEL_ROOT) {
-            mRooted.add(inst);
-          }
+            if (dominator == Snapshot.SENTINEL_ROOT) {
+              mRooted.add(inst);
+            }
 
-          // Properly label the class of a class object.
-          if (inst instanceof ClassObj && javaLangClass != null && inst.getClassObj() == null) {
-              inst.setClassId(javaLangClass.getId());
-          }
+            // Properly label the class of a class object.
+            if (inst instanceof ClassObj && javaLangClass != null && inst.getClassObj() == null) {
+                inst.setClassId(javaLangClass.getId());
+            }
 
-          // Update dominated instances.
-          List<Instance> instances = mDominated.get(dominator);
-          if (instances == null) {
-            instances = new ArrayList<Instance>();
-            mDominated.put(dominator, instances);
-          }
-          instances.add(inst);
+            // Update dominated instances.
+            List<Instance> instances = mDominated.get(dominator);
+            if (instances == null) {
+              instances = new ArrayList<Instance>();
+              mDominated.put(dominator, instances);
+            }
+            instances.add(inst);
 
-          // Update sites.
-          List<StackFrame> path = Collections.emptyList();
-          StackTrace stack = getStack(inst);
-          int stackId = getStackTraceSerialNumber(stack);
-          if (stack != null) {
-            StackFrame[] frames = getStackFrames(stack);
-            if (frames != null && frames.length > 0) {
-              path = Lists.reverse(Arrays.asList(frames));
+            // Update sites.
+            List<StackFrame> path = Collections.emptyList();
+            StackTrace stack = getStack(inst);
+            int stackId = getStackTraceSerialNumber(stack);
+            if (stack != null) {
+              StackFrame[] frames = getStackFrames(stack);
+              if (frames != null && frames.length > 0) {
+                path = Lists.reverse(Arrays.asList(frames));
+              }
+            }
+            mRootSite.add(stackId, 0, path.iterator(), inst);
+
+            // Update native allocations.
+            InstanceUtils.NativeAllocation alloc = InstanceUtils.getNativeAllocation(inst);
+            if (alloc != null) {
+              mNativeAllocations.add(alloc);
             }
           }
-          mRootSite.add(stackId, 0, path.iterator(), inst);
-
-          // Update native allocations.
-          InstanceUtils.NativeAllocation alloc = InstanceUtils.getNativeAllocation(inst);
-          if (alloc != null) {
-            mNativeAllocations.add(alloc);
-          }
+          return true;
         }
+      };
+      for (Instance instance : heap.getClasses()) {
+        processInstance.execute(instance);
       }
-      mHeapSizes.put(heap, total);
+      heap.forEachInstance(processInstance);
+      mHeapSizes.put(heap, total[0]);
     }
 
     // Record the roots and their types.
diff --git a/tools/ahat/src/InstanceUtils.java b/tools/ahat/src/InstanceUtils.java
index d7b64e2..8defba2 100644
--- a/tools/ahat/src/InstanceUtils.java
+++ b/tools/ahat/src/InstanceUtils.java
@@ -244,8 +244,8 @@
 
     if (inst instanceof ArrayInstance) {
       ArrayInstance array = (ArrayInstance)inst;
-      if (array.getArrayType() == Type.BYTE && inst.getHardReferences().size() == 1) {
-        Instance ref = inst.getHardReferences().get(0);
+      if (array.getArrayType() == Type.BYTE && inst.getHardReverseReferences().size() == 1) {
+        Instance ref = inst.getHardReverseReferences().get(0);
         ClassObj clsref = ref.getClassObj();
         if (clsref != null && "android.graphics.Bitmap".equals(clsref.getClassName())) {
           return ref;
@@ -344,7 +344,7 @@
     }
 
     Instance referent = null;
-    for (Instance ref : inst.getHardReferences()) {
+    for (Instance ref : inst.getHardReverseReferences()) {
       if (isInstanceOfClass(ref, "sun.misc.Cleaner")) {
         referent = InstanceUtils.getReferent(ref);
         if (referent != null) {
diff --git a/tools/ahat/src/ObjectHandler.java b/tools/ahat/src/ObjectHandler.java
index 06023da..4df1be5 100644
--- a/tools/ahat/src/ObjectHandler.java
+++ b/tools/ahat/src/ObjectHandler.java
@@ -160,11 +160,11 @@
   private static void printReferences(
       Doc doc, Query query, AhatSnapshot snapshot, Instance inst) {
     doc.section("Objects with References to this Object");
-    if (inst.getHardReferences().isEmpty()) {
+    if (inst.getHardReverseReferences().isEmpty()) {
       doc.println(DocString.text("(none)"));
     } else {
       doc.table(new Column("Object"));
-      List<Instance> references = inst.getHardReferences();
+      List<Instance> references = inst.getHardReverseReferences();
       SubsetSelector<Instance> selector = new SubsetSelector(query, HARD_REFS_ID, references);
       for (Instance ref : selector.selected()) {
         doc.row(Value.render(snapshot, ref));
@@ -173,10 +173,10 @@
       selector.render(doc);
     }
 
-    if (inst.getSoftReferences() != null) {
+    if (inst.getSoftReverseReferences() != null) {
       doc.section("Objects with Soft References to this Object");
       doc.table(new Column("Object"));
-      List<Instance> references = inst.getSoftReferences();
+      List<Instance> references = inst.getSoftReverseReferences();
       SubsetSelector<Instance> selector = new SubsetSelector(query, SOFT_REFS_ID, references);
       for (Instance ref : selector.selected()) {
         doc.row(Value.render(snapshot, ref));