Merge "ART: Add option to ensure deterministic compilation"
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
index c5df134..0cd41bb 100644
--- a/compiler/dex/quick/quick_cfi_test.cc
+++ b/compiler/dex/quick/quick_cfi_test.cc
@@ -71,6 +71,7 @@
       nullptr,
       false,
       "",
+      false,
       false);
     VerificationResults verification_results(&compiler_options);
     DexFileToMethodInlinerMap method_inliner_map;
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
index d63878d..efdc333 100644
--- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -54,6 +54,7 @@
         nullptr,
         false,
         "",
+        false,
         false));
     verification_results_.reset(new VerificationResults(compiler_options_.get()));
     method_inliner_map_.reset(new DexFileToMethodInlinerMap());
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index c483f33..f1b7458 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -39,6 +39,7 @@
 #include "compiler_driver-inl.h"
 #include "dex_compilation_unit.h"
 #include "dex_file-inl.h"
+#include "dex_instruction-inl.h"
 #include "dex/dex_to_dex_compiler.h"
 #include "dex/verification_results.h"
 #include "dex/verified_method.h"
@@ -365,7 +366,7 @@
       classes_to_compile_(compiled_classes),
       methods_to_compile_(compiled_methods),
       had_hard_verifier_failure_(false),
-      thread_count_(thread_count),
+      parallel_thread_count_(thread_count),
       stats_(new AOTCompilationStats),
       dump_stats_(dump_stats),
       dump_passes_(dump_passes),
@@ -435,24 +436,27 @@
                                 const std::vector<const DexFile*>& dex_files,
                                 TimingLogger* timings) {
   DCHECK(!Runtime::Current()->IsStarted());
-  std::unique_ptr<ThreadPool> thread_pool(
-      new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
+
+  InitializeThreadPools();
+
   VLOG(compiler) << "Before precompile " << GetMemoryUsageString(false);
   // Precompile:
   // 1) Load image classes
   // 2) Resolve all classes
   // 3) Attempt to verify all classes
   // 4) Attempt to initialize image classes, and trivially initialized classes
-  PreCompile(class_loader, dex_files, thread_pool.get(), timings);
+  PreCompile(class_loader, dex_files, timings);
   // Compile:
   // 1) Compile all classes and methods enabled for compilation. May fall back to dex-to-dex
   //    compilation.
   if (!GetCompilerOptions().VerifyAtRuntime()) {
-    Compile(class_loader, dex_files, thread_pool.get(), timings);
+    Compile(class_loader, dex_files, timings);
   }
   if (dump_stats_) {
     stats_->Dump();
   }
+
+  FreeThreadPools();
 }
 
 static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel(
@@ -653,8 +657,9 @@
   std::vector<const DexFile*> dex_files;
   dex_files.push_back(dex_file);
 
-  std::unique_ptr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", 0U));
-  PreCompile(jclass_loader, dex_files, thread_pool.get(), timings);
+  InitializeThreadPools();
+
+  PreCompile(jclass_loader, dex_files, timings);
 
   // Can we run DEX-to-DEX compiler on this class ?
   optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level =
@@ -677,20 +682,147 @@
                 true,
                 dex_cache);
 
+  FreeThreadPools();
+
   self->GetJniEnv()->DeleteGlobalRef(jclass_loader);
 }
 
-void CompilerDriver::Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                             ThreadPool* thread_pool, TimingLogger* timings) {
+void CompilerDriver::Resolve(jobject class_loader,
+                             const std::vector<const DexFile*>& dex_files,
+                             TimingLogger* timings) {
+  // Resolution allocates classes and needs to run single-threaded to be deterministic.
+  bool force_determinism = GetCompilerOptions().IsForceDeterminism();
+  ThreadPool* resolve_thread_pool = force_determinism
+                                     ? single_thread_pool_.get()
+                                     : parallel_thread_pool_.get();
+  size_t resolve_thread_count = force_determinism ? 1U : parallel_thread_count_;
+
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
-    ResolveDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    ResolveDexFile(class_loader,
+                   *dex_file,
+                   dex_files,
+                   resolve_thread_pool,
+                   resolve_thread_count,
+                   timings);
   }
 }
 
-void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                                ThreadPool* thread_pool, TimingLogger* timings) {
+// Resolve const-strings in the code. Done to have deterministic allocation behavior. Right now
+// this is single-threaded for simplicity. This overload scans a single method body and queries
+// the driver for every const-string / const-string-jumbo operand, discarding the result.
+// TODO: Collect the relevant string indices in parallel, then allocate them sequentially in a
+//       stable order.
+static void ResolveConstStrings(CompilerDriver* driver,
+                                const DexFile& dex_file,
+                                const DexFile::CodeItem* code_item) {
+  if (code_item == nullptr) {
+    // Abstract or native method.
+    return;
+  }
+
+  const uint16_t* code_ptr = code_item->insns_;  // Cursor over the method's code units.
+  const uint16_t* code_end = code_item->insns_ + code_item->insns_size_in_code_units_;
+
+  while (code_ptr < code_end) {
+    const Instruction* inst = Instruction::At(code_ptr);
+    switch (inst->Opcode()) {
+      case Instruction::CONST_STRING: {
+        uint32_t string_index = inst->VRegB_21c();
+        driver->CanAssumeStringIsPresentInDexCache(dex_file, string_index);
+        break;
+      }
+      case Instruction::CONST_STRING_JUMBO: {
+        uint32_t string_index = inst->VRegB_31c();
+        driver->CanAssumeStringIsPresentInDexCache(dex_file, string_index);
+        break;
+      }
+
+      default:  // No other opcode is of interest here.
+        break;
+    }
+
+    code_ptr += inst->SizeInCodeUnits();  // Advance by this instruction's own length.
+  }
+}
+// Runs the per-method const-string scan for each class in dex_files that is to be compiled.
+static void ResolveConstStrings(CompilerDriver* driver,
+                                const std::vector<const DexFile*>& dex_files,
+                                TimingLogger* timings) {
+  for (const DexFile* dex_file : dex_files) {
+    TimingLogger::ScopedTiming t("Resolve const-string Strings", timings);
+    // The timing scope above is opened once per dex file.
+    size_t class_def_count = dex_file->NumClassDefs();
+    for (size_t class_def_index = 0; class_def_index < class_def_count; ++class_def_index) {
+      const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index);
+
+      const uint8_t* class_data = dex_file->GetClassData(class_def);
+      if (class_data == nullptr) {
+        // empty class, probably a marker interface
+        continue;
+      }
+
+      ClassDataItemIterator it(*dex_file, class_data);
+      // Skip fields: advance the iterator past static and instance fields to reach the methods.
+      while (it.HasNextStaticField()) {
+        it.Next();
+      }
+      while (it.HasNextInstanceField()) {
+        it.Next();
+      }
+
+      bool compilation_enabled = driver->IsClassToCompile(
+          dex_file->StringByTypeIdx(class_def.class_idx_));
+      if (!compilation_enabled) {
+        // Compilation is skipped, do not resolve const-string in code of this class.
+        // TODO: Make sure that inlining honors this.
+        continue;
+      }
+
+      // Direct methods.
+      int64_t previous_direct_method_idx = -1;  // -1 never equals a uint32_t method index.
+      while (it.HasNextDirectMethod()) {
+        uint32_t method_idx = it.GetMemberIndex();
+        if (method_idx == previous_direct_method_idx) {
+          // smali can create dex files with two encoded_methods sharing the same method_idx
+          // http://code.google.com/p/smali/issues/detail?id=119
+          it.Next();
+          continue;
+        }
+        previous_direct_method_idx = method_idx;
+        ResolveConstStrings(driver, *dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      // Virtual methods.
+      int64_t previous_virtual_method_idx = -1;  // Same sentinel as for direct methods.
+      while (it.HasNextVirtualMethod()) {
+        uint32_t method_idx = it.GetMemberIndex();
+        if (method_idx == previous_virtual_method_idx) {
+          // smali can create dex files with two encoded_methods sharing the same method_idx
+          // http://code.google.com/p/smali/issues/detail?id=119
+          it.Next();
+          continue;
+        }
+        previous_virtual_method_idx = method_idx;
+        ResolveConstStrings(driver, *dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      DCHECK(!it.HasNext());  // Every member of the class should have been consumed.
+    }
+  }
+}
+
+inline void CompilerDriver::CheckThreadPools() {  // Asserts that both pools exist.
+  DCHECK(parallel_thread_pool_ != nullptr);  // Created in InitializeThreadPools().
+  DCHECK(single_thread_pool_ != nullptr);  // Likewise; cleared again in FreeThreadPools().
+}
+
+void CompilerDriver::PreCompile(jobject class_loader,
+                                const std::vector<const DexFile*>& dex_files,
+                                TimingLogger* timings) {
+  CheckThreadPools();
+
   LoadImageClasses(timings);
   VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString(false);
 
@@ -700,20 +832,26 @@
   // We need to resolve for never_verify since it needs to run dex to dex to add the
   // RETURN_VOID_NO_BARRIER.
   if (never_verify || verification_enabled) {
-    Resolve(class_loader, dex_files, thread_pool, timings);
+    Resolve(class_loader, dex_files, timings);
     VLOG(compiler) << "Resolve: " << GetMemoryUsageString(false);
   }
 
   if (never_verify) {
     VLOG(compiler) << "Verify none mode specified, skipping verification.";
-    SetVerified(class_loader, dex_files, thread_pool, timings);
+    SetVerified(class_loader, dex_files, timings);
   }
 
   if (!verification_enabled) {
     return;
   }
 
-  Verify(class_loader, dex_files, thread_pool, timings);
+  if (GetCompilerOptions().IsForceDeterminism() && IsBootImage()) {
+    // Resolve strings from const-string. Do this now to have a deterministic image.
+    ResolveConstStrings(this, dex_files, timings);
+    VLOG(compiler) << "Resolve const-strings: " << GetMemoryUsageString(false);
+  }
+
+  Verify(class_loader, dex_files, timings);
   VLOG(compiler) << "Verify: " << GetMemoryUsageString(false);
 
   if (had_hard_verifier_failure_ && GetCompilerOptions().AbortOnHardVerifierFailure()) {
@@ -721,7 +859,7 @@
                << "situations. Please check the log.";
   }
 
-  InitializeClasses(class_loader, dex_files, thread_pool, timings);
+  InitializeClasses(class_loader, dex_files, timings);
   VLOG(compiler) << "InitializeClasses: " << GetMemoryUsageString(false);
 
   UpdateImageClasses(timings);
@@ -1759,6 +1897,9 @@
 
     // Wait for all the worker threads to finish.
     thread_pool_->Wait(self, true, false);
+
+    // And stop the workers from accepting jobs.
+    thread_pool_->StopWorkers(self);
   }
 
   size_t NextIndex() {
@@ -1995,9 +2136,12 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::ResolveDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::ResolveDexFile(jobject class_loader,
+                                    const DexFile& dex_file,
                                     const std::vector<const DexFile*>& dex_files,
-                                    ThreadPool* thread_pool, TimingLogger* timings) {
+                                    ThreadPool* thread_pool,
+                                    size_t thread_count,
+                                    TimingLogger* timings) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
   // TODO: we could resolve strings here, although the string table is largely filled with class
@@ -2010,27 +2154,43 @@
     // classdefs are resolved by ResolveClassFieldsAndMethods.
     TimingLogger::ScopedTiming t("Resolve Types", timings);
     ResolveTypeVisitor visitor(&context);
-    context.ForAll(0, dex_file.NumTypeIds(), &visitor, thread_count_);
+    context.ForAll(0, dex_file.NumTypeIds(), &visitor, thread_count);
   }
 
   TimingLogger::ScopedTiming t("Resolve MethodsAndFields", timings);
   ResolveClassFieldsAndMethodsVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }
 
-void CompilerDriver::SetVerified(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                                 ThreadPool* thread_pool, TimingLogger* timings) {
+void CompilerDriver::SetVerified(jobject class_loader,
+                                 const std::vector<const DexFile*>& dex_files,
+                                 TimingLogger* timings) {
+  // This can be run in parallel.
   for (const DexFile* dex_file : dex_files) {
     CHECK(dex_file != nullptr);
-    SetVerifiedDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    SetVerifiedDexFile(class_loader,
+                       *dex_file,
+                       dex_files,
+                       parallel_thread_pool_.get(),
+                       parallel_thread_count_,
+                       timings);
   }
 }
 
-void CompilerDriver::Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                            ThreadPool* thread_pool, TimingLogger* timings) {
+void CompilerDriver::Verify(jobject class_loader,
+                            const std::vector<const DexFile*>& dex_files,
+                            TimingLogger* timings) {
+  // Note: verification should not be pulling in classes anymore when compiling the boot image,
+  //       as all should have been resolved before. As such, doing this in parallel should still
+  //       be deterministic.
   for (const DexFile* dex_file : dex_files) {
     CHECK(dex_file != nullptr);
-    VerifyDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    VerifyDexFile(class_loader,
+                  *dex_file,
+                  dex_files,
+                  parallel_thread_pool_.get(),
+                  parallel_thread_count_,
+                  timings);
   }
 }
 
@@ -2104,15 +2264,18 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::VerifyDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::VerifyDexFile(jobject class_loader,
+                                   const DexFile& dex_file,
                                    const std::vector<const DexFile*>& dex_files,
-                                   ThreadPool* thread_pool, TimingLogger* timings) {
+                                   ThreadPool* thread_pool,
+                                   size_t thread_count,
+                                   TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Verify Dex File", timings);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
                                      thread_pool);
   VerifyClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }
 
 class SetVerifiedClassVisitor : public CompilationVisitor {
@@ -2162,15 +2325,18 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::SetVerifiedDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::SetVerifiedDexFile(jobject class_loader,
+                                        const DexFile& dex_file,
                                         const std::vector<const DexFile*>& dex_files,
-                                        ThreadPool* thread_pool, TimingLogger* timings) {
+                                        ThreadPool* thread_pool,
+                                        size_t thread_count,
+                                        TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Verify Dex File", timings);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
                                      thread_pool);
   SetVerifiedClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }
 
 class InitializeClassVisitor : public CompilationVisitor {
@@ -2271,31 +2437,37 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::InitializeClasses(jobject jni_class_loader, const DexFile& dex_file,
+void CompilerDriver::InitializeClasses(jobject jni_class_loader,
+                                       const DexFile& dex_file,
                                        const std::vector<const DexFile*>& dex_files,
-                                       ThreadPool* thread_pool, TimingLogger* timings) {
+                                       TimingLogger* timings) {
   TimingLogger::ScopedTiming t("InitializeNoClinit", timings);
+
+  // Initialization allocates objects and needs to run single-threaded to be deterministic.
+  bool force_determinism = GetCompilerOptions().IsForceDeterminism();
+  ThreadPool* init_thread_pool = force_determinism
+                                     ? single_thread_pool_.get()
+                                     : parallel_thread_pool_.get();
+  size_t init_thread_count = force_determinism ? 1U : parallel_thread_count_;
+
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, dex_files,
-                                     thread_pool);
-  size_t thread_count;
+                                     init_thread_pool);
   if (IsBootImage()) {
     // TODO: remove this when transactional mode supports multithreading.
-    thread_count = 1U;
-  } else {
-    thread_count = thread_count_;
+    init_thread_count = 1U;
   }
   InitializeClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, init_thread_count);
 }
 
 void CompilerDriver::InitializeClasses(jobject class_loader,
                                        const std::vector<const DexFile*>& dex_files,
-                                       ThreadPool* thread_pool, TimingLogger* timings) {
+                                       TimingLogger* timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
-    InitializeClasses(class_loader, *dex_file, dex_files, thread_pool, timings);
+    InitializeClasses(class_loader, *dex_file, dex_files, timings);
   }
   if (IsBootImage()) {
     // Prune garbage objects created during aborted transactions.
@@ -2303,8 +2475,9 @@
   }
 }
 
-void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                             ThreadPool* thread_pool, TimingLogger* timings) {
+void CompilerDriver::Compile(jobject class_loader,
+                             const std::vector<const DexFile*>& dex_files,
+                             TimingLogger* timings) {
   if (kDebugProfileGuidedCompilation) {
     LOG(INFO) << "[ProfileGuidedCompilation] " <<
         ((profile_compilation_info_ == nullptr)
@@ -2314,7 +2487,12 @@
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
-    CompileDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    CompileDexFile(class_loader,
+                   *dex_file,
+                   dex_files,
+                   parallel_thread_pool_.get(),
+                   parallel_thread_count_,
+                   timings);
   }
   VLOG(compiler) << "Compile: " << GetMemoryUsageString(false);
 }
@@ -2421,14 +2599,17 @@
   const ParallelCompilationManager* const manager_;
 };
 
-void CompilerDriver::CompileDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::CompileDexFile(jobject class_loader,
+                                    const DexFile& dex_file,
                                     const std::vector<const DexFile*>& dex_files,
-                                    ThreadPool* thread_pool, TimingLogger* timings) {
+                                    ThreadPool* thread_pool,
+                                    size_t thread_count,
+                                    TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Compile Dex File", timings);
   ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this,
                                      &dex_file, dex_files, thread_pool);
   CompileClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }
 
 void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref,
@@ -2590,4 +2771,16 @@
   return true;
 }
 
+void CompilerDriver::InitializeThreadPools() {  // (Re)creates both compiler thread pools.
+  size_t parallel_count = parallel_thread_count_ > 0 ? parallel_thread_count_ - 1 : 0;
+  parallel_thread_pool_.reset(  // parallel_count guards against underflow when the count is 0.
+      new ThreadPool("Compiler driver thread pool", parallel_count));
+  single_thread_pool_.reset(new ThreadPool("Single-threaded Compiler driver thread pool", 0));
+}
+
+void CompilerDriver::FreeThreadPools() {  // Destroys both pools; safe to call repeatedly.
+  parallel_thread_pool_.reset();
+  single_thread_pool_.reset();
+}
+
 }  // namespace art
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 6a2f7bf..5e35cbb 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -411,7 +411,7 @@
   }
 
   size_t GetThreadCount() const {
-    return thread_count_;
+    return parallel_thread_count_;
   }
 
   bool GetDumpStats() const {
@@ -550,8 +550,9 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
-  void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                  ThreadPool* thread_pool, TimingLogger* timings)
+  void PreCompile(jobject class_loader,
+                  const std::vector<const DexFile*>& dex_files,
+                  TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
 
   void LoadImageClasses(TimingLogger* timings) REQUIRES(!Locks::mutator_lock_);
@@ -559,49 +560,71 @@
   // Attempt to resolve all type, methods, fields, and strings
   // referenced from code in the dex file following PathClassLoader
   // ordering semantics.
-  void Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-               ThreadPool* thread_pool, TimingLogger* timings)
+  void Resolve(jobject class_loader,
+               const std::vector<const DexFile*>& dex_files,
+               TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
-  void ResolveDexFile(jobject class_loader, const DexFile& dex_file,
+  void ResolveDexFile(jobject class_loader,
+                      const DexFile& dex_file,
                       const std::vector<const DexFile*>& dex_files,
-                      ThreadPool* thread_pool, TimingLogger* timings)
+                      ThreadPool* thread_pool,
+                      size_t thread_count,
+                      TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
-  void Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-              ThreadPool* thread_pool, TimingLogger* timings);
-  void VerifyDexFile(jobject class_loader, const DexFile& dex_file,
+  void Verify(jobject class_loader,
+              const std::vector<const DexFile*>& dex_files,
+              TimingLogger* timings);
+  void VerifyDexFile(jobject class_loader,
+                     const DexFile& dex_file,
                      const std::vector<const DexFile*>& dex_files,
-                     ThreadPool* thread_pool, TimingLogger* timings)
+                     ThreadPool* thread_pool,
+                     size_t thread_count,
+                     TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
-  void SetVerified(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                   ThreadPool* thread_pool, TimingLogger* timings);
-  void SetVerifiedDexFile(jobject class_loader, const DexFile& dex_file,
+  void SetVerified(jobject class_loader,
+                   const std::vector<const DexFile*>& dex_files,
+                   TimingLogger* timings);
+  void SetVerifiedDexFile(jobject class_loader,
+                          const DexFile& dex_file,
                           const std::vector<const DexFile*>& dex_files,
-                          ThreadPool* thread_pool, TimingLogger* timings)
+                          ThreadPool* thread_pool,
+                          size_t thread_count,
+                          TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
-  void InitializeClasses(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                         ThreadPool* thread_pool, TimingLogger* timings)
-      REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
-  void InitializeClasses(jobject class_loader, const DexFile& dex_file,
+  void InitializeClasses(jobject class_loader,
                          const std::vector<const DexFile*>& dex_files,
-                         ThreadPool* thread_pool, TimingLogger* timings)
+                         TimingLogger* timings)
+      REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
+  void InitializeClasses(jobject class_loader,
+                         const DexFile& dex_file,
+                         const std::vector<const DexFile*>& dex_files,
+                         TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
 
   void UpdateImageClasses(TimingLogger* timings) REQUIRES(!Locks::mutator_lock_);
   static void FindClinitImageClassesCallback(mirror::Object* object, void* arg)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-               ThreadPool* thread_pool, TimingLogger* timings);
-  void CompileDexFile(jobject class_loader, const DexFile& dex_file,
+  void Compile(jobject class_loader,
+               const std::vector<const DexFile*>& dex_files,
+               TimingLogger* timings);
+  void CompileDexFile(jobject class_loader,
+                      const DexFile& dex_file,
                       const std::vector<const DexFile*>& dex_files,
-                      ThreadPool* thread_pool, TimingLogger* timings)
+                      ThreadPool* thread_pool,
+                      size_t thread_count,
+                      TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
   bool MayInlineInternal(const DexFile* inlined_from, const DexFile* inlined_into) const;
 
+  void InitializeThreadPools();
+  void FreeThreadPools();
+  void CheckThreadPools();
+
   const CompilerOptions* const compiler_options_;
   VerificationResults* const verification_results_;
   DexFileToMethodInlinerMap* const method_inliner_map_;
@@ -652,7 +675,12 @@
 
   bool had_hard_verifier_failure_;
 
-  size_t thread_count_;
+  // A thread pool that can (potentially) run tasks in parallel.
+  std::unique_ptr<ThreadPool> parallel_thread_pool_;
+  size_t parallel_thread_count_;
+
+  // A thread pool that guarantees running single-threaded on the main thread.
+  std::unique_ptr<ThreadPool> single_thread_pool_;
 
   class AOTCompilationStats;
   std::unique_ptr<AOTCompilationStats> stats_;
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 9285b8c..3bf8921 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -47,7 +47,8 @@
       abort_on_hard_verifier_failure_(false),
       init_failure_output_(nullptr),
       dump_cfg_file_name_(""),
-      dump_cfg_append_(false) {
+      dump_cfg_append_(false),
+      force_determinism_(false) {
 }
 
 CompilerOptions::~CompilerOptions() {
@@ -76,7 +77,8 @@
                                  std::ostream* init_failure_output,
                                  bool abort_on_hard_verifier_failure,
                                  const std::string& dump_cfg_file_name,
-                                 bool dump_cfg_append
+                                 bool dump_cfg_append,
+                                 bool force_determinism
                                  ) :  // NOLINT(whitespace/parens)
     compiler_filter_(compiler_filter),
     huge_method_threshold_(huge_method_threshold),
@@ -102,7 +104,8 @@
     abort_on_hard_verifier_failure_(abort_on_hard_verifier_failure),
     init_failure_output_(init_failure_output),
     dump_cfg_file_name_(dump_cfg_file_name),
-    dump_cfg_append_(dump_cfg_append) {
+    dump_cfg_append_(dump_cfg_append),
+    force_determinism_(force_determinism) {
 }
 
 void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) {
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 6989bd5..39372b3 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -86,7 +86,8 @@
                   std::ostream* init_failure_output,
                   bool abort_on_hard_verifier_failure,
                   const std::string& dump_cfg_file_name,
-                  bool dump_cfg_append);
+                  bool dump_cfg_append,
+                  bool force_determinism);
 
   CompilerFilter GetCompilerFilter() const {
     return compiler_filter_;
@@ -245,6 +246,10 @@
     return dump_cfg_append_;
   }
 
+  bool IsForceDeterminism() const {
+    return force_determinism_;
+  }
+
  private:
   void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage);
   void ParsePassOptions(const StringPiece& option, UsageFn Usage);
@@ -300,6 +305,10 @@
   std::string dump_cfg_file_name_;
   bool dump_cfg_append_;
 
+  // Whether the compiler should trade performance for determinism to guarantee exactly reproducible
+  // outcomes.
+  bool force_determinism_;
+
   friend class Dex2Oat;
 
   DISALLOW_COPY_AND_ASSIGN(CompilerOptions);
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 72c615e..c8720ea 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1866,6 +1866,9 @@
   orig->FixupNativePointers(copy, target_ptr_size_, NativeLocationVisitor(this, oat_filename));
   FixupClassVisitor visitor(this, copy);
   static_cast<mirror::Object*>(orig)->VisitReferences(visitor, visitor);
+
+  // Remove the clinitThreadId. This is required for image determinism.
+  copy->SetClinitThreadId(static_cast<pid_t>(0));
 }
 
 void ImageWriter::FixupObject(Object* orig, Object* copy) {
@@ -1993,6 +1996,10 @@
       mirror::DexCache::SetElementPtrSize(copy_fields, i, copy, target_ptr_size_);
     }
   }
+
+  // Remove the DexFile pointers. They will be fixed up when the runtime loads the oat file. Leaving
+  // compiler pointers in here will make the output non-deterministic.
+  copy_dex_cache->SetDexFile(nullptr);
 }
 
 const uint8_t* ImageWriter::GetOatAddress(OatAddress type) const {
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 3a3275a..478d169 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -112,7 +112,8 @@
       /* init_failure_output */ nullptr,
       /* abort_on_hard_verifier_failure */ false,
       /* dump_cfg_file_name */ "",
-      /* dump_cfg_append */ false));
+      /* dump_cfg_append */ false,
+      /* force_determinism */ false));
   for (const std::string& argument : Runtime::Current()->GetCompilerOptions()) {
     compiler_options_->ParseCompilerOption(argument, Usage);
   }
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index c551bad..bbed09c 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -560,7 +560,8 @@
       dump_slow_timing_(kIsDebugBuild),
       swap_fd_(-1),
       app_image_fd_(kInvalidFd),
-      timings_(timings) {}
+      timings_(timings),
+      force_determinism_(false) {}
 
   ~Dex2Oat() {
     // Log completion time before deleting the runtime_, because this accesses
@@ -922,6 +923,12 @@
 
     // Fill some values into the key-value store for the oat header.
     key_value_store_.reset(new SafeMap<std::string, std::string>());
+
+    // Automatically force determinism for the boot image in a host build.
+    if (!kIsTargetBuild && IsBootImage()) {
+      force_determinism_ = true;
+    }
+    compiler_options_->force_determinism_ = force_determinism_;
   }
 
   void ExpandOatAndImageFilenames() {
@@ -1165,6 +1172,8 @@
         multi_image_ = true;
       } else if (option.starts_with("--no-inline-from=")) {
         no_inline_from_string_ = option.substr(strlen("--no-inline-from=")).data();
+      } else if (option == "--force-determinism") {
+        force_determinism_ = true;
       } else if (!compiler_options_->ParseCompilerOption(option, Usage)) {
         Usage("Unknown argument %s", option.data());
       }
@@ -2112,6 +2121,21 @@
     // foreground collector by default for dex2oat.
     raw_options.push_back(std::make_pair("-XX:DisableHSpaceCompactForOOM", nullptr));
 
+    // If we're asked to be deterministic, ensure non-concurrent GC for determinism. Also
+    // force the free-list implementation for large objects.
+    if (compiler_options_->IsForceDeterminism()) {
+      raw_options.push_back(std::make_pair("-Xgc:nonconcurrent", nullptr));
+      raw_options.push_back(std::make_pair("-XX:LargeObjectSpace=freelist", nullptr));
+
+      // We also need to turn off the nonmoving space. For that, we need to disable HSpace
+      // compaction (done above) and ensure that neither foreground nor background collectors
+      // are concurrent.
+      raw_options.push_back(std::make_pair("-XX:BackgroundGC=nonconcurrent", nullptr));
+
+      // To make identity hashcode deterministic, set a known seed.
+      mirror::Object::SetHashCodeSeed(987654321U);
+    }
+
     if (!Runtime::ParseOptions(raw_options, false, runtime_options)) {
       LOG(ERROR) << "Failed to parse runtime options";
       return false;
@@ -2415,6 +2439,9 @@
   // Backing storage.
   std::vector<std::string> char_backing_storage_;
 
+  // See CompilerOptions.force_determinism_.
+  bool force_determinism_;
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(Dex2Oat);
 };
 
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 7a0decb..a1f6eee 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1654,7 +1654,7 @@
       stats_.file_bytes = file->GetLength();
     }
     size_t header_bytes = sizeof(ImageHeader);
-    const auto& bitmap_section = image_header_.GetImageSection(ImageHeader::kSectionImageBitmap);
+    const auto& object_section = image_header_.GetImageSection(ImageHeader::kSectionObjects);
     const auto& field_section = image_header_.GetImageSection(ImageHeader::kSectionArtFields);
     const auto& method_section = image_header_.GetMethodsSection();
     const auto& dex_cache_arrays_section = image_header_.GetImageSection(
@@ -1663,17 +1663,46 @@
         ImageHeader::kSectionInternedStrings);
     const auto& class_table_section = image_header_.GetImageSection(
         ImageHeader::kSectionClassTable);
+    const auto& bitmap_section = image_header_.GetImageSection(ImageHeader::kSectionImageBitmap);
+
     stats_.header_bytes = header_bytes;
-    stats_.alignment_bytes += RoundUp(header_bytes, kObjectAlignment) - header_bytes;
-    // Add padding between the field and method section.
-    // (Field section is 4-byte aligned, method section is 8-byte aligned on 64-bit targets.)
-    stats_.alignment_bytes += method_section.Offset() -
-        (field_section.Offset() + field_section.Size());
-    // Add padding between the dex cache arrays section and the intern table. (Dex cache
-    // arrays section is 4-byte aligned on 32-bit targets, intern table is 8-byte aligned.)
-    stats_.alignment_bytes += intern_section.Offset() -
-        (dex_cache_arrays_section.Offset() + dex_cache_arrays_section.Size());
-    stats_.alignment_bytes += bitmap_section.Offset() - image_header_.GetImageSize();
+
+    // Objects are kObjectAlignment-aligned.
+    // CHECK_EQ(RoundUp(header_bytes, kObjectAlignment), object_section.Offset());
+    if (object_section.Offset() > header_bytes) {
+      stats_.alignment_bytes += object_section.Offset() - header_bytes;
+    }
+
+    // Field section is 4-byte aligned.
+    constexpr size_t kFieldSectionAlignment = 4U;
+    uint32_t end_objects = object_section.Offset() + object_section.Size();
+    CHECK_EQ(RoundUp(end_objects, kFieldSectionAlignment), field_section.Offset());
+    stats_.alignment_bytes += field_section.Offset() - end_objects;
+
+    // Method section is 4/8 byte aligned depending on target. Just check for 4-byte alignment.
+    uint32_t end_fields = field_section.Offset() + field_section.Size();
+    CHECK_ALIGNED(method_section.Offset(), 4);
+    stats_.alignment_bytes += method_section.Offset() - end_fields;
+
+    // Dex cache arrays section is aligned depending on the target. Just check for 4-byte alignment.
+    uint32_t end_methods = method_section.Offset() + method_section.Size();
+    CHECK_ALIGNED(dex_cache_arrays_section.Offset(), 4);
+    stats_.alignment_bytes += dex_cache_arrays_section.Offset() - end_methods;
+
+    // Intern table is 8-byte aligned.
+    uint32_t end_caches = dex_cache_arrays_section.Offset() + dex_cache_arrays_section.Size();
+    CHECK_EQ(RoundUp(end_caches, 8U), intern_section.Offset());
+    stats_.alignment_bytes += intern_section.Offset() - end_caches;
+
+    // Add space between intern table and class table.
+    uint32_t end_intern = intern_section.Offset() + intern_section.Size();
+    stats_.alignment_bytes += class_table_section.Offset() - end_intern;
+
+    // Add space between class table and bitmap. Expect the bitmap to be page-aligned.
+    uint32_t end_ctable = class_table_section.Offset() + class_table_section.Size();
+    CHECK_ALIGNED(bitmap_section.Offset(), kPageSize);
+    stats_.alignment_bytes += bitmap_section.Offset() - end_ctable;
+
     stats_.bitmap_bytes += bitmap_section.Size();
     stats_.art_field_bytes += field_section.Size();
     stats_.art_method_bytes += method_section.Size();
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 3964539..84483b4 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -114,6 +114,9 @@
 // timeout on how long we wait for finalizers to run. b/21544853
 static constexpr uint64_t kNativeAllocationFinalizeTimeout = MsToNs(250u);
 
+// For deterministic compilation, we need the heap to be at a well-known address.
+static constexpr uint32_t kAllocSpaceBeginForDeterministicAoT = 0x40000000;
+
 Heap::Heap(size_t initial_size,
            size_t growth_limit,
            size_t min_free,
@@ -352,6 +355,11 @@
   }
   std::unique_ptr<MemMap> main_mem_map_1;
   std::unique_ptr<MemMap> main_mem_map_2;
+
+  // Gross hack to make dex2oat deterministic.
+  if (requested_alloc_space_begin == nullptr && Runtime::Current()->IsAotCompiler()) {
+    requested_alloc_space_begin = reinterpret_cast<uint8_t*>(kAllocSpaceBeginForDeterministicAoT);
+  }
   uint8_t* request_begin = requested_alloc_space_begin;
   if (request_begin != nullptr && separate_non_moving_space) {
     request_begin += non_moving_space_capacity;