Merge "Disable the ahead-of-time compilation for MIPS."
diff --git a/Android.mk b/Android.mk
index 4351be9..92339af 100644
--- a/Android.mk
+++ b/Android.mk
@@ -46,9 +46,6 @@
 	rm -f $(ART_TEST_OUT)/*.odex
 	rm -f $(ART_TEST_OUT)/*.oat
 	rm -f $(ART_TEST_OUT)/*.art
-	rm -f $(DALVIK_CACHE_OUT)/*@classes.dex
-	rm -f $(DALVIK_CACHE_OUT)/*.oat
-	rm -f $(DALVIK_CACHE_OUT)/*.art
 	rm -f $(HOST_OUT_JAVA_LIBRARIES)/*.odex
 	rm -f $(HOST_OUT_JAVA_LIBRARIES)/*.oat
 	rm -f $(HOST_OUT_JAVA_LIBRARIES)/*.art
@@ -66,25 +63,31 @@
 	rm -f $(2ND_TARGET_OUT_INTERMEDIATES)/JAVA_LIBRARIES/*_intermediates/javalib.odex
 	rm -f $(2ND_TARGET_OUT_INTERMEDIATES)/APPS/*_intermediates/*.odex
 endif
-	rm -rf /tmp/test-*/dalvik-cache/*@classes.dex
-	rm -rf /tmp/android-data/dalvik-cache/*@classes.dex
+	rm -rf /tmp/test-*/dalvik-cache/*
+	rm -rf /tmp/android-data/dalvik-cache/*
 
 .PHONY: clean-oat-target
 clean-oat-target:
 	adb remount
-	adb shell rm $(ART_NATIVETEST_DIR)/*.odex
-	adb shell rm $(ART_NATIVETEST_DIR)/*.oat
-	adb shell rm $(ART_NATIVETEST_DIR)/*.art
-	adb shell rm $(ART_TEST_DIR)/*.odex
-	adb shell rm $(ART_TEST_DIR)/*.oat
-	adb shell rm $(ART_TEST_DIR)/*.art
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.dex
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.oat
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.art
-	adb shell rm $(DEXPREOPT_BOOT_JAR_DIR)/*.oat
-	adb shell rm $(DEXPREOPT_BOOT_JAR_DIR)/*.art
-	adb shell rm system/app/*.odex
-	adb shell rm data/run-test/test-*/dalvik-cache/*@classes.dex
+	adb shell rm -f $(ART_NATIVETEST_DIR)/*.odex
+	adb shell rm -f $(ART_NATIVETEST_DIR)/*.oat
+	adb shell rm -f $(ART_NATIVETEST_DIR)/*.art
+	adb shell rm -f $(ART_TEST_DIR)/*.odex
+	adb shell rm -f $(ART_TEST_DIR)/*.oat
+	adb shell rm -f $(ART_TEST_DIR)/*.art
+ifdef TARGET_2ND_ARCH
+	adb shell rm -f $(2ND_ART_NATIVETEST_DIR)/*.odex
+	adb shell rm -f $(2ND_ART_NATIVETEST_DIR)/*.oat
+	adb shell rm -f $(2ND_ART_NATIVETEST_DIR)/*.art
+	adb shell rm -f $(2ND_ART_TEST_DIR)/*.odex
+	adb shell rm -f $(2ND_ART_TEST_DIR)/*.oat
+	adb shell rm -f $(2ND_ART_TEST_DIR)/*.art
+endif
+	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
+	adb shell rm -f $(DEXPREOPT_BOOT_JAR_DIR)/*.oat
+	adb shell rm -f $(DEXPREOPT_BOOT_JAR_DIR)/*.art
+	adb shell rm -f system/app/*.odex
+	adb shell rm -rf data/run-test/test-*/dalvik-cache/*
 
 ifneq ($(art_dont_bother),true)
 
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index f25217f..429c523 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -49,6 +49,7 @@
 	runtime/gtest_test.cc \
 	runtime/indenter_test.cc \
 	runtime/indirect_reference_table_test.cc \
+	runtime/instruction_set_test.cc \
 	runtime/intern_table_test.cc \
 	runtime/leb128_test.cc \
 	runtime/mem_map_test.cc \
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index c3f9b67..b030bb4 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -555,8 +555,8 @@
   return offset;
 }
 
-static int AssignLiteralPointerOffsetCommon(LIR* lir, CodeOffset offset) {
-  unsigned int element_size = sizeof(void*);
+static int AssignLiteralPointerOffsetCommon(LIR* lir, CodeOffset offset,
+                                            unsigned int element_size) {
   // Align to natural pointer size.
   offset = (offset + (element_size - 1)) & ~(element_size - 1);
   for (; lir != NULL; lir = lir->next) {
@@ -726,9 +726,10 @@
 /* Determine the offset of each literal field */
 int Mir2Lir::AssignLiteralOffset(CodeOffset offset) {
   offset = AssignLiteralOffsetCommon(literal_list_, offset);
-  offset = AssignLiteralPointerOffsetCommon(code_literal_list_, offset);
-  offset = AssignLiteralPointerOffsetCommon(method_literal_list_, offset);
-  offset = AssignLiteralPointerOffsetCommon(class_literal_list_, offset);
+  unsigned int ptr_size = GetInstructionSetPointerSize(cu_->instruction_set);
+  offset = AssignLiteralPointerOffsetCommon(code_literal_list_, offset, ptr_size);
+  offset = AssignLiteralPointerOffsetCommon(method_literal_list_, offset, ptr_size);
+  offset = AssignLiteralPointerOffsetCommon(class_literal_list_, offset, ptr_size);
   return offset;
 }
 
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 05313a9..93a23a6 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -455,14 +455,14 @@
   if (direct_code != 0 && direct_method != 0) {
     switch (state) {
     case 0:  // Get the current Method* [sets kArg0]
-      if (direct_code != static_cast<unsigned int>(-1)) {
+      if (direct_code != static_cast<uintptr_t>(-1)) {
         if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
           cg->LoadConstant(cg->TargetReg(kInvokeTgt), direct_code);
         }
       } else if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
         cg->LoadCodeAddress(target_method, type, kInvokeTgt);
       }
-      if (direct_method != static_cast<unsigned int>(-1)) {
+      if (direct_method != static_cast<uintptr_t>(-1)) {
         cg->LoadConstant(cg->TargetReg(kArg0), direct_method);
       } else {
         cg->LoadMethodAddress(target_method, type, kArg0);
@@ -483,7 +483,7 @@
                       cg->TargetReg(kArg0));
       // Set up direct code if known.
       if (direct_code != 0) {
-        if (direct_code != static_cast<unsigned int>(-1)) {
+        if (direct_code != static_cast<uintptr_t>(-1)) {
           cg->LoadConstant(cg->TargetReg(kInvokeTgt), direct_code);
         } else if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
           CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h
index 00a239b..604ce1c 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.h
+++ b/compiler/jni/quick/arm/calling_convention_arm.h
@@ -71,6 +71,11 @@
   ManagedRegister CurrentParamRegister() OVERRIDE;
   FrameOffset CurrentParamStackOffset() OVERRIDE;
 
+  // AAPCS mandates return values are extended.
+  bool RequiresSmallResultTypeExtension() const OVERRIDE {
+    return false;
+  }
+
  protected:
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index 92f547c..9fd3265 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -68,6 +68,11 @@
   ManagedRegister CurrentParamRegister() OVERRIDE;
   FrameOffset CurrentParamStackOffset() OVERRIDE;
 
+  // aarch64 calling convention leaves upper bits undefined.
+  bool RequiresSmallResultTypeExtension() const OVERRIDE {
+    return true;
+  }
+
  protected:
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index 4d25d1c..18afd58 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -287,6 +287,8 @@
   FrameOffset ReturnValueSaveLocation() const;
   // Register that holds result if it is integer.
   virtual ManagedRegister IntReturnRegister() = 0;
+  // Whether the compiler needs to ensure zero-/sign-extension of a small result type
+  virtual bool RequiresSmallResultTypeExtension() const = 0;
 
   // Callee save registers to spill prior to native code (which may clobber)
   virtual const std::vector<ManagedRegister>& CalleeSaveRegisters() const = 0;
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 93b1b5a..9f439eb 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -314,7 +314,7 @@
           mr_conv->InterproceduralScratchRegister());
 
   // 10. Fix differences in result widths.
-  if (instruction_set == kX86 || instruction_set == kX86_64) {
+  if (main_jni_conv->RequiresSmallResultTypeExtension()) {
     if (main_jni_conv->GetReturnType() == Primitive::kPrimByte ||
         main_jni_conv->GetReturnType() == Primitive::kPrimShort) {
       __ SignExtend(main_jni_conv->ReturnRegister(),
diff --git a/compiler/jni/quick/mips/calling_convention_mips.h b/compiler/jni/quick/mips/calling_convention_mips.h
index e33fbad..8d82dce 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.h
+++ b/compiler/jni/quick/mips/calling_convention_mips.h
@@ -71,6 +71,11 @@
   ManagedRegister CurrentParamRegister() OVERRIDE;
   FrameOffset CurrentParamStackOffset() OVERRIDE;
 
+  // Mips does not need to extend small return types.
+  bool RequiresSmallResultTypeExtension() const OVERRIDE {
+    return false;
+  }
+
  protected:
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index 5b9069c..025eb6d 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -69,6 +69,11 @@
   ManagedRegister CurrentParamRegister() OVERRIDE;
   FrameOffset CurrentParamStackOffset() OVERRIDE;
 
+  // x86 needs to extend small return types.
+  bool RequiresSmallResultTypeExtension() const OVERRIDE {
+    return true;
+  }
+
  protected:
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index d545774..1ba5353 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -69,6 +69,11 @@
   ManagedRegister CurrentParamRegister() OVERRIDE;
   FrameOffset CurrentParamStackOffset() OVERRIDE;
 
+  // x86-64 needs to extend small return types.
+  bool RequiresSmallResultTypeExtension() const OVERRIDE {
+    return true;
+  }
+
  protected:
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
diff --git a/compiler/llvm/llvm_compilation_unit.cc b/compiler/llvm/llvm_compilation_unit.cc
index fe60959..78bdb4d 100644
--- a/compiler/llvm/llvm_compilation_unit.cc
+++ b/compiler/llvm/llvm_compilation_unit.cc
@@ -143,7 +143,7 @@
 
 static std::string DumpDirectory() {
   if (kIsTargetBuild) {
-    return GetDalvikCacheOrDie(GetAndroidData());
+    return GetDalvikCacheOrDie("llvm-dump");
   }
   return "/tmp";
 }
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 1d87eaa..b4bb979 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -467,12 +467,26 @@
 #endif
 }
 
-void Arm64Assembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no sign extension necessary for Arm64";
+void Arm64Assembler::SignExtend(ManagedRegister mreg, size_t size) {
+  Arm64ManagedRegister reg = mreg.AsArm64();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsWRegister()) << reg;
+  if (size == 1) {
+    ___ sxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+  } else {
+    ___ sxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+  }
 }
 
-void Arm64Assembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no zero extension necessary for Arm64";
+void Arm64Assembler::ZeroExtend(ManagedRegister mreg, size_t size) {
+  Arm64ManagedRegister reg = mreg.AsArm64();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsWRegister()) << reg;
+  if (size == 1) {
+    ___ uxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+  } else {
+    ___ uxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+  }
 }
 
 void Arm64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 340e623..cdf26f1 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1101,6 +1101,9 @@
   DexFileToMethodInlinerMap method_inliner_map;
   CompilerCallbacksImpl callbacks(&verification_results, &method_inliner_map);
   runtime_options.push_back(std::make_pair("compilercallbacks", &callbacks));
+  runtime_options.push_back(
+      std::make_pair("imageinstructionset",
+                     reinterpret_cast<const void*>(GetInstructionSetString(instruction_set))));
 
   Dex2Oat* p_dex2oat;
   if (!Dex2Oat::Create(&p_dex2oat,
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index d6d2058..4e4a512 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -1360,7 +1360,7 @@
               } else if (Rn.r == 15) {
                 intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr);
                 lit_adr = RoundDown(lit_adr, 4) + 4 + imm12;
-                args << "  ; " << reinterpret_cast<void*>(*reinterpret_cast<int32_t*>(lit_adr));
+                args << StringPrintf("  ; 0x%08x", *reinterpret_cast<int32_t*>(lit_adr));
               }
             } else if (op3 == 3) {
               // LDRSH.W Rt, [Rn, #imm12]      - 111 11 00 11 011 nnnn tttt iiiiiiiiiiii
@@ -1373,7 +1373,7 @@
               } else if (Rn.r == 15) {
                 intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr);
                 lit_adr = RoundDown(lit_adr, 4) + 4 + imm12;
-                args << "  ; " << reinterpret_cast<void*>(*reinterpret_cast<int32_t*>(lit_adr));
+                args << StringPrintf("  ; 0x%08x", *reinterpret_cast<int32_t*>(lit_adr));
               }
             }
           }
@@ -1430,7 +1430,7 @@
             } else if (Rn.r == 15) {
               intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr);
               lit_adr = RoundDown(lit_adr, 4) + 4 + imm12;
-              args << "  ; " << reinterpret_cast<void*>(*reinterpret_cast<int32_t*>(lit_adr));
+              args << StringPrintf("  ; 0x%08x", *reinterpret_cast<int32_t*>(lit_adr));
             }
           } else if (op4 == 0) {
             // LDR.W Rt, [Rn, Rm{, LSL #imm2}] - 111 11 00 00 101 nnnn tttt 000000iimmmm
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 583e5e5..c9e3c11 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -919,7 +919,9 @@
   }
 
   std::string cache_error_msg;
-  std::string cache_location(GetDalvikCacheFilenameOrDie(dex_location));
+  const std::string dalvik_cache(GetDalvikCacheOrDie(GetInstructionSetString(kRuntimeISA)));
+  std::string cache_location(GetDalvikCacheFilenameOrDie(dex_location,
+                                                         dalvik_cache.c_str()));
   dex_file = VerifyAndOpenDexFileFromOatFile(cache_location, dex_location, &cache_error_msg,
                                              &open_failed);
   if (dex_file != nullptr) {
diff --git a/runtime/gc/allocator/rosalloc-inl.h b/runtime/gc/allocator/rosalloc-inl.h
index ac0f67b..c69ca48 100644
--- a/runtime/gc/allocator/rosalloc-inl.h
+++ b/runtime/gc/allocator/rosalloc-inl.h
@@ -23,11 +23,17 @@
 namespace gc {
 namespace allocator {
 
+template<bool kThreadSafe>
 inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* bytes_allocated) {
   if (UNLIKELY(size > kLargeSizeThreshold)) {
     return AllocLargeObject(self, size, bytes_allocated);
   }
-  void* m = AllocFromRun(self, size, bytes_allocated);
+  void* m;
+  if (kThreadSafe) {
+    m = AllocFromRun(self, size, bytes_allocated);
+  } else {
+    m = AllocFromRunThreadUnsafe(self, size, bytes_allocated);
+  }
   // Check if the returned memory is really all zero.
   if (kCheckZeroMemory && m != nullptr) {
     byte* bytes = reinterpret_cast<byte*>(m);
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index ff59016..f113030 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -67,11 +67,11 @@
              << std::hex << (intptr_t)(base_ + capacity_)
              << ", capacity=" << std::dec << capacity_
              << ", max_capacity=" << std::dec << max_capacity_;
-  memset(current_runs_, 0, sizeof(current_runs_));
   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
     size_bracket_lock_names[i] =
         StringPrintf("an rosalloc size bracket %d lock", static_cast<int>(i));
     size_bracket_locks_[i] = new Mutex(size_bracket_lock_names[i].c_str(), kRosAllocBracketLock);
+    current_runs_[i] = dedicated_full_run_;
   }
   DCHECK_EQ(footprint_, capacity_);
   size_t num_of_pages = footprint_ / kPageSize;
@@ -548,7 +548,7 @@
     DCHECK(!new_run->IsThreadLocal());
     DCHECK_EQ(new_run->first_search_vec_idx_, 0U);
     DCHECK(!new_run->to_be_bulk_freed_);
-    if (kUsePrefetchDuringAllocRun && idx <= kMaxThreadLocalSizeBracketIdx) {
+    if (kUsePrefetchDuringAllocRun && idx < kNumThreadLocalSizeBrackets) {
       // Take ownership of the cache lines if we are likely to be thread local run.
       if (kPrefetchNewRunDataByZeroing) {
         // Zeroing the data is sometimes faster than prefetching but it increases memory usage
@@ -584,6 +584,60 @@
   return AllocRun(self, idx);
 }
 
+void* RosAlloc::AllocFromCurrentRunUnlocked(Thread* self, size_t idx) {
+  Run* current_run = current_runs_[idx];
+  DCHECK(current_run != nullptr);
+  void* slot_addr = current_run->AllocSlot();
+  if (UNLIKELY(slot_addr == nullptr)) {
+    // The current run got full. Try to refill it.
+    DCHECK(current_run->IsFull());
+    if (kIsDebugBuild && current_run != dedicated_full_run_) {
+      full_runs_[idx].insert(current_run);
+      if (kTraceRosAlloc) {
+        LOG(INFO) << __FUNCTION__ << " : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(current_run)
+                  << " into full_runs_[" << std::dec << idx << "]";
+      }
+      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(current_run) != full_runs_[idx].end());
+    }
+    current_run = RefillRun(self, idx);
+    if (UNLIKELY(current_run == nullptr)) {
+      // Failed to allocate a new run, make sure that it is the dedicated full run.
+      current_runs_[idx] = dedicated_full_run_;
+      return nullptr;
+    }
+    DCHECK(current_run != nullptr);
+    DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
+    DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
+    current_run->SetIsThreadLocal(false);
+    current_runs_[idx] = current_run;
+    DCHECK(!current_run->IsFull());
+    slot_addr = current_run->AllocSlot();
+    // Must succeed now with a new run.
+    DCHECK(slot_addr != nullptr);
+  }
+  return slot_addr;
+}
+
+void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated) {
+  DCHECK_LE(size, kLargeSizeThreshold);
+  size_t bracket_size;
+  size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
+  DCHECK_EQ(idx, SizeToIndex(size));
+  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+  DCHECK_EQ(bracket_size, bracketSizes[idx]);
+  DCHECK_LE(size, bracket_size);
+  DCHECK(size > 512 || bracket_size - size < 16);
+  Locks::mutator_lock_->AssertExclusiveHeld(self);
+  void* slot_addr = AllocFromCurrentRunUnlocked(self, idx);
+  if (LIKELY(slot_addr != nullptr)) {
+    DCHECK(bytes_allocated != nullptr);
+    *bytes_allocated = bracket_size;
+    // Caller verifies that it is all 0.
+  }
+  return slot_addr;
+}
+
 void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) {
   DCHECK_LE(size, kLargeSizeThreshold);
   size_t bracket_size;
@@ -596,7 +650,7 @@
 
   void* slot_addr;
 
-  if (LIKELY(idx <= kMaxThreadLocalSizeBracketIdx)) {
+  if (LIKELY(idx < kNumThreadLocalSizeBrackets)) {
     // Use a thread-local run.
     Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx));
     // Allow invalid since this will always fail the allocation.
@@ -631,7 +685,6 @@
         // No slots got freed. Try to refill the thread-local run.
         DCHECK(thread_local_run->IsFull());
         if (thread_local_run != dedicated_full_run_) {
-          self->SetRosAllocRun(idx, dedicated_full_run_);
           thread_local_run->SetIsThreadLocal(false);
           if (kIsDebugBuild) {
             full_runs_[idx].insert(thread_local_run);
@@ -646,8 +699,9 @@
         }
 
         thread_local_run = RefillRun(self, idx);
-        if (UNLIKELY(thread_local_run == NULL)) {
-          return NULL;
+        if (UNLIKELY(thread_local_run == nullptr)) {
+          self->SetRosAllocRun(idx, dedicated_full_run_);
+          return nullptr;
         }
         DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
         DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
@@ -656,12 +710,12 @@
         DCHECK(!thread_local_run->IsFull());
       }
 
-      DCHECK(thread_local_run != NULL);
+      DCHECK(thread_local_run != nullptr);
       DCHECK(!thread_local_run->IsFull());
       DCHECK(thread_local_run->IsThreadLocal());
       slot_addr = thread_local_run->AllocSlot();
       // Must succeed now with a new run.
-      DCHECK(slot_addr != NULL);
+      DCHECK(slot_addr != nullptr);
     }
     if (kTraceRosAlloc) {
       LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
@@ -671,48 +725,7 @@
   } else {
     // Use the (shared) current run.
     MutexLock mu(self, *size_bracket_locks_[idx]);
-    Run* current_run = current_runs_[idx];
-    if (UNLIKELY(current_run == NULL)) {
-      current_run = RefillRun(self, idx);
-      if (UNLIKELY(current_run == NULL)) {
-        return NULL;
-      }
-      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
-      DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
-      current_run->SetIsThreadLocal(false);
-      current_runs_[idx] = current_run;
-      DCHECK(!current_run->IsFull());
-    }
-    DCHECK(current_run != NULL);
-    slot_addr = current_run->AllocSlot();
-    if (UNLIKELY(slot_addr == NULL)) {
-      // The current run got full. Try to refill it.
-      DCHECK(current_run->IsFull());
-      current_runs_[idx] = NULL;
-      if (kIsDebugBuild) {
-        // Insert it into full_runs and set the current run to NULL.
-        full_runs_[idx].insert(current_run);
-        if (kTraceRosAlloc) {
-          LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(current_run)
-                    << " into full_runs_[" << std::dec << idx << "]";
-        }
-      }
-      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
-      DCHECK(full_runs_[idx].find(current_run) != full_runs_[idx].end());
-      current_run = RefillRun(self, idx);
-      if (UNLIKELY(current_run == NULL)) {
-        return NULL;
-      }
-      DCHECK(current_run != NULL);
-      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
-      DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
-      current_run->SetIsThreadLocal(false);
-      current_runs_[idx] = current_run;
-      DCHECK(!current_run->IsFull());
-      slot_addr = current_run->AllocSlot();
-      // Must succeed now with a new run.
-      DCHECK(slot_addr != NULL);
-    }
+    slot_addr = AllocFromCurrentRunUnlocked(self, idx);
     if (kTraceRosAlloc) {
       LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
                 << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
@@ -741,7 +754,7 @@
   }
   if (LIKELY(run->IsThreadLocal())) {
     // It's a thread-local run. Just mark the thread-local free bit map and return.
-    DCHECK_LE(run->size_bracket_idx_, kMaxThreadLocalSizeBracketIdx);
+    DCHECK_LT(run->size_bracket_idx_, kNumThreadLocalSizeBrackets);
     DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
     DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
     run->MarkThreadLocalFreeBitMap(ptr);
@@ -766,7 +779,7 @@
       }
     }
     if (run == current_runs_[idx]) {
-      current_runs_[idx] = NULL;
+      current_runs_[idx] = dedicated_full_run_;
     }
     DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
     DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
@@ -1233,7 +1246,7 @@
     size_t idx = run->size_bracket_idx_;
     MutexLock mu(self, *size_bracket_locks_[idx]);
     if (run->IsThreadLocal()) {
-      DCHECK_LE(run->size_bracket_idx_, kMaxThreadLocalSizeBracketIdx);
+      DCHECK_LT(run->size_bracket_idx_, kNumThreadLocalSizeBrackets);
       DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
       DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
       run->UnionBulkFreeBitMapToThreadLocalFreeBitMap();
@@ -1627,7 +1640,7 @@
   Thread* self = Thread::Current();
   // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
   WriterMutexLock wmu(self, bulk_free_lock_);
-  for (size_t idx = 0; idx < kNumOfSizeBrackets; idx++) {
+  for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
     MutexLock mu(self, *size_bracket_locks_[idx]);
     Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
     CHECK(thread_local_run != nullptr);
@@ -1643,30 +1656,48 @@
       thread_local_run->MergeBulkFreeBitMapIntoAllocBitMap();
       DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
       DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
-      if (thread_local_run->IsFull()) {
-        if (kIsDebugBuild) {
-          full_runs_[idx].insert(thread_local_run);
-          DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end());
-          if (kTraceRosAlloc) {
-            LOG(INFO) << "RosAlloc::RevokeThreadLocalRuns() : Inserted run 0x" << std::hex
-                      << reinterpret_cast<intptr_t>(thread_local_run)
-                      << " into full_runs_[" << std::dec << idx << "]";
-          }
-        }
-      } else if (thread_local_run->IsAllFree()) {
-        MutexLock mu(self, lock_);
-        thread_local_run->ZeroHeader();
-        FreePages(self, thread_local_run, true);
-      } else {
-        non_full_runs_[idx].insert(thread_local_run);
-        DCHECK(non_full_runs_[idx].find(thread_local_run) != non_full_runs_[idx].end());
-        if (kTraceRosAlloc) {
-          LOG(INFO) << "RosAlloc::RevokeThreadLocalRuns() : Inserted run 0x" << std::hex
-                    << reinterpret_cast<intptr_t>(thread_local_run)
-                    << " into non_full_runs_[" << std::dec << idx << "]";
-        }
+      RevokeRun(self, idx, thread_local_run);
+    }
+  }
+}
+
+void RosAlloc::RevokeRun(Thread* self, size_t idx, Run* run) {
+  size_bracket_locks_[idx]->AssertHeld(self);
+  DCHECK(run != dedicated_full_run_);
+  if (run->IsFull()) {
+    if (kIsDebugBuild) {
+      full_runs_[idx].insert(run);
+      DCHECK(full_runs_[idx].find(run) != full_runs_[idx].end());
+      if (kTraceRosAlloc) {
+        LOG(INFO) << __FUNCTION__  << " : Inserted run 0x" << std::hex
+                  << reinterpret_cast<intptr_t>(run)
+                  << " into full_runs_[" << std::dec << idx << "]";
       }
     }
+  } else if (run->IsAllFree()) {
+    run->ZeroHeader();
+    MutexLock mu(self, lock_);
+    FreePages(self, run, true);
+  } else {
+    non_full_runs_[idx].insert(run);
+    DCHECK(non_full_runs_[idx].find(run) != non_full_runs_[idx].end());
+    if (kTraceRosAlloc) {
+      LOG(INFO) << __FUNCTION__ << " : Inserted run 0x" << std::hex
+                << reinterpret_cast<intptr_t>(run)
+                << " into non_full_runs_[" << std::dec << idx << "]";
+    }
+  }
+}
+
+void RosAlloc::RevokeThreadUnsafeCurrentRuns() {
+  // Revoke the current runs which share the same idx as thread local runs.
+  Thread* self = Thread::Current();
+  for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; ++idx) {
+    MutexLock mu(self, *size_bracket_locks_[idx]);
+    if (current_runs_[idx] != dedicated_full_run_) {
+      RevokeRun(self, idx, current_runs_[idx]);
+      current_runs_[idx] = dedicated_full_run_;
+    }
   }
 }
 
@@ -1679,6 +1710,7 @@
   for (Thread* thread : thread_list) {
     RevokeThreadLocalRuns(thread);
   }
+  RevokeThreadUnsafeCurrentRuns();
 }
 
 void RosAlloc::AssertThreadLocalRunsAreRevoked(Thread* thread) {
@@ -1686,7 +1718,7 @@
     Thread* self = Thread::Current();
     // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
     WriterMutexLock wmu(self, bulk_free_lock_);
-    for (size_t idx = 0; idx < kNumOfSizeBrackets; idx++) {
+    for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
       MutexLock mu(self, *size_bracket_locks_[idx]);
       Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
       DCHECK(thread_local_run == nullptr || thread_local_run == dedicated_full_run_);
@@ -1696,18 +1728,21 @@
 
 void RosAlloc::AssertAllThreadLocalRunsAreRevoked() {
   if (kIsDebugBuild) {
-    MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
-    MutexLock mu2(Thread::Current(), *Locks::thread_list_lock_);
+    Thread* self = Thread::Current();
+    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+    MutexLock mu2(self, *Locks::thread_list_lock_);
     std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
     for (Thread* t : thread_list) {
       AssertThreadLocalRunsAreRevoked(t);
     }
+    for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; ++idx) {
+      MutexLock mu(self, *size_bracket_locks_[idx]);
+      CHECK_EQ(current_runs_[idx], dedicated_full_run_);
+    }
   }
 }
 
 void RosAlloc::Initialize() {
-  // Check the consistency of the number of size brackets.
-  DCHECK_EQ(Thread::kRosAllocNumOfSizeBrackets, kNumOfSizeBrackets);
   // bracketSizes.
   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
     if (i < kNumOfSizeBrackets - 2) {
@@ -1911,15 +1946,34 @@
           break;
         }
         case kPageMapRunPart:
-          LOG(FATAL) << "Unreachable - page map type: " << pm << std::endl << DumpPageMap();
-          break;
+          // Fall-through.
         default:
           LOG(FATAL) << "Unreachable - page map type: " << pm << std::endl << DumpPageMap();
           break;
       }
     }
   }
-
+  std::list<Thread*> threads = Runtime::Current()->GetThreadList()->GetList();
+  for (Thread* thread : threads) {
+    for (size_t i = 0; i < kNumThreadLocalSizeBrackets; ++i) {
+      MutexLock mu(self, *size_bracket_locks_[i]);
+      Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(i));
+      CHECK(thread_local_run != nullptr);
+      CHECK(thread_local_run->IsThreadLocal());
+      CHECK(thread_local_run == dedicated_full_run_ ||
+            thread_local_run->size_bracket_idx_ == i);
+    }
+  }
+  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+    MutexLock mu(self, *size_bracket_locks_[i]);
+    Run* current_run = current_runs_[i];
+    CHECK(current_run != nullptr);
+    if (current_run != dedicated_full_run_) {
+      // The dedicated full run is currently marked as thread local.
+      CHECK(!current_run->IsThreadLocal());
+      CHECK_EQ(current_run->size_bracket_idx_, i);
+    }
+  }
   // Call Verify() here for the lock order.
   for (auto& run : runs) {
     run->Verify(self, this);
@@ -1952,7 +2006,7 @@
     std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
     for (auto it = thread_list.begin(); it != thread_list.end(); ++it) {
       Thread* thread = *it;
-      for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+      for (size_t i = 0; i < kNumThreadLocalSizeBrackets; i++) {
         MutexLock mu(self, *rosalloc->size_bracket_locks_[i]);
         Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(i));
         if (thread_local_run == this) {
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index f7fa2da..21044f3 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -405,11 +405,6 @@
   // at a page-granularity.
   static const size_t kLargeSizeThreshold = 2048;
 
-  // We use use thread-local runs for the size Brackets whose indexes
-  // are less than or equal to this index. We use shared (current)
-  // runs for the rest.
-  static const size_t kMaxThreadLocalSizeBracketIdx = 10;
-
   // If true, check that the returned memory is actually zero.
   static constexpr bool kCheckZeroMemory = kIsDebugBuild;
 
@@ -442,6 +437,10 @@
   // The default value for page_release_size_threshold_.
   static constexpr size_t kDefaultPageReleaseSizeThreshold = 4 * MB;
 
+  // We use thread-local runs for the size brackets whose indexes are
+  // less than this count. We use shared (current) runs for the rest.
+  static const size_t kNumThreadLocalSizeBrackets = 11;
+
  private:
   // The base address of the memory region that's managed by this allocator.
   byte* base_;
@@ -526,6 +525,12 @@
   // Allocate/free a run slot.
   void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
       LOCKS_EXCLUDED(lock_);
+  // Allocate/free a run slot without acquiring locks.
+  // TODO: EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+  void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(lock_);
+  void* AllocFromCurrentRunUnlocked(Thread* self, size_t idx);
+
   // Returns the bracket size.
   size_t FreeFromRun(Thread* self, void* ptr, Run* run)
       LOCKS_EXCLUDED(lock_);
@@ -543,11 +548,20 @@
   // Allocates large objects.
   void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
 
+  // Revoke a run by adding it to non_full_runs_ or freeing the pages.
+  void RevokeRun(Thread* self, size_t idx, Run* run);
+
+  // Revoke the current runs which share an index with the thread local runs.
+  void RevokeThreadUnsafeCurrentRuns();
+
  public:
   RosAlloc(void* base, size_t capacity, size_t max_capacity,
            PageReleaseMode page_release_mode,
            size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold);
   ~RosAlloc();
+  // If kThreadSafe is false then the allocator may avoid acquiring some locks as an optimization.
+  // If used, this may cause race conditions if multiple threads are allocating at the same time.
+  template<bool kThreadSafe = true>
   void* Alloc(Thread* self, size_t size, size_t* bytes_allocated)
       LOCKS_EXCLUDED(lock_);
   size_t Free(Thread* self, void* ptr)
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 0b26019..b53ee10 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -180,7 +180,9 @@
     runtime->SetFaultMessage(oss.str());
     CHECK_EQ(self_->SetStateUnsafe(old_state), kRunnable);
   }
-
+  // Revoke the thread local buffers since the GC may allocate into a RosAllocSpace and this helps
+  // to prevent fragmentation.
+  RevokeAllThreadLocalBuffers();
   if (generational_) {
     if (gc_cause_ == kGcCauseExplicit || gc_cause_ == kGcCauseForNativeAlloc ||
         clear_soft_references_) {
@@ -332,11 +334,8 @@
 class SemiSpaceScanObjectVisitor {
  public:
   explicit SemiSpaceScanObjectVisitor(SemiSpace* ss) : semi_space_(ss) {}
-  void operator()(Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
-    // TODO: fix NO_THREAD_SAFETY_ANALYSIS. ScanObject() requires an
-    // exclusive lock on the mutator lock, but
-    // SpaceBitmap::VisitMarkedRange() only requires the shared lock.
+  void operator()(Object* obj) const EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_,
+                                                              Locks::heap_bitmap_lock_) {
     DCHECK(obj != nullptr);
     semi_space_->ScanObject(obj);
   }
@@ -552,10 +551,11 @@
     // (pseudo-promote) it to the main free list space (as sort
     // of an old generation.)
     space::MallocSpace* promo_dest_space = GetHeap()->GetPrimaryFreeListSpace();
-    forward_address = promo_dest_space->Alloc(self_, object_size, &bytes_allocated, nullptr);
+    forward_address = promo_dest_space->AllocThreadUnsafe(self_, object_size, &bytes_allocated,
+                                                          nullptr);
     if (UNLIKELY(forward_address == nullptr)) {
       // If out of space, fall back to the to-space.
-      forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated, nullptr);
+      forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr);
     } else {
       bytes_promoted_ += bytes_allocated;
       // Dirty the card at the destionation as it may contain
@@ -599,7 +599,7 @@
     DCHECK(forward_address != nullptr);
   } else {
     // If it's allocated after the last GC (younger), copy it to the to-space.
-    forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated, nullptr);
+    forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr);
   }
   ++objects_moved_;
   bytes_moved_ += bytes_allocated;
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 9b6df16..3b3e1b1 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -98,12 +98,10 @@
   // Returns the new address of the object.
   template<bool kPoisonReferences>
   void MarkObject(mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   void ScanObject(mirror::Object* obj)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   void VerifyNoFromSpaceReferences(mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
@@ -150,8 +148,7 @@
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   virtual mirror::Object* MarkNonForwardedObject(mirror::Object* obj)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   // Schedules an unmarked object for reference processing.
   void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference)
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 4d074f1..5d517bb 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -89,7 +89,7 @@
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
            double target_utilization, double foreground_heap_growth_multiplier, size_t capacity,
-           const std::string& image_file_name,
+           const std::string& image_file_name, const InstructionSet image_instruction_set,
            CollectorType foreground_collector_type, CollectorType background_collector_type,
            size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
            size_t long_pause_log_threshold, size_t long_gc_log_threshold,
@@ -186,7 +186,8 @@
   // Requested begin for the alloc space, to follow the mapped image and oat files
   byte* requested_alloc_space_begin = nullptr;
   if (!image_file_name.empty()) {
-    space::ImageSpace* image_space = space::ImageSpace::Create(image_file_name.c_str());
+    space::ImageSpace* image_space = space::ImageSpace::Create(image_file_name.c_str(),
+                                                               image_instruction_set);
     CHECK(image_space != nullptr) << "Failed to create space for " << image_file_name;
     AddSpace(image_space);
     // Oat files referenced by image files immediately follow them in memory, ensure alloc space
@@ -357,16 +358,16 @@
     can_move_objects = !have_zygote_space_;
   }
   if (kUseRosAlloc) {
-    main_space_ = space::RosAllocSpace::CreateFromMemMap(mem_map, "main rosalloc space",
-                                                         kDefaultStartingSize, initial_size,
-                                                         growth_limit, capacity, low_memory_mode_,
-                                                         can_move_objects);
+    rosalloc_space_ = space::RosAllocSpace::CreateFromMemMap(
+        mem_map, "main rosalloc space", kDefaultStartingSize, initial_size, growth_limit, capacity,
+        low_memory_mode_, can_move_objects);
+    main_space_ = rosalloc_space_;
     CHECK(main_space_ != nullptr) << "Failed to create rosalloc space";
   } else {
-    main_space_ = space::DlMallocSpace::CreateFromMemMap(mem_map, "main dlmalloc space",
-                                                         kDefaultStartingSize, initial_size,
-                                                         growth_limit, capacity,
-                                                         can_move_objects);
+    dlmalloc_space_ = space::DlMallocSpace::CreateFromMemMap(
+        mem_map, "main dlmalloc space", kDefaultStartingSize, initial_size, growth_limit, capacity,
+        can_move_objects);
+    main_space_ = dlmalloc_space_;
     CHECK(main_space_ != nullptr) << "Failed to create dlmalloc space";
   }
   main_space_->SetFootprintLimit(main_space_->Capacity());
@@ -579,7 +580,7 @@
   thread_pool_.reset(nullptr);
 }
 
-void Heap::AddSpace(space::Space* space, bool set_as_default) {
+void Heap::AddSpace(space::Space* space) {
   DCHECK(space != nullptr);
   WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   if (space->IsContinuousSpace()) {
@@ -594,18 +595,6 @@
       mark_bitmap_->AddContinuousSpaceBitmap(mark_bitmap);
     }
     continuous_spaces_.push_back(continuous_space);
-    if (set_as_default) {
-      if (continuous_space->IsDlMallocSpace()) {
-        dlmalloc_space_ = continuous_space->AsDlMallocSpace();
-      } else if (continuous_space->IsRosAllocSpace()) {
-        // Revoke before if we already have a rosalloc_space_ so that we don't end up with non full
-        // runs from the previous one during the revoke after.
-        if (rosalloc_space_ != nullptr) {
-          rosalloc_space_->RevokeAllThreadLocalBuffers();
-        }
-        rosalloc_space_ = continuous_space->AsRosAllocSpace();
-      }
-    }
     // Ensure that spaces remain sorted in increasing order of start address.
     std::sort(continuous_spaces_.begin(), continuous_spaces_.end(),
               [](const space::ContinuousSpace* a, const space::ContinuousSpace* b) {
@@ -623,7 +612,16 @@
   }
 }
 
-void Heap::RemoveSpace(space::Space* space, bool unset_as_default) {
+void Heap::SetSpaceAsDefault(space::ContinuousSpace* continuous_space) {
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  if (continuous_space->IsDlMallocSpace()) {
+    dlmalloc_space_ = continuous_space->AsDlMallocSpace();
+  } else if (continuous_space->IsRosAllocSpace()) {
+    rosalloc_space_ = continuous_space->AsRosAllocSpace();
+  }
+}
+
+void Heap::RemoveSpace(space::Space* space) {
   DCHECK(space != nullptr);
   WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   if (space->IsContinuousSpace()) {
@@ -640,20 +638,6 @@
     auto it = std::find(continuous_spaces_.begin(), continuous_spaces_.end(), continuous_space);
     DCHECK(it != continuous_spaces_.end());
     continuous_spaces_.erase(it);
-    if (unset_as_default) {
-      if (continuous_space == dlmalloc_space_) {
-        dlmalloc_space_ = nullptr;
-      } else if (continuous_space == rosalloc_space_) {
-        rosalloc_space_ = nullptr;
-      }
-      if (continuous_space == main_space_) {
-        main_space_ = nullptr;
-      } else if (continuous_space == bump_pointer_space_) {
-        bump_pointer_space_ = nullptr;
-      } else if (continuous_space == temp_space_) {
-        temp_space_ = nullptr;
-      }
-    }
   } else {
     DCHECK(space->IsDiscontinuousSpace());
     space::DiscontinuousSpace* discontinuous_space = space->AsDiscontinuousSpace();
@@ -1469,7 +1453,7 @@
         // Remove the main space so that we don't try to trim it, this doens't work for debug
         // builds since RosAlloc attempts to read the magic number from a protected page.
         // TODO: Clean this up by getting rid of the remove_as_default parameter.
-        RemoveSpace(main_space_, false);
+        RemoveSpace(main_space_);
       }
       break;
     }
@@ -1478,7 +1462,7 @@
     case kCollectorTypeCMS: {
       if (IsMovingGc(collector_type_)) {
         // Compact to the main space from the bump pointer space, don't need to swap semispaces.
-        AddSpace(main_space_, false);
+        AddSpace(main_space_);
         main_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
         Compact(main_space_, bump_pointer_space_);
       }
@@ -1693,14 +1677,8 @@
       reset_main_space = true;
     }
     zygote_collector.SetToSpace(&target_space);
-
-    Runtime::Current()->GetThreadList()->SuspendAll();
+    zygote_collector.SetSwapSemiSpaces(false);
     zygote_collector.Run(kGcCauseCollectorTransition, false);
-    if (IsMovingGc(collector_type_)) {
-      SwapSemiSpaces();
-    }
-    Runtime::Current()->GetThreadList()->ResumeAll();
-
     if (reset_main_space) {
       main_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
       madvise(main_space_->Begin(), main_space_->Capacity(), MADV_DONTNEED);
@@ -1746,7 +1724,7 @@
                                                                         &non_moving_space_);
   delete old_alloc_space;
   CHECK(zygote_space != nullptr) << "Failed creating zygote space";
-  AddSpace(zygote_space, false);
+  AddSpace(zygote_space);
   non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
   AddSpace(non_moving_space_);
   have_zygote_space_ = true;
@@ -2396,8 +2374,7 @@
 }
 
 void Heap::PreGcVerification(collector::GarbageCollector* gc) {
-  if (verify_pre_gc_heap_ || verify_missing_card_marks_ || verify_mod_union_table_ ||
-      verify_pre_gc_rosalloc_) {
+  if (verify_pre_gc_heap_ || verify_missing_card_marks_ || verify_mod_union_table_) {
     collector::GarbageCollector::ScopedPause pause(gc);
     PreGcVerificationPaused(gc);
   }
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index c631372..d3b5cdc 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -31,6 +31,7 @@
 #include "gc/collector_type.h"
 #include "globals.h"
 #include "gtest/gtest.h"
+#include "instruction_set.h"
 #include "jni.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -140,6 +141,7 @@
                 size_t max_free, double target_utilization,
                 double foreground_heap_growth_multiplier, size_t capacity,
                 const std::string& original_image_file_name,
+                const InstructionSet image_instruction_set,
                 CollectorType foreground_collector_type, CollectorType background_collector_type,
                 size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
                 size_t long_pause_threshold, size_t long_gc_threshold,
@@ -281,11 +283,12 @@
   void RegisterGCAllocation(size_t bytes);
   void RegisterGCDeAllocation(size_t bytes);
 
-  // Public due to usage by tests.
-  void AddSpace(space::Space* space, bool set_as_default = true)
+  // Set the heap's private space pointers to be the same as the space based on it's type. Public
+  // due to usage by tests.
+  void SetSpaceAsDefault(space::ContinuousSpace* continuous_space)
       LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
-  void RemoveSpace(space::Space* space, bool unset_as_default = true)
-      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void AddSpace(space::Space* space) LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void RemoveSpace(space::Space* space) LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
 
   // Set target ideal heap utilization ratio, implements
   // dalvik.system.VMRuntime.setTargetHeapUtilization.
diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h
index 70ab64b..497a61f 100644
--- a/runtime/gc/space/bump_pointer_space-inl.h
+++ b/runtime/gc/space/bump_pointer_space-inl.h
@@ -36,6 +36,26 @@
   return ret;
 }
 
+inline mirror::Object* BumpPointerSpace::AllocThreadUnsafe(Thread* self, size_t num_bytes,
+                                                           size_t* bytes_allocated,
+                                                           size_t* usable_size) {
+  Locks::mutator_lock_->AssertExclusiveHeld(self);
+  num_bytes = RoundUp(num_bytes, kAlignment);
+  if (end_ + num_bytes > growth_end_) {
+    return nullptr;
+  }
+  mirror::Object* obj = reinterpret_cast<mirror::Object*>(end_);
+  end_ += num_bytes;
+  *bytes_allocated = num_bytes;
+  // Use the CAS free versions as an optimization.
+  objects_allocated_ = objects_allocated_ + 1;
+  bytes_allocated_ = bytes_allocated_ + num_bytes;
+  if (UNLIKELY(usable_size != nullptr)) {
+    *usable_size = num_bytes;
+  }
+  return obj;
+}
+
 inline mirror::Object* BumpPointerSpace::AllocNonvirtualWithoutAccounting(size_t num_bytes) {
   DCHECK(IsAligned<kAlignment>(num_bytes));
   byte* old_end;
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index e52a9a3..9e61f30 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -48,6 +48,11 @@
   // Allocate num_bytes, returns nullptr if the space is full.
   mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
                         size_t* usable_size) OVERRIDE;
+  // Thread-unsafe allocation for when mutators are suspended, used by the semispace collector.
+  mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                                    size_t* usable_size)
+      OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   mirror::Object* AllocNonvirtual(size_t num_bytes);
   mirror::Object* AllocNonvirtualWithoutAccounting(size_t num_bytes);
 
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 91d8820..3de1ba4 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -24,7 +24,6 @@
 #include "mirror/object-inl.h"
 #include "oat_file.h"
 #include "os.h"
-#include "runtime.h"
 #include "space-inl.h"
 #include "utils.h"
 
@@ -99,7 +98,8 @@
   return Exec(arg_vector, error_msg);
 }
 
-ImageSpace* ImageSpace::Create(const char* original_image_file_name) {
+ImageSpace* ImageSpace::Create(const char* original_image_file_name,
+                               const InstructionSet image_isa) {
   if (OS::FileExists(original_image_file_name)) {
     // If the /system file exists, it should be up-to-date, don't try to generate
     std::string error_msg;
@@ -112,7 +112,9 @@
   // If the /system file didn't exist, we need to use one from the dalvik-cache.
   // If the cache file exists, try to open, but if it fails, regenerate.
   // If it does not exist, generate.
-  std::string image_file_name(GetDalvikCacheFilenameOrDie(original_image_file_name));
+  const std::string dalvik_cache = GetDalvikCacheOrDie(GetInstructionSetString(image_isa));
+  std::string image_file_name(GetDalvikCacheFilenameOrDie(original_image_file_name,
+                                                          dalvik_cache.c_str()));
   std::string error_msg;
   if (OS::FileExists(image_file_name.c_str())) {
     space::ImageSpace* image_space = ImageSpace::Init(image_file_name.c_str(), true, &error_msg);
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index f6daf89..1652ec9 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_GC_SPACE_IMAGE_SPACE_H_
 
 #include "gc/accounting/space_bitmap.h"
+#include "runtime.h"
 #include "space.h"
 
 namespace art {
@@ -34,15 +35,16 @@
     return kSpaceTypeImageSpace;
   }
 
-  // Create a Space from an image file. Cannot be used for future
-  // allocation or collected.
+  // Create a Space from an image file for a specified instruction
+  // set. Cannot be used for future allocation or collected.
   //
   // Create also opens the OatFile associated with the image file so
   // that it be contiguously allocated with the image before the
   // creation of the alloc space. The ReleaseOatFile will later be
   // used to transfer ownership of the OatFile to the ClassLinker when
   // it is initialized.
-  static ImageSpace* Create(const char* image) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static ImageSpace* Create(const char* image, const InstructionSet image_isa)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Releases the OatFile from the ImageSpace so it can be transfer to
   // the caller, presumably the ClassLinker.
diff --git a/runtime/gc/space/rosalloc_space-inl.h b/runtime/gc/space/rosalloc_space-inl.h
index d270885..fbfef45 100644
--- a/runtime/gc/space/rosalloc_space-inl.h
+++ b/runtime/gc/space/rosalloc_space-inl.h
@@ -46,11 +46,15 @@
   return size_by_size;
 }
 
+template<bool kThreadSafe>
 inline mirror::Object* RosAllocSpace::AllocCommon(Thread* self, size_t num_bytes,
                                                   size_t* bytes_allocated, size_t* usable_size) {
   size_t rosalloc_size = 0;
+  if (!kThreadSafe) {
+    Locks::mutator_lock_->AssertExclusiveHeld(self);
+  }
   mirror::Object* result = reinterpret_cast<mirror::Object*>(
-      rosalloc_->Alloc(self, num_bytes, &rosalloc_size));
+      rosalloc_->Alloc<kThreadSafe>(self, num_bytes, &rosalloc_size));
   if (LIKELY(result != NULL)) {
     if (kDebugSpaces) {
       CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result)
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index f5c0e94..a1511e7 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -159,7 +159,7 @@
   }
   // Note RosAlloc zeroes memory internally.
   // Return the new allocation or NULL.
-  CHECK(!kDebugSpaces || result == NULL || Contains(result));
+  CHECK(!kDebugSpaces || result == nullptr || Contains(result));
   return result;
 }
 
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
index a156738..2934af8 100644
--- a/runtime/gc/space/rosalloc_space.h
+++ b/runtime/gc/space/rosalloc_space.h
@@ -52,6 +52,11 @@
                         size_t* usable_size) OVERRIDE {
     return AllocNonvirtual(self, num_bytes, bytes_allocated, usable_size);
   }
+  mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                                    size_t* usable_size)
+      OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return AllocNonvirtualThreadUnsafe(self, num_bytes, bytes_allocated, usable_size);
+  }
   size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE {
     return AllocationSizeNonvirtual(obj, usable_size);
   }
@@ -65,6 +70,11 @@
     // RosAlloc zeroes memory internally.
     return AllocCommon(self, num_bytes, bytes_allocated, usable_size);
   }
+  mirror::Object* AllocNonvirtualThreadUnsafe(Thread* self, size_t num_bytes,
+                                              size_t* bytes_allocated, size_t* usable_size) {
+    // RosAlloc zeroes memory internally. Pass in false for thread unsafe.
+    return AllocCommon<false>(self, num_bytes, bytes_allocated, usable_size);
+  }
 
   // TODO: NO_THREAD_SAFETY_ANALYSIS because SizeOf() requires that mutator_lock is held.
   size_t AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size)
@@ -116,6 +126,7 @@
                 size_t starting_size, size_t initial_size, bool low_memory_mode);
 
  private:
+  template<bool kThreadSafe = true>
   mirror::Object* AllocCommon(Thread* self, size_t num_bytes, size_t* bytes_allocated,
                               size_t* usable_size);
 
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 0a87a16..dcf5357 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -203,9 +203,17 @@
   // Allocate num_bytes without allowing growth. If the allocation
   // succeeds, the output parameter bytes_allocated will be set to the
   // actually allocated bytes which is >= num_bytes.
+  // Alloc can be called from multiple threads at the same time and must be thread-safe.
   virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
                                 size_t* usable_size) = 0;
 
+  // Thread-unsafe allocation for when mutators are suspended, used by the semispace collector.
+  virtual mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated,
+                                            size_t* usable_size)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return Alloc(self, num_bytes, bytes_allocated, usable_size);
+  }
+
   // Return the storage space required by obj.
   virtual size_t AllocationSize(mirror::Object* obj, size_t* usable_size) = 0;
 
diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h
index 28200df..3335e72 100644
--- a/runtime/gc/space/space_test.h
+++ b/runtime/gc/space/space_test.h
@@ -38,9 +38,13 @@
   SpaceTest() : byte_array_class_(nullptr) {
   }
 
-  void AddSpace(ContinuousSpace* space) {
-    // By passing true, AddSpace() does the revoke.
-    Runtime::Current()->GetHeap()->AddSpace(space, true);
+  void AddSpace(ContinuousSpace* space, bool revoke = true) {
+    Heap* heap = Runtime::Current()->GetHeap();
+    if (revoke) {
+      heap->RevokeAllThreadLocalBuffers();
+    }
+    heap->AddSpace(space);
+    heap->SetSpaceAsDefault(space);
   }
 
   mirror::Class* GetByteArrayClass(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -227,15 +231,16 @@
   gc::Heap* heap = Runtime::Current()->GetHeap();
   space::Space* old_space = space;
   heap->RemoveSpace(old_space);
+  heap->RevokeAllThreadLocalBuffers();
   space::ZygoteSpace* zygote_space = space->CreateZygoteSpace("alloc space",
                                                               heap->IsLowMemoryMode(),
                                                               &space);
   delete old_space;
   // Add the zygote space.
-  AddSpace(zygote_space);
+  AddSpace(zygote_space, false);
 
   // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddSpace(space);
+  AddSpace(space, false);
 
   // Succeeds, fits without adjusting the footprint limit.
   ptr1.reset(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size));
diff --git a/runtime/instruction_set.cc b/runtime/instruction_set.cc
index 73d4279..cbcd2e0 100644
--- a/runtime/instruction_set.cc
+++ b/runtime/instruction_set.cc
@@ -21,6 +21,48 @@
 
 namespace art {
 
+const char* GetInstructionSetString(const InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+      return "arm";
+    case kArm64:
+      return "arm64";
+    case kX86:
+      return "x86";
+    case kX86_64:
+      return "x86_64";
+    case kMips:
+      return "mips";
+    case kNone:
+      return "none";
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      return nullptr;
+  }
+}
+
+InstructionSet GetInstructionSetFromString(const char* isa_str) {
+  CHECK(isa_str != nullptr);
+
+  if (!strcmp("arm", isa_str)) {
+    return kArm;
+  } else if (!strcmp("arm64", isa_str)) {
+    return kArm64;
+  } else if (!strcmp("x86", isa_str)) {
+    return kX86;
+  } else if (!strcmp("x86_64", isa_str)) {
+    return kX86_64;
+  } else if (!strcmp("mips", isa_str)) {
+    return kMips;
+  } else if (!strcmp("none", isa_str)) {
+    return kNone;
+  }
+
+  LOG(FATAL) << "Unknown ISA " << isa_str;
+  return kNone;
+}
+
 size_t GetInstructionSetPointerSize(InstructionSet isa) {
   switch (isa) {
     case kArm:
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index c746e06..4bc35a7 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -35,6 +35,9 @@
 };
 std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs);
 
+const char* GetInstructionSetString(const InstructionSet isa);
+InstructionSet GetInstructionSetFromString(const char* instruction_set);
+
 size_t GetInstructionSetPointerSize(InstructionSet isa);
 size_t GetInstructionSetAlignment(InstructionSet isa);
 bool Is64BitInstructionSet(InstructionSet isa);
diff --git a/runtime/instruction_set_test.cc b/runtime/instruction_set_test.cc
new file mode 100644
index 0000000..cd6337c
--- /dev/null
+++ b/runtime/instruction_set_test.cc
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_set.h"
+
+#include "common_runtime_test.h"
+
+namespace art {
+
+class InstructionSetTest : public CommonRuntimeTest {};
+
+TEST_F(InstructionSetTest, GetInstructionSetFromString) {
+  EXPECT_EQ(kArm, GetInstructionSetFromString("arm"));
+  EXPECT_EQ(kArm64, GetInstructionSetFromString("arm64"));
+  EXPECT_EQ(kX86, GetInstructionSetFromString("x86"));
+  EXPECT_EQ(kX86_64, GetInstructionSetFromString("x86_64"));
+  EXPECT_EQ(kMips, GetInstructionSetFromString("mips"));
+  EXPECT_EQ(kNone, GetInstructionSetFromString("none"));
+}
+
+TEST_F(InstructionSetTest, GetInstructionSetString) {
+  EXPECT_STREQ("arm", GetInstructionSetString(kArm));
+  EXPECT_STREQ("arm", GetInstructionSetString(kThumb2));
+  EXPECT_STREQ("arm64", GetInstructionSetString(kArm64));
+  EXPECT_STREQ("x86", GetInstructionSetString(kX86));
+  EXPECT_STREQ("x86_64", GetInstructionSetString(kX86_64));
+  EXPECT_STREQ("mips", GetInstructionSetString(kMips));
+  EXPECT_STREQ("none", GetInstructionSetString(kNone));
+}
+
+TEST_F(InstructionSetTest, TestRoundTrip) {
+  EXPECT_EQ(kRuntimeISA, GetInstructionSetFromString(GetInstructionSetString(kRuntimeISA)));
+}
+
+}  // namespace art
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 953d3a6..d9c1309 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -260,16 +260,15 @@
 #endif
 }
 
-static jboolean DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFilename,
-    jstring javaPkgname, jboolean defer) {
+static jboolean IsDexOptNeededInternal(JNIEnv* env, const char* filename,
+    const char* pkgname, const char* instruction_set, const jboolean defer) {
   const bool kVerboseLogging = false;  // Spammy logging.
   const bool kReasonLogging = true;  // Logging of reason for returning JNI_TRUE.
 
-  ScopedUtfChars filename(env, javaFilename);
-  if ((filename.c_str() == nullptr) || !OS::FileExists(filename.c_str())) {
-    LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename.c_str() << "' does not exist";
+  if ((filename == nullptr) || !OS::FileExists(filename)) {
+    LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename << "' does not exist";
     ScopedLocalRef<jclass> fnfe(env, env->FindClass("java/io/FileNotFoundException"));
-    const char* message = (filename.c_str() == nullptr) ? "<empty file name>" : filename.c_str();
+    const char* message = (filename == nullptr) ? "<empty file name>" : filename;
     env->ThrowNew(fnfe.get(), message);
     return JNI_FALSE;
   }
@@ -278,11 +277,14 @@
   // fact that code is running at all means that this should be true.
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
+  // TODO: We're assuming that the 64 and 32 bit runtimes have identical
+  // class paths. isDexOptNeeded will not necessarily be called on a runtime
+  // that has the same instruction set as the file being dexopted.
   const std::vector<const DexFile*>& boot_class_path = class_linker->GetBootClassPath();
   for (size_t i = 0; i < boot_class_path.size(); i++) {
-    if (boot_class_path[i]->GetLocation() == filename.c_str()) {
+    if (boot_class_path[i]->GetLocation() == filename) {
       if (kVerboseLogging) {
-        LOG(INFO) << "DexFile_isDexOptNeeded ignoring boot class path file: " << filename.c_str();
+        LOG(INFO) << "DexFile_isDexOptNeeded ignoring boot class path file: " << filename;
       }
       return JNI_FALSE;
     }
@@ -293,12 +295,11 @@
   // If the 'defer' argument is true then this will be retried later.  In this case we
   // need to make sure that the profile file copy is not made so that we will get the
   // same result second time.
-  if (javaPkgname != NULL) {
-    ScopedUtfChars pkgname(env, javaPkgname);
-    std::string profile_file = GetDalvikCacheOrDie(GetAndroidData()) + std::string("/profiles/") +
-    pkgname.c_str();
-
-    std::string profile_cache_dir = GetDalvikCacheOrDie(GetAndroidData()) + "/profile-cache";
+  if (pkgname != nullptr) {
+    const std::string profile_file = GetDalvikCacheOrDie("profiles", false /* create_if_absent */)
+        + std::string("/") + pkgname;
+    const std::string profile_cache_dir = GetDalvikCacheOrDie("profile-cache",
+                                                              false /* create_if_absent */);
 
     // Make the profile cache if it doesn't exist.
     mkdir(profile_cache_dir.c_str(), 0700);
@@ -306,7 +307,7 @@
     // The previous profile file (a copy of the profile the last time this was run) is
     // in the dalvik-cache directory because this is owned by system.  The profiles
     // directory is owned by install so system cannot write files in there.
-    std::string prev_profile_file = profile_cache_dir + std::string("/") + pkgname.c_str();
+    std::string prev_profile_file = profile_cache_dir + std::string("/") + pkgname;
 
     struct stat profstat, prevstat;
     int e1 = stat(profile_file.c_str(), &profstat);
@@ -377,41 +378,41 @@
   }
 
   // Check if we have an odex file next to the dex file.
-  std::string odex_filename(OatFile::DexFilenameToOdexFilename(filename.c_str()));
+  std::string odex_filename(OatFile::DexFilenameToOdexFilename(filename));
   std::string error_msg;
   UniquePtr<const OatFile> oat_file(OatFile::Open(odex_filename, odex_filename, NULL, false,
                                                   &error_msg));
   if (oat_file.get() == nullptr) {
     if (kVerboseLogging) {
-      LOG(INFO) << "DexFile_isDexOptNeeded failed to open oat file '" << filename.c_str()
+      LOG(INFO) << "DexFile_isDexOptNeeded failed to open oat file '" << filename
           << "': " << error_msg;
     }
     error_msg.clear();
   } else {
-    const art::OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(filename.c_str(), NULL,
+    const art::OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(filename, NULL,
                                                                            kReasonLogging);
     if (oat_dex_file != nullptr) {
       uint32_t location_checksum;
       // If its not possible to read the classes.dex assume up-to-date as we won't be able to
       // compile it anyway.
-      if (!DexFile::GetChecksum(filename.c_str(), &location_checksum, &error_msg)) {
+      if (!DexFile::GetChecksum(filename, &location_checksum, &error_msg)) {
         if (kVerboseLogging) {
           LOG(INFO) << "DexFile_isDexOptNeeded ignoring precompiled stripped file: "
-              << filename.c_str() << ": " << error_msg;
+              << filename << ": " << error_msg;
         }
         return JNI_FALSE;
       }
-      if (ClassLinker::VerifyOatFileChecksums(oat_file.get(), filename.c_str(), location_checksum,
+      if (ClassLinker::VerifyOatFileChecksums(oat_file.get(), filename, location_checksum,
                                               &error_msg)) {
         if (kVerboseLogging) {
           LOG(INFO) << "DexFile_isDexOptNeeded precompiled file " << odex_filename
-              << " has an up-to-date checksum compared to " << filename.c_str();
+              << " has an up-to-date checksum compared to " << filename;
         }
         return JNI_FALSE;
       } else {
         if (kVerboseLogging) {
           LOG(INFO) << "DexFile_isDexOptNeeded found precompiled file " << odex_filename
-              << " with an out-of-date checksum compared to " << filename.c_str()
+              << " with an out-of-date checksum compared to " << filename
               << ": " << error_msg;
         }
         error_msg.clear();
@@ -420,12 +421,14 @@
   }
 
   // Check if we have an oat file in the cache
-  std::string cache_location(GetDalvikCacheFilenameOrDie(filename.c_str()));
-  oat_file.reset(OatFile::Open(cache_location, filename.c_str(), NULL, false, &error_msg));
+  const std::string cache_dir(GetDalvikCacheOrDie(instruction_set));
+  const std::string cache_location(
+      GetDalvikCacheFilenameOrDie(filename, cache_dir.c_str()));
+  oat_file.reset(OatFile::Open(cache_location, filename, NULL, false, &error_msg));
   if (oat_file.get() == nullptr) {
     if (kReasonLogging) {
       LOG(INFO) << "DexFile_isDexOptNeeded cache file " << cache_location
-          << " does not exist for " << filename.c_str() << ": " << error_msg;
+          << " does not exist for " << filename << ": " << error_msg;
     }
     return JNI_TRUE;
   }
@@ -458,19 +461,19 @@
   }
 
   uint32_t location_checksum;
-  if (!DexFile::GetChecksum(filename.c_str(), &location_checksum, &error_msg)) {
+  if (!DexFile::GetChecksum(filename, &location_checksum, &error_msg)) {
     if (kReasonLogging) {
-      LOG(ERROR) << "DexFile_isDexOptNeeded failed to compute checksum of " << filename.c_str()
+      LOG(ERROR) << "DexFile_isDexOptNeeded failed to compute checksum of " << filename
             << " (error " << error_msg << ")";
     }
     return JNI_TRUE;
   }
 
-  if (!ClassLinker::VerifyOatFileChecksums(oat_file.get(), filename.c_str(), location_checksum,
+  if (!ClassLinker::VerifyOatFileChecksums(oat_file.get(), filename, location_checksum,
                                            &error_msg)) {
     if (kReasonLogging) {
       LOG(INFO) << "DexFile_isDexOptNeeded cache file " << cache_location
-          << " has out-of-date checksum compared to " << filename.c_str()
+          << " has out-of-date checksum compared to " << filename
           << " (error " << error_msg << ")";
     }
     return JNI_TRUE;
@@ -478,15 +481,28 @@
 
   if (kVerboseLogging) {
     LOG(INFO) << "DexFile_isDexOptNeeded cache file " << cache_location
-              << " is up-to-date for " << filename.c_str();
+              << " is up-to-date for " << filename;
   }
   CHECK(error_msg.empty()) << error_msg;
   return JNI_FALSE;
 }
 
+static jboolean DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFilename,
+    jstring javaPkgname, jstring javaInstructionSet, jboolean defer) {
+  ScopedUtfChars filename(env, javaFilename);
+  NullableScopedUtfChars pkgname(env, javaPkgname);
+  ScopedUtfChars instruction_set(env, javaInstructionSet);
+
+  return IsDexOptNeededInternal(env, filename.c_str(), pkgname.c_str(),
+                                instruction_set.c_str(), defer);
+}
+
 // public API, NULL pkgname
-static jboolean DexFile_isDexOptNeeded(JNIEnv* env, jclass c, jstring javaFilename) {
-  return DexFile_isDexOptNeededInternal(env, c, javaFilename, NULL, false);
+static jboolean DexFile_isDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename) {
+  const char* instruction_set = GetInstructionSetString(kRuntimeISA);
+  ScopedUtfChars filename(env, javaFilename);
+  return IsDexOptNeededInternal(env, filename.c_str(), nullptr /* pkgname */,
+                                instruction_set, false /* defer */);
 }
 
 
@@ -495,7 +511,7 @@
   NATIVE_METHOD(DexFile, defineClassNative, "(Ljava/lang/String;Ljava/lang/ClassLoader;J)Ljava/lang/Class;"),
   NATIVE_METHOD(DexFile, getClassNameList, "(J)[Ljava/lang/String;"),
   NATIVE_METHOD(DexFile, isDexOptNeeded, "(Ljava/lang/String;)Z"),
-  NATIVE_METHOD(DexFile, isDexOptNeededInternal, "(Ljava/lang/String;Ljava/lang/String;Z)Z"),
+  NATIVE_METHOD(DexFile, isDexOptNeededInternal, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)Z"),
   NATIVE_METHOD(DexFile, openDexFileNative, "(Ljava/lang/String;Ljava/lang/String;I)J"),
 };
 
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index c0dc94b..9cf8785 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -246,6 +246,7 @@
   profile_clock_source_ = kDefaultProfilerClockSource;
 
   verify_ = true;
+  image_isa_ = kRuntimeISA;
 
   // Default to explicit checks.  Switch off with -implicit-checks:.
   // or setprop dalvik.vm.implicit_checks check1,check2,...
@@ -412,6 +413,9 @@
     } else if (option == "compilercallbacks") {
       compiler_callbacks_ =
           reinterpret_cast<CompilerCallbacks*>(const_cast<void*>(options[i].second));
+    } else if (option == "imageinstructionset") {
+      image_isa_ = GetInstructionSetFromString(
+          reinterpret_cast<const char*>(options[i].second));
     } else if (option == "-Xzygote") {
       is_zygote_ = true;
     } else if (option == "-Xint") {
@@ -673,7 +677,7 @@
     background_collector_type_ = collector_type_;
   }
   return true;
-}
+}  // NOLINT(readability/fn_size)
 
 void ParsedOptions::Exit(int status) {
   hook_exit_(status);
diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h
index c02eb1d..e0b0fb5 100644
--- a/runtime/parsed_options.h
+++ b/runtime/parsed_options.h
@@ -85,6 +85,7 @@
   bool profile_start_immediately_;
   ProfilerClockSource profile_clock_source_;
   bool verify_;
+  InstructionSet image_isa_;
 
   static constexpr uint32_t kExplicitNullCheck = 1;
   static constexpr uint32_t kExplicitSuspendCheck = 2;
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index a91fdf1..aee0d64 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -19,6 +19,7 @@
 #include "catch_block_stack_visitor.h"
 #include "deoptimize_stack_visitor.h"
 #include "entrypoints/entrypoint_utils.h"
+#include "mirror/art_method-inl.h"
 #include "sirt_ref-inl.h"
 
 namespace art {
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 6bbfcee..20df78e 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -567,6 +567,7 @@
                        options->foreground_heap_growth_multiplier_,
                        options->heap_maximum_size_,
                        options->image_,
+                       options->image_isa_,
                        options->collector_type_,
                        options->background_collector_type_,
                        options->parallel_gc_threads_,
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 7470670..e67a64f 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1018,7 +1018,8 @@
   tls32_.state_and_flags.as_struct.flags = 0;
   tls32_.state_and_flags.as_struct.state = kNative;
   memset(&tlsPtr_.held_mutexes[0], 0, sizeof(tlsPtr_.held_mutexes));
-  std::fill(tlsPtr_.rosalloc_runs, tlsPtr_.rosalloc_runs + kRosAllocNumOfSizeBrackets,
+  std::fill(tlsPtr_.rosalloc_runs,
+            tlsPtr_.rosalloc_runs + gc::allocator::RosAlloc::kNumThreadLocalSizeBrackets,
             gc::allocator::RosAlloc::GetDedicatedFullRun());
   for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
     tlsPtr_.checkpoint_functions[i] = nullptr;
diff --git a/runtime/thread.h b/runtime/thread.h
index f869285..8c17082 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -29,6 +29,7 @@
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/portable/portable_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "gc/allocator/rosalloc.h"
 #include "globals.h"
 #include "jvalue.h"
 #include "object_callbacks.h"
@@ -783,9 +784,6 @@
     return tlsPtr_.thread_local_objects;
   }
 
-  // ROS alloc TLS.
-  static constexpr size_t kRosAllocNumOfSizeBrackets = 34;
-
   void* GetRosAllocRun(size_t index) const {
     return tlsPtr_.rosalloc_runs[index];
   }
@@ -1060,12 +1058,8 @@
     byte* thread_local_end;
     size_t thread_local_objects;
 
-    // Thread-local rosalloc runs. There are 34 size brackets in rosalloc
-    // runs (RosAlloc::kNumOfSizeBrackets). We can't refer to the
-    // RosAlloc class due to a header file circular dependency issue.
-    // To compensate, we check that the two values match at RosAlloc
-    // initialization time.
-    void* rosalloc_runs[kRosAllocNumOfSizeBrackets];
+    // There are RosAlloc::kNumThreadLocalSizeBrackets thread-local size brackets per thread.
+    void* rosalloc_runs[gc::allocator::RosAlloc::kNumThreadLocalSizeBrackets];
 
     // Thread-local allocation stack data/routines.
     mirror::Object** thread_local_alloc_stack_top;
diff --git a/runtime/utils.cc b/runtime/utils.cc
index c4d1a78..ee2cca4 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1167,14 +1167,20 @@
   return android_data;
 }
 
-std::string GetDalvikCacheOrDie(const char* android_data) {
-  std::string dalvik_cache(StringPrintf("%s/dalvik-cache", android_data));
-
-  if (!OS::DirectoryExists(dalvik_cache.c_str())) {
-    if (StartsWith(dalvik_cache, "/tmp/")) {
-      int result = mkdir(dalvik_cache.c_str(), 0700);
+std::string GetDalvikCacheOrDie(const char* subdir, const bool create_if_absent) {
+  CHECK(subdir != nullptr);
+  const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", GetAndroidData()));
+  const std::string dalvik_cache = dalvik_cache_root + subdir;
+  if (create_if_absent && !OS::DirectoryExists(dalvik_cache.c_str())) {
+    if (StartsWith(dalvik_cache_root, "/tmp/")) {
+      int result = mkdir(dalvik_cache_root.c_str(), 0700);
+      if (result != 0 && errno != EEXIST) {
+        PLOG(FATAL) << "Failed to create dalvik-cache directory " << dalvik_cache_root;
+        return "";
+      }
+      result = mkdir(dalvik_cache.c_str(), 0700);
       if (result != 0) {
-        LOG(FATAL) << "Failed to create dalvik-cache directory " << dalvik_cache;
+        PLOG(FATAL) << "Failed to create dalvik-cache directory " << dalvik_cache;
         return "";
       }
     } else {
@@ -1185,8 +1191,7 @@
   return dalvik_cache;
 }
 
-std::string GetDalvikCacheFilenameOrDie(const char* location) {
-  std::string dalvik_cache(GetDalvikCacheOrDie(GetAndroidData()));
+std::string GetDalvikCacheFilenameOrDie(const char* location, const char* cache_location) {
   if (location[0] != '/') {
     LOG(FATAL) << "Expected path in location to be absolute: "<< location;
   }
@@ -1196,7 +1201,7 @@
     cache_file += DexFile::kClassesDex;
   }
   std::replace(cache_file.begin(), cache_file.end(), '/', '@');
-  return dalvik_cache + "/" + cache_file;
+  return StringPrintf("%s/%s", cache_location, cache_file.c_str());
 }
 
 bool IsZipMagic(uint32_t magic) {
diff --git a/runtime/utils.h b/runtime/utils.h
index 6ab1013..4b2f230 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -394,11 +394,14 @@
 // Find $ANDROID_DATA, /data, or abort.
 const char* GetAndroidData();
 
-// Returns the dalvik-cache location, or dies trying.
-std::string GetDalvikCacheOrDie(const char* android_data);
+// Returns the dalvik-cache location, or dies trying. subdir will be
+// appended to the cache location.
+std::string GetDalvikCacheOrDie(const char* subdir, bool create_if_absent = true);
 
-// Returns the dalvik-cache location for a DexFile or OatFile, or dies trying.
-std::string GetDalvikCacheFilenameOrDie(const char* location);
+// Returns the absolute dalvik-cache path for a DexFile or OatFile, or
+// dies trying. The path returned will be rooted at cache_location.
+std::string GetDalvikCacheFilenameOrDie(const char* file_location,
+                                        const char* cache_location);
 
 // Check whether the given magic matches a known file type.
 bool IsZipMagic(uint32_t magic);
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index 2c1aae8..d425620 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -338,18 +338,16 @@
   EXPECT_FALSE(EndsWith("oo", "foo"));
 }
 
-void CheckGetDalvikCacheFilenameOrDie(const char* in, const char* out) {
-  std::string expected(getenv("ANDROID_DATA"));
-  expected += "/dalvik-cache/";
-  expected += out;
-  EXPECT_STREQ(expected.c_str(), GetDalvikCacheFilenameOrDie(in).c_str());
-}
-
 TEST_F(UtilsTest, GetDalvikCacheFilenameOrDie) {
-  CheckGetDalvikCacheFilenameOrDie("/system/app/Foo.apk", "system@app@Foo.apk@classes.dex");
-  CheckGetDalvikCacheFilenameOrDie("/data/app/foo-1.apk", "data@app@foo-1.apk@classes.dex");
-  CheckGetDalvikCacheFilenameOrDie("/system/framework/core.jar", "system@framework@core.jar@classes.dex");
-  CheckGetDalvikCacheFilenameOrDie("/system/framework/boot.art", "system@framework@boot.art");
+  EXPECT_STREQ("/foo/system@app@Foo.apk@classes.dex",
+               GetDalvikCacheFilenameOrDie("/system/app/Foo.apk", "/foo").c_str());
+
+  EXPECT_STREQ("/foo/data@app@foo-1.apk@classes.dex",
+               GetDalvikCacheFilenameOrDie("/data/app/foo-1.apk", "/foo").c_str());
+  EXPECT_STREQ("/foo/system@framework@core.jar@classes.dex",
+               GetDalvikCacheFilenameOrDie("/system/framework/core.jar", "/foo").c_str());
+  EXPECT_STREQ("/foo/system@framework@boot.art",
+               GetDalvikCacheFilenameOrDie("/system/framework/boot.art", "/foo").c_str());
 }
 
 TEST_F(UtilsTest, ExecSuccess) {
diff --git a/test/JniTest/JniTest.java b/test/JniTest/JniTest.java
index d53cf5e..3c4ed35 100644
--- a/test/JniTest/JniTest.java
+++ b/test/JniTest/JniTest.java
@@ -24,6 +24,10 @@
         testCallStaticVoidMethodOnSubClass();
         testGetMirandaMethod();
         testZeroLengthByteBuffers();
+        testByteMethod();
+        testShortMethod();
+        testBooleanMethod();
+        testCharMethod();
     }
 
     private static native void testFindClassOnAttachedNativeThread();
@@ -79,4 +83,67 @@
     private static interface testGetMirandaMethod_MirandaInterface {
         public boolean inInterface();
     }
+
+    // Test sign-extension for values < 32b
+
+    native static byte byteMethod(byte b1, byte b2, byte b3, byte b4, byte b5, byte b6, byte b7,
+        byte b8, byte b9, byte b10);
+
+    private static void testByteMethod() {
+      byte returns[] = { 0, 1, 2, 127, -1, -2, -128 };
+      for (int i = 0; i < returns.length; i++) {
+        byte result = byteMethod((byte)i, (byte)2, (byte)(-3), (byte)4, (byte)(-5), (byte)6,
+            (byte)(-7), (byte)8, (byte)(-9), (byte)10);
+        if (returns[i] != result) {
+          System.out.println("Run " + i + " with " + returns[i] + " vs " + result);
+          throw new AssertionError();
+        }
+      }
+    }
+
+    native static short shortMethod(short s1, short s2, short s3, short s4, short s5, short s6, short s7,
+        short s8, short s9, short s10);
+
+    private static void testShortMethod() {
+      short returns[] = { 0, 1, 2, 127, 32767, -1, -2, -128, -32768 };
+      for (int i = 0; i < returns.length; i++) {
+        short result = shortMethod((short)i, (short)2, (short)(-3), (short)4, (short)(-5), (short)6,
+            (short)(-7), (short)8, (short)(-9), (short)10);
+        if (returns[i] != result) {
+          System.out.println("Run " + i + " with " + returns[i] + " vs " + result);
+          throw new AssertionError();
+        }
+      }
+    }
+
+    // Test zero-extension for values < 32b
+
+    native static boolean booleanMethod(boolean b1, boolean b2, boolean b3, boolean b4, boolean b5, boolean b6, boolean b7,
+        boolean b8, boolean b9, boolean b10);
+
+    private static void testBooleanMethod() {
+      if (booleanMethod(false, true, false, true, false, true, false, true, false, true)) {
+        throw new AssertionError();
+      }
+
+      if (!booleanMethod(true, true, false, true, false, true, false, true, false, true)) {
+        throw new AssertionError();
+      }
+    }
+
+    native static char charMethod(char c1, char c2, char c3, char c4, char c5, char c6, char c7,
+        char c8, char c9, char c10);
+
+    private static void testCharMethod() {
+      char returns[] = { (char)0, (char)1, (char)2, (char)127, (char)255, (char)256, (char)15000,
+          (char)34000 };
+      for (int i = 0; i < returns.length; i++) {
+        char result = charMethod((char)i, 'a', 'b', 'c', '0', '1', '2', (char)1234, (char)2345,
+            (char)3456);
+        if (returns[i] != result) {
+          System.out.println("Run " + i + " with " + (int)returns[i] + " vs " + (int)result);
+          throw new AssertionError();
+        }
+      }
+    }
 }
diff --git a/test/JniTest/jni_test.cc b/test/JniTest/jni_test.cc
index 33af94b..024ba53 100644
--- a/test/JniTest/jni_test.cc
+++ b/test/JniTest/jni_test.cc
@@ -137,3 +137,92 @@
   assert(env->GetDirectBufferAddress(byte_buffer) == &buffer[0]);
   assert(env->GetDirectBufferCapacity(byte_buffer) == 0);
 }
+
+constexpr size_t kByteReturnSize = 7;
+jbyte byte_returns[kByteReturnSize] = { 0, 1, 2, 127, -1, -2, -128 };
+
+extern "C" jbyte JNICALL Java_JniTest_byteMethod(JNIEnv* env, jclass klass, jbyte b1, jbyte b2,
+                                                    jbyte b3, jbyte b4, jbyte b5, jbyte b6,
+                                                    jbyte b7, jbyte b8, jbyte b9, jbyte b10) {
+  // We use b1 to drive the output.
+  assert(b2 == 2);
+  assert(b3 == -3);
+  assert(b4 == 4);
+  assert(b5 == -5);
+  assert(b6 == 6);
+  assert(b7 == -7);
+  assert(b8 == 8);
+  assert(b9 == -9);
+  assert(b10 == 10);
+
+  assert(0 <= b1);
+  assert(b1 < static_cast<jbyte>(kByteReturnSize));
+
+  return byte_returns[b1];
+}
+
+constexpr size_t kShortReturnSize = 9;
+jshort short_returns[kShortReturnSize] = { 0, 1, 2, 127, 32767, -1, -2, -128,
+    static_cast<jshort>(0x8000) };
+// The weird static_cast is because short int is only guaranteed down to -32767, not Java's -32768.
+
+extern "C" jshort JNICALL Java_JniTest_shortMethod(JNIEnv* env, jclass klass, jshort s1, jshort s2,
+                                                    jshort s3, jshort s4, jshort s5, jshort s6,
+                                                    jshort s7, jshort s8, jshort s9, jshort s10) {
+  // We use s1 to drive the output.
+  assert(s2 == 2);
+  assert(s3 == -3);
+  assert(s4 == 4);
+  assert(s5 == -5);
+  assert(s6 == 6);
+  assert(s7 == -7);
+  assert(s8 == 8);
+  assert(s9 == -9);
+  assert(s10 == 10);
+
+  assert(0 <= s1);
+  assert(s1 < static_cast<jshort>(kShortReturnSize));
+
+  return short_returns[s1];
+}
+
+extern "C" jboolean JNICALL Java_JniTest_booleanMethod(JNIEnv* env, jclass klass, jboolean b1,
+                                                       jboolean b2, jboolean b3, jboolean b4,
+                                                       jboolean b5, jboolean b6, jboolean b7,
+                                                       jboolean b8, jboolean b9, jboolean b10) {
+  // We use b1 to drive the output.
+  assert(b2 == JNI_TRUE);
+  assert(b3 == JNI_FALSE);
+  assert(b4 == JNI_TRUE);
+  assert(b5 == JNI_FALSE);
+  assert(b6 == JNI_TRUE);
+  assert(b7 == JNI_FALSE);
+  assert(b8 == JNI_TRUE);
+  assert(b9 == JNI_FALSE);
+  assert(b10 == JNI_TRUE);
+
+  assert(b1 == JNI_TRUE || b1 == JNI_FALSE);
+  return b1;
+}
+
+constexpr size_t kCharReturnSize = 8;
+jchar char_returns[kCharReturnSize] = { 0, 1, 2, 127, 255, 256, 15000, 34000 };
+
+extern "C" jchar JNICALL Java_JniTest_charMethod(JNIEnv* env, jclass klass, jchar c1, jchar c2,
+                                                    jchar c3, jchar c4, jchar c5, jchar c6,
+                                                    jchar c7, jchar c8, jchar c9, jchar c10) {
+  // We use c1 to drive the output.
+  assert(c2 == 'a');
+  assert(c3 == 'b');
+  assert(c4 == 'c');
+  assert(c5 == '0');
+  assert(c6 == '1');
+  assert(c7 == '2');
+  assert(c8 == 1234);
+  assert(c9 == 2345);
+  assert(c10 == 3456);
+
+  assert(c1 < static_cast<jchar>(kCharReturnSize));
+
+  return char_returns[c1];
+}
diff --git a/test/etc/push-and-run-test-jar b/test/etc/push-and-run-test-jar
index 93d7e79..e0d2f1d 100755
--- a/test/etc/push-and-run-test-jar
+++ b/test/etc/push-and-run-test-jar
@@ -140,7 +140,7 @@
 fi
 
 if [ "$GDB" = "y" ]; then
-    gdb="/data/gdbserver$TARGET_SUFFIX :5039"
+    gdb="gdbserver$TARGET_SUFFIX :5039"
     gdbargs="$exe"
 fi
 
@@ -150,7 +150,7 @@
 
 JNI_OPTS="-Xjnigreflimit:512 -Xcheck:jni"
 
-cmdline="cd $DEX_LOCATION && mkdir dalvik-cache && export ANDROID_DATA=$DEX_LOCATION && export DEX_LOCATION=$DEX_LOCATION && \
+cmdline="cd $DEX_LOCATION && mkdir -p dalvik-cache/{arm,arm64,mips,x86,x86_64} && export ANDROID_DATA=$DEX_LOCATION && export DEX_LOCATION=$DEX_LOCATION && \
     $INVOKE_WITH $gdb /system/bin/dalvikvm$TARGET_SUFFIX $FLAGS $gdbargs -XXlib:$LIB $ZYGOTE $JNI_OPTS $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main"
 if [ "$DEV_MODE" = "y" ]; then
   echo $cmdline "$@"
diff --git a/tools/art b/tools/art
index c9c0d4f..e3f409c 100755
--- a/tools/art
+++ b/tools/art
@@ -48,7 +48,7 @@
 ANDROID_HOST_OUT=$PROG_DIR/..
 ANDROID_DATA=$PWD/android-data$$
 
-mkdir -p $ANDROID_DATA/dalvik-cache
+mkdir -p $ANDROID_DATA/dalvik-cache/{x86,x86_64}
 ANDROID_DATA=$ANDROID_DATA \
   ANDROID_ROOT=$ANDROID_HOST_OUT \
   LD_LIBRARY_PATH=$ANDROID_HOST_OUT/lib \