Merge "art/test: support sequential run"
diff --git a/build/Android.common.mk b/build/Android.common.mk
index aaa1490..6135571 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -123,6 +123,11 @@
   ART_HOST_CLANG := true
 endif
 
+# enable ART_TARGET_CLANG for ARM64
+ifneq (,$(filter $(TARGET_ARCH),arm64))
+ART_TARGET_CLANG := true
+endif
+
 # directory used for dalvik-cache on device
 ART_DALVIK_CACHE_DIR := /data/dalvik-cache
 
@@ -195,6 +200,14 @@
 	-Wstrict-aliasing \
 	-fstrict-aliasing
 
+# these are necessary for Clang ARM64 ART builds
+ifeq ($(ART_TARGET_CLANG), true)
+art_cflags += \
+	-Wno-implicit-exception-spec-mismatch \
+	-DNVALGRIND \
+	-Wno-unused-value
+endif
+
 ifeq ($(ART_SMALL_MODE),true)
   art_cflags += -DART_SMALL_MODE=1
 endif
@@ -209,7 +222,13 @@
 endif
 
 art_non_debug_cflags := \
-        -O3
+	-O3
+
+# FIXME: upstream LLVM has a vectorizer bug that needs to be fixed
+ifeq ($(ART_TARGET_CLANG),true)
+art_non_debug_cflags += \
+        -fno-vectorize
+endif
 
 art_debug_cflags := \
 	-O1 \
@@ -238,8 +257,9 @@
 ifneq ($(filter 4.6 4.6.%, $(TARGET_GCC_VERSION)),)
   ART_TARGET_CFLAGS += -Wthread-safety
 else
+  # FIXME: add -Wthread-safety when the problem is fixed
   ifeq ($(ART_TARGET_CLANG),true)
-    ART_TARGET_CFLAGS += -Wthread-safety
+    ART_TARGET_CFLAGS +=
   else
     # Warn if -Wthread-safety is not supported and not doing a top-level or 'mma' build.
     ifneq ($(ONE_SHOT_MAKEFILE),)
@@ -304,7 +324,7 @@
 #         Has one argument, the suffix
 define call-art-multi-target
   $(call $(1),$(ART_PHONY_TEST_TARGET_SUFFIX))
-  
+
   ifdef TARGET_2ND_ARCH
     $(call $(1),$(2ND_ART_PHONY_TEST_TARGET_SUFFIX))
   endif
@@ -329,10 +349,10 @@
 #         Has one argument, the suffix
 define call-art-multi-target-var
   $(call $(1),$(ART_PHONY_TEST_TARGET_SUFFIX))
-  
+
   ifdef TARGET_2ND_ARCH
     $(call $(1),$(2ND_ART_PHONY_TEST_TARGET_SUFFIX))
-    
+
     # Link both together, if it makes sense
     ifneq ($(ART_PHONY_TEST_TARGET_SUFFIX),)
       ifneq ($(2ND_ART_PHONY_TEST_TARGET_SUFFIX),)
@@ -351,10 +371,10 @@
 #       We assume we can link the names together easily...
 define call-art-multi-target-rule
   $(call $(1),$(ART_PHONY_TEST_TARGET_SUFFIX))
-  
+
   ifdef TARGET_2ND_ARCH
     $(call $(1),$(2ND_ART_PHONY_TEST_TARGET_SUFFIX))
-  
+
     # Link both together, if it makes sense
     ifneq ($(ART_PHONY_TEST_TARGET_SUFFIX),)
       ifneq ($(2ND_ART_PHONY_TEST_TARGET_SUFFIX),)
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index 36f1be7..2f17e08 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -879,8 +879,8 @@
       new (arena_) ArenaBitVector(arena_, cu_->num_dalvik_registers, false, kBitMapLiveIn);
 
   for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
-    uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
-    DecodedInstruction *d_insn = &mir->dalvikInsn;
+    uint64_t df_attributes = GetDataFlowAttributes(mir);
+    MIR::DecodedInstruction* d_insn = &mir->dalvikInsn;
 
     if (df_attributes & DF_HAS_USES) {
       if (df_attributes & DF_UA) {
@@ -949,7 +949,7 @@
 
 /* Look up new SSA names for format_35c instructions */
 void MIRGraph::DataFlowSSAFormat35C(MIR* mir) {
-  DecodedInstruction *d_insn = &mir->dalvikInsn;
+  MIR::DecodedInstruction* d_insn = &mir->dalvikInsn;
   int num_uses = d_insn->vA;
   int i;
 
@@ -967,7 +967,7 @@
 
 /* Look up new SSA names for format_3rc instructions */
 void MIRGraph::DataFlowSSAFormat3RC(MIR* mir) {
-  DecodedInstruction *d_insn = &mir->dalvikInsn;
+  MIR::DecodedInstruction* d_insn = &mir->dalvikInsn;
   int num_uses = d_insn->vA;
   int i;
 
@@ -994,7 +994,7 @@
         static_cast<struct SSARepresentation *>(arena_->Alloc(sizeof(SSARepresentation),
                                                               kArenaAllocDFInfo));
 
-    uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+    uint64_t df_attributes = GetDataFlowAttributes(mir);
 
       // If not a pseudo-op, note non-leaf or can throw
     if (static_cast<int>(mir->dalvikInsn.opcode) <
@@ -1064,7 +1064,7 @@
                                                               kArenaAllocDFInfo));
     }
 
-    DecodedInstruction *d_insn = &mir->dalvikInsn;
+    MIR::DecodedInstruction* d_insn = &mir->dalvikInsn;
 
     if (df_attributes & DF_HAS_USES) {
       num_uses = 0;
@@ -1252,7 +1252,7 @@
       use_counts_.Put(s_reg, use_counts_.Get(s_reg) + weight);
     }
     if (!(cu_->disable_opt & (1 << kPromoteCompilerTemps))) {
-      uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+      uint64_t df_attributes = GetDataFlowAttributes(mir);
       // Implicit use of Method* ? */
       if (df_attributes & DF_UMS) {
         /*
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 8ce4f1f..30d0bc3 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -21,6 +21,7 @@
 #include "base/stl_util.h"
 #include "compiler_internals.h"
 #include "dex_file-inl.h"
+#include "dex_instruction-inl.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "leb128.h"
@@ -106,11 +107,17 @@
 /*
  * Parse an instruction, return the length of the instruction
  */
-int MIRGraph::ParseInsn(const uint16_t* code_ptr, DecodedInstruction* decoded_instruction) {
-  const Instruction* instruction = Instruction::At(code_ptr);
-  *decoded_instruction = DecodedInstruction(instruction);
-
-  return instruction->SizeInCodeUnits();
+int MIRGraph::ParseInsn(const uint16_t* code_ptr, MIR::DecodedInstruction* decoded_instruction) {
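+  // Copy out only the operands that this instruction's format actually defines; default the rest to 0.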
+  const Instruction* inst = Instruction::At(code_ptr);
+  decoded_instruction->opcode = inst->Opcode();
+  decoded_instruction->vA = inst->HasVRegA() ? inst->VRegA() : 0;
+  decoded_instruction->vB = inst->HasVRegB() ? inst->VRegB() : 0;
+  decoded_instruction->vB_wide = inst->HasWideVRegB() ? inst->WideVRegB() : 0;
+  decoded_instruction->vC = inst->HasVRegC() ?  inst->VRegC() : 0;
+  if (inst->HasVarArgs()) {
+    inst->GetVarArgs(decoded_instruction->arg);
+  }
+  return inst->SizeInCodeUnits();
 }
 
 
@@ -621,7 +628,7 @@
     int flags = Instruction::FlagsOf(insn->dalvikInsn.opcode);
     int verify_flags = Instruction::VerifyFlagsOf(insn->dalvikInsn.opcode);
 
-    uint64_t df_flags = oat_data_flow_attributes_[insn->dalvikInsn.opcode];
+    uint64_t df_flags = GetDataFlowAttributes(insn);
     merged_df_flags |= df_flags;
 
     if (df_flags & DF_HAS_DEFS) {
@@ -743,6 +750,17 @@
   }
 }
 
+uint64_t MIRGraph::GetDataFlowAttributes(Instruction::Code opcode) {
+  DCHECK_LT((size_t) opcode, (sizeof(oat_data_flow_attributes_) / sizeof(oat_data_flow_attributes_[0])));
+  return oat_data_flow_attributes_[opcode];
+}
+
+uint64_t MIRGraph::GetDataFlowAttributes(MIR* mir) {
+  DCHECK(mir != nullptr);
+  Instruction::Code opcode = mir->dalvikInsn.opcode;
+  return GetDataFlowAttributes(opcode);
+}
+
 // TODO: use a configurable base prefix, and adjust callers to supply pass name.
 /* Dump the CFG into a DOT graph */
 void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suffix) {
@@ -924,7 +942,7 @@
 }
 
 char* MIRGraph::GetDalvikDisassembly(const MIR* mir) {
-  DecodedInstruction insn = mir->dalvikInsn;
+  MIR::DecodedInstruction insn = mir->dalvikInsn;
   std::string str;
   int flags = 0;
   int opcode = insn.opcode;
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 2c125f6..b926503 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -254,7 +254,15 @@
    * additional fields on as-needed basis.  Question: how to support MIR Pseudo-ops; probably
    * need to carry aux data pointer.
    */
-  DecodedInstruction dalvikInsn;
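+  // Decoded instruction for this MIR; replaces the standalone DecodedInstruction that used to live
+  // in dex_instruction.h.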
+  struct DecodedInstruction {
+    uint32_t vA;
+    uint32_t vB;
+    uint64_t vB_wide;        /* for k51l */
+    uint32_t vC;
+    uint32_t arg[5];         /* vC/D/E/F/G in invoke or filled-new-array */
+    Instruction::Code opcode;
+  } dalvikInsn;
+
   uint16_t width;                 // Note: width can include switch table or fill array data.
   NarrowDexOffset offset;         // Offset of the instruction in code units.
   uint16_t optimization_flags;
@@ -851,6 +859,9 @@
    */
   void CountUses(struct BasicBlock* bb);
 
+  static uint64_t GetDataFlowAttributes(Instruction::Code opcode);
+  static uint64_t GetDataFlowAttributes(MIR* mir);
+
   /**
    * @brief Combine BasicBlocks
    * @param the BasicBlock we are considering
@@ -868,7 +879,6 @@
   RegLocation* reg_location_;                         // Map SSA names to location.
   SafeMap<unsigned int, unsigned int> block_id_map_;  // Block collapse lookup cache.
 
-  static const uint64_t oat_data_flow_attributes_[kMirOpLast];
   static const char* extended_mir_op_names_[kMirOpLast - kMirOpFirst];
   static const uint32_t analysis_attributes_[kMirOpLast];
 
@@ -882,7 +892,7 @@
   void CompilerInitializeSSAConversion();
   bool DoSSAConversion(BasicBlock* bb);
   bool InvokeUsesMethodStar(MIR* mir);
-  int ParseInsn(const uint16_t* code_ptr, DecodedInstruction* decoded_instruction);
+  int ParseInsn(const uint16_t* code_ptr, MIR::DecodedInstruction* decoded_instruction);
   bool ContentIsInsn(const uint16_t* code_ptr);
   BasicBlock* SplitBlock(DexOffset code_offset, BasicBlock* orig_block,
                          BasicBlock** immed_pred_block_p);
@@ -985,6 +995,7 @@
   GrowableArray<MirIFieldLoweringInfo> ifield_lowering_infos_;
   GrowableArray<MirSFieldLoweringInfo> sfield_lowering_infos_;
   GrowableArray<MirMethodLoweringInfo> method_lowering_infos_;
+  static const uint64_t oat_data_flow_attributes_[kMirOpLast];
 
   friend class ClassInitCheckEliminationTest;
   friend class LocalValueNumberingTest;
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 937e258..9f15cd4 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -43,13 +43,13 @@
 
   for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
     // Skip pass if BB has MIR without SSA representation.
-    if (mir->ssa_rep == NULL) {
+    if (mir->ssa_rep == nullptr) {
        return;
     }
 
-    uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+    uint64_t df_attributes = GetDataFlowAttributes(mir);
 
-    DecodedInstruction *d_insn = &mir->dalvikInsn;
+    MIR::DecodedInstruction* d_insn = &mir->dalvikInsn;
 
     if (!(df_attributes & DF_HAS_DEFS)) continue;
 
@@ -559,7 +559,7 @@
       if (mir->ssa_rep == NULL) {
         continue;
       }
-      uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+      uint64_t df_attributes = GetDataFlowAttributes(mir);
       if (df_attributes & DF_HAS_NULL_CHKS) {
         checkstats_->null_checks++;
         if (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) {
@@ -644,7 +644,7 @@
     MIR* mir = bb->last_mir_insn;
     // Grab the attributes from the paired opcode
     MIR* throw_insn = mir->meta.throw_insn;
-    uint64_t df_attributes = oat_data_flow_attributes_[throw_insn->dalvikInsn.opcode];
+    uint64_t df_attributes = GetDataFlowAttributes(throw_insn);
     bool can_combine = true;
     if (df_attributes & DF_HAS_NULL_CHKS) {
       can_combine &= ((throw_insn->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0);
@@ -796,7 +796,7 @@
       continue;
     }
 
-    uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+    uint64_t df_attributes = GetDataFlowAttributes(mir);
 
     // Might need a null check?
     if (df_attributes & DF_HAS_NULL_CHKS) {
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index 40ced70..891d9fb 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -172,7 +172,7 @@
       mir->offset = 2 * i;  // All insns need to be at least 2 code units long.
       mir->width = 2u;
       mir->optimization_flags = 0u;
-      merged_df_flags |= MIRGraph::oat_data_flow_attributes_[def->opcode];
+      merged_df_flags |= MIRGraph::GetDataFlowAttributes(def->opcode);
     }
     cu_.mir_graph->merged_df_flags_ = merged_df_flags;
 
diff --git a/compiler/dex/portable/mir_to_gbc.cc b/compiler/dex/portable/mir_to_gbc.cc
index 70438ec..576e242 100644
--- a/compiler/dex/portable/mir_to_gbc.cc
+++ b/compiler/dex/portable/mir_to_gbc.cc
@@ -722,7 +722,7 @@
   /* Prep Src and Dest locations */
   int next_sreg = 0;
   int next_loc = 0;
-  uint64_t attrs = mir_graph_->oat_data_flow_attributes_[opcode];
+  uint64_t attrs = MirGraph::GetDataFlowAttributes(opcode);
   rl_src[0] = rl_src[1] = rl_src[2] = mir_graph_->GetBadLoc();
   if (attrs & DF_UA) {
     if (attrs & DF_A_WIDE) {
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index b030bb4..0596d4f 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1018,8 +1018,8 @@
       vmap_encoder.PushBackUnsigned(fp_vmap_table_[i] + VmapTable::kEntryAdjustment);
     }
   } else {
-    DCHECK_EQ(__builtin_popcount(core_spill_mask_), 0);
-    DCHECK_EQ(__builtin_popcount(fp_spill_mask_), 0);
+    DCHECK_EQ(POPCOUNT(core_spill_mask_), 0);
+    DCHECK_EQ(POPCOUNT(fp_spill_mask_), 0);
     DCHECK_EQ(core_vmap_table_.size(), 0u);
     DCHECK_EQ(fp_vmap_table_.size(), 0u);
     vmap_encoder.PushBackUnsigned(0u);  // Size is 0.
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 8b9a686..3cc2ba0 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -330,9 +330,10 @@
     bool is_type_initialized;  // Ignored as an array does not have an initializer.
     bool use_direct_type_ptr;
     uintptr_t direct_type_ptr;
+    bool is_finalizable;
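+    // Needed by the updated CanEmbedTypeInCode signature; an array class is never finalizable.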
     if (kEmbedClassInCode &&
-        driver->CanEmbedTypeInCode(*dex_file, type_idx,
-                                   &is_type_initialized, &use_direct_type_ptr, &direct_type_ptr)) {
+        driver->CanEmbedTypeInCode(*dex_file, type_idx, &is_type_initialized, &use_direct_type_ptr,
+                                   &direct_type_ptr, &is_finalizable)) {
       // The fast path.
       if (!use_direct_type_ptr) {
         LoadClassType(type_idx, kArg0);
@@ -980,9 +981,11 @@
     bool is_type_initialized;
     bool use_direct_type_ptr;
     uintptr_t direct_type_ptr;
+    bool is_finalizable;
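+    // Only take the fast allocation path when the class is not finalizable.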
     if (kEmbedClassInCode &&
-        driver->CanEmbedTypeInCode(*dex_file, type_idx,
-                                   &is_type_initialized, &use_direct_type_ptr, &direct_type_ptr)) {
+        driver->CanEmbedTypeInCode(*dex_file, type_idx, &is_type_initialized, &use_direct_type_ptr,
+                                   &direct_type_ptr, &is_finalizable) &&
+                                   !is_finalizable) {
       // The fast path.
       if (!use_direct_type_ptr) {
         LoadClassType(type_idx, kArg0);
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 6c5279e..107987e 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -286,7 +286,7 @@
   // Prep Src and Dest locations.
   int next_sreg = 0;
   int next_loc = 0;
-  uint64_t attrs = mir_graph_->oat_data_flow_attributes_[opcode];
+  uint64_t attrs = MIRGraph::GetDataFlowAttributes(opcode);
   rl_src[0] = rl_src[1] = rl_src[2] = mir_graph_->GetBadLoc();
   if (attrs & DF_UA) {
     if (attrs & DF_A_WIDE) {
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 4d45055..b972d08 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -889,7 +889,7 @@
 
 void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock * bb, MIR *mir) {
   // Look at all the uses, and see if they are double constants.
-  uint64_t attrs = mir_graph_->oat_data_flow_attributes_[opcode];
+  uint64_t attrs = MIRGraph::GetDataFlowAttributes(static_cast<Instruction::Code>(opcode));
   int next_sreg = 0;
   if (attrs & DF_UA) {
     if (attrs & DF_A_WIDE) {
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index 4be0f59..d5c2598 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -124,7 +124,7 @@
 bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) {
   SSARepresentation *ssa_rep = mir->ssa_rep;
   if (ssa_rep) {
-    uint64_t attrs = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+    uint64_t attrs = GetDataFlowAttributes(mir);
     const int* uses = ssa_rep->uses;
     const int* defs = ssa_rep->defs;
 
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 0ad30be..bde0fae 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -905,13 +905,14 @@
 
 bool CompilerDriver::CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx,
                                         bool* is_type_initialized, bool* use_direct_type_ptr,
-                                        uintptr_t* direct_type_ptr) {
+                                        uintptr_t* direct_type_ptr, bool* out_is_finalizable) {
   ScopedObjectAccess soa(Thread::Current());
   mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
     return false;
   }
+  *out_is_finalizable = resolved_class->IsFinalizable();
   const bool compiling_boot = Runtime::Current()->GetHeap()->IsCompilingBoot();
   if (compiling_boot) {
     // boot -> boot class pointers.
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index d7d40d5..6ac9cf7 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -210,7 +210,7 @@
 
   bool CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx,
                           bool* is_type_initialized, bool* use_direct_type_ptr,
-                          uintptr_t* direct_type_ptr);
+                          uintptr_t* direct_type_ptr, bool* out_is_finalizable);
 
   // Get the DexCache for the
   mirror::DexCache* GetDexCache(const DexCompilationUnit* mUnit)
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index e6d983f..62817e7 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -587,7 +587,7 @@
 
   void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
-    Object* ref = obj->GetFieldObject<Object, kVerifyNone, false>(offset);
+    Object* ref = obj->GetFieldObject<Object, kVerifyNone>(offset);
     // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
     // image.
     copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 1efdd38..b0aa63b 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -409,7 +409,7 @@
       uint32_t method_idx = instruction.VRegB_35c();
       uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
       uint32_t args[5];
-      instruction.GetArgs(args);
+      instruction.GetVarArgs(args);
       if (!BuildInvoke(instruction, dex_offset, method_idx, number_of_vreg_arguments, false, args, -1)) {
         return false;
       }
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index effc38e..5c839dd 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -1107,7 +1107,7 @@
   // The offset is off by 8 due to the way the ARM CPUs read PC.
   offset -= 8;
   CHECK_ALIGNED(offset, 4);
-  CHECK(IsInt(CountOneBits(kBranchOffsetMask), offset)) << offset;
+  CHECK(IsInt(POPCOUNT(kBranchOffsetMask), offset)) << offset;
 
   // Properly preserve only the bits supported in the instruction.
   offset >>= 2;
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index ce1c4de..1b050cf 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -19,7 +19,7 @@
 
 #include "assembler.h"
 
-#include "gtest/gtest.h"
+#include "common_runtime_test.h"  // For ScratchFile
 
 #include <cstdio>
 #include <cstdlib>
@@ -30,6 +30,10 @@
 
 namespace art {
 
+// Use a global static variable to keep the same name for all test data. Otherwise we'd just spam the
+// temp directory.
+static std::string tmpnam_;
+
 template<typename Ass, typename Reg, typename Imm>
 class AssemblerTest : public testing::Test {
  public:
@@ -203,6 +207,10 @@
   void SetUp() OVERRIDE {
     assembler_.reset(new Ass());
 
+    // Fake a runtime test for ScratchFile
+    std::string android_data;
+    CommonRuntimeTest::SetEnvironmentVariables(android_data);
+
     SetUpHelpers();
   }
 
@@ -667,7 +675,8 @@
   // Use a consistent tmpnam, so store it.
   std::string GetTmpnam() {
     if (tmpnam_.length() == 0) {
-      tmpnam_ = std::string(tmpnam(nullptr));
+      ScratchFile tmp;
+      tmpnam_ = tmp.GetFilename() + "asm";
     }
     return tmpnam_;
   }
@@ -677,7 +686,6 @@
   std::string resolved_assembler_cmd_;
   std::string resolved_objdump_cmd_;
   std::string resolved_disassemble_cmd_;
-  std::string tmpnam_;
 
   static constexpr size_t OBJDUMP_SECTION_LINE_MIN_TOKENS = 6;
 };
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 45d3a97..9001f8a 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -123,7 +123,7 @@
 
 int32_t MipsAssembler::EncodeBranchOffset(int offset, int32_t inst, bool is_jump) {
   CHECK_ALIGNED(offset, 4);
-  CHECK(IsInt(CountOneBits(kBranchOffsetMask), offset)) << offset;
+  CHECK(IsInt(POPCOUNT(kBranchOffsetMask), offset)) << offset;
 
   // Properly preserve only the bits supported in the instruction.
   offset >>= 2;
diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc
index 102e126..0e1b25e 100644
--- a/runtime/arch/arm/context_arm.cc
+++ b/runtime/arch/arm/context_arm.cc
@@ -44,8 +44,8 @@
   mirror::ArtMethod* method = fr.GetMethod();
   uint32_t core_spills = method->GetCoreSpillMask();
   uint32_t fp_core_spills = method->GetFpSpillMask();
-  size_t spill_count = __builtin_popcount(core_spills);
-  size_t fp_spill_count = __builtin_popcount(fp_core_spills);
+  size_t spill_count = POPCOUNT(core_spills);
+  size_t fp_spill_count = POPCOUNT(fp_core_spills);
   size_t frame_size = method->GetFrameSizeInBytes();
   if (spill_count > 0) {
     // Lowest number spill is farthest away, walk registers and fill into context
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index 3bbec71..eddaa0b 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -233,9 +233,9 @@
 
   mirror::ArtMethod* method = reinterpret_cast<mirror::ArtMethod*>(sc->arm_r0);
   uint32_t spill_mask = method->GetCoreSpillMask();
-  uint32_t numcores = __builtin_popcount(spill_mask);
+  uint32_t numcores = POPCOUNT(spill_mask);
   uint32_t fp_spill_mask = method->GetFpSpillMask();
-  uint32_t numfps = __builtin_popcount(fp_spill_mask);
+  uint32_t numfps = POPCOUNT(fp_spill_mask);
   uint32_t spill_size = (numcores + numfps) * 4;
   LOG(DEBUG) << "spill size: " << spill_size;
   uint8_t* prevframe = prevsp + spill_size;
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index c96ff60..0890fa9 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -47,8 +47,8 @@
   mirror::ArtMethod* method = fr.GetMethod();
   uint32_t core_spills = method->GetCoreSpillMask();
   uint32_t fp_core_spills = method->GetFpSpillMask();
-  size_t spill_count = __builtin_popcount(core_spills);
-  size_t fp_spill_count = __builtin_popcount(fp_core_spills);
+  size_t spill_count = POPCOUNT(core_spills);
+  size_t fp_spill_count = POPCOUNT(fp_core_spills);
   size_t frame_size = method->GetFrameSizeInBytes();
 
   if (spill_count > 0) {
diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc
index b957708..0950e71 100644
--- a/runtime/arch/mips/context_mips.cc
+++ b/runtime/arch/mips/context_mips.cc
@@ -43,8 +43,8 @@
   mirror::ArtMethod* method = fr.GetMethod();
   uint32_t core_spills = method->GetCoreSpillMask();
   uint32_t fp_core_spills = method->GetFpSpillMask();
-  size_t spill_count = __builtin_popcount(core_spills);
-  size_t fp_spill_count = __builtin_popcount(fp_core_spills);
+  size_t spill_count = POPCOUNT(core_spills);
+  size_t fp_spill_count = POPCOUNT(fp_core_spills);
   size_t frame_size = method->GetFrameSizeInBytes();
   if (spill_count > 0) {
     // Lowest number spill is farthest away, walk registers and fill into context.
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 437beb5..5d9ad2c 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -182,13 +182,14 @@
 #endif
 }
 
+static constexpr size_t kThinLockLoops = 100;
 
-#if defined(__i386__) || defined(__arm__)
+#if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
 extern "C" void art_quick_lock_object(void);
 #endif
 
 TEST_F(StubTest, LockObject) {
-#if defined(__i386__) || defined(__arm__)
+#if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
   Thread* self = Thread::Current();
   // Create an object
   ScopedObjectAccess soa(self);
@@ -206,6 +207,70 @@
   LockWord lock_after = obj->GetLockWord(false);
   LockWord::LockState new_state = lock_after.GetState();
   EXPECT_EQ(LockWord::LockState::kThinLocked, new_state);
+  EXPECT_EQ(lock_after.ThinLockCount(), 0U);  // Thin lock starts count at zero
+
+  for (size_t i = 1; i < kThinLockLoops; ++i) {
+    Invoke3(reinterpret_cast<size_t>(obj.get()), 0U, 0U,
+              reinterpret_cast<uintptr_t>(&art_quick_lock_object), self);
+
+    // Check we're at lock count i
+
+    LockWord l_inc = obj->GetLockWord(false);
+    LockWord::LockState l_inc_state = l_inc.GetState();
+    EXPECT_EQ(LockWord::LockState::kThinLocked, l_inc_state);
+    EXPECT_EQ(l_inc.ThinLockCount(), i);
+  }
+
+  // TODO: Improve this test. Somehow force it to go to fat locked. But that needs another thread.
+
+#else
+  LOG(INFO) << "Skipping lock_object as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping lock_object as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
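+// Simple deterministic pseudo-random generator used to drive the lock/unlock stress test below.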
+class RandGen {
+ public:
+  explicit RandGen(uint32_t seed) : val_(seed) {}
+
+  uint32_t next() {
+    val_ = val_ * 48271 % 2147483647 + 13;
+    return val_;
+  }
+
+  uint32_t val_;
+};
+
+
+#if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
+extern "C" void art_quick_lock_object(void);
+extern "C" void art_quick_unlock_object(void);
+#endif
+
+TEST_F(StubTest, UnlockObject) {
+#if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
+  Thread* self = Thread::Current();
+  // Create an object
+  ScopedObjectAccess soa(self);
+  // garbage is created during ClassLinker::Init
+
+  SirtRef<mirror::String> obj(soa.Self(),
+                              mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello, world!"));
+  LockWord lock = obj->GetLockWord(false);
+  LockWord::LockState old_state = lock.GetState();
+  EXPECT_EQ(LockWord::LockState::kUnlocked, old_state);
+
+  Invoke3(reinterpret_cast<size_t>(obj.get()), 0U, 0U,
+          reinterpret_cast<uintptr_t>(&art_quick_unlock_object), self);
+
+  // This should be an illegal monitor state.
+  EXPECT_TRUE(self->IsExceptionPending());
+  self->ClearException();
+
+  LockWord lock_after = obj->GetLockWord(false);
+  LockWord::LockState new_state = lock_after.GetState();
+  EXPECT_EQ(LockWord::LockState::kUnlocked, new_state);
 
   Invoke3(reinterpret_cast<size_t>(obj.get()), 0U, 0U,
           reinterpret_cast<uintptr_t>(&art_quick_lock_object), self);
@@ -214,12 +279,94 @@
   LockWord::LockState new_state2 = lock_after2.GetState();
   EXPECT_EQ(LockWord::LockState::kThinLocked, new_state2);
 
+  Invoke3(reinterpret_cast<size_t>(obj.get()), 0U, 0U,
+          reinterpret_cast<uintptr_t>(&art_quick_unlock_object), self);
+
+  LockWord lock_after3 = obj->GetLockWord(false);
+  LockWord::LockState new_state3 = lock_after3.GetState();
+  EXPECT_EQ(LockWord::LockState::kUnlocked, new_state3);
+
+  // Stress test:
+  // Keep a number of objects and their locks in flight. Randomly lock or unlock one of them in
+  // each step.
+
+  RandGen r(0x1234);
+
+  constexpr size_t kNumberOfLocks = 10;  // Number of objects (= number of locks)
+  constexpr size_t kIterations = 10000;  // Number of iterations
+
+  size_t counts[kNumberOfLocks];
+  SirtRef<mirror::String>* objects[kNumberOfLocks];
+
+  // Initialize = allocate.
+  for (size_t i = 0; i < kNumberOfLocks; ++i) {
+    counts[i] = 0;
+    objects[i] = new SirtRef<mirror::String>(soa.Self(),
+                                             mirror::String::AllocFromModifiedUtf8(soa.Self(), ""));
+  }
+
+  for (size_t i = 0; i < kIterations; ++i) {
+    // Select which lock to update.
+    size_t index = r.next() % kNumberOfLocks;
+
+    bool lock;  // Whether to lock or unlock in this step.
+    if (counts[index] == 0) {
+      lock = true;
+    } else if (counts[index] == kThinLockLoops) {
+      lock = false;
+    } else {
+      // Randomly.
+      lock = r.next() % 2 == 0;
+    }
+
+    if (lock) {
+      Invoke3(reinterpret_cast<size_t>(objects[index]->get()), 0U, 0U,
+              reinterpret_cast<uintptr_t>(&art_quick_lock_object), self);
+      counts[index]++;
+    } else {
+      Invoke3(reinterpret_cast<size_t>(objects[index]->get()), 0U, 0U,
+              reinterpret_cast<uintptr_t>(&art_quick_unlock_object), self);
+      counts[index]--;
+    }
+
+    EXPECT_FALSE(self->IsExceptionPending());
+
+    // Check the new state.
+    LockWord lock_iter = objects[index]->get()->GetLockWord(false);
+    LockWord::LockState iter_state = lock_iter.GetState();
+    if (counts[index] > 0) {
+      EXPECT_EQ(LockWord::LockState::kThinLocked, iter_state);
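+      // ThinLockCount() is zero-based: the first lock leaves a count of 0, hence the - 1.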
+      EXPECT_EQ(counts[index] - 1, lock_iter.ThinLockCount());
+    } else {
+      EXPECT_EQ(LockWord::LockState::kUnlocked, iter_state);
+    }
+  }
+
+  // Unlock the remaining count times and then check it's unlocked. Then deallocate.
+  // Go in reverse order to correctly handle the SirtRefs.
+  for (size_t i = 0; i < kNumberOfLocks; ++i) {
+    size_t index = kNumberOfLocks - 1 - i;
+    size_t count = counts[index];
+    while (count > 0) {
+      Invoke3(reinterpret_cast<size_t>(objects[index]->get()), 0U, 0U,
+              reinterpret_cast<uintptr_t>(&art_quick_unlock_object), self);
+
+      count--;
+    }
+
+    LockWord lock_after4 = objects[index]->get()->GetLockWord(false);
+    LockWord::LockState new_state4 = lock_after4.GetState();
+    EXPECT_EQ(LockWord::LockState::kUnlocked, new_state4);
+
+    delete objects[index];
+  }
+
   // TODO: Improve this test. Somehow force it to go to fat locked. But that needs another thread.
 
 #else
-  LOG(INFO) << "Skipping lock_object as I don't know how to do that on " << kRuntimeISA;
+  LOG(INFO) << "Skipping unlock_object as I don't know how to do that on " << kRuntimeISA;
   // Force-print to std::cout so it's also outside the logcat.
-  std::cout << "Skipping lock_object as I don't know how to do that on " << kRuntimeISA << std::endl;
+  std::cout << "Skipping unlock_object as I don't know how to do that on " << kRuntimeISA << std::endl;
 #endif
 }
 
@@ -699,6 +846,8 @@
     }
   }
 
+  // TODO: Deallocate things.
+
   // Tests done.
 #else
   LOG(INFO) << "Skipping string_compareto as I don't know how to do that on " << kRuntimeISA;
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index 5cf3001..c68d76a 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -38,7 +38,7 @@
 void X86Context::FillCalleeSaves(const StackVisitor& fr) {
   mirror::ArtMethod* method = fr.GetMethod();
   uint32_t core_spills = method->GetCoreSpillMask();
-  size_t spill_count = __builtin_popcount(core_spills);
+  size_t spill_count = POPCOUNT(core_spills);
   DCHECK_EQ(method->GetFpSpillMask(), 0u);
   size_t frame_size = method->GetFrameSizeInBytes();
   if (spill_count > 0) {
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index 3f1f86d..29a7065 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -42,8 +42,8 @@
   mirror::ArtMethod* method = fr.GetMethod();
   uint32_t core_spills = method->GetCoreSpillMask();
   uint32_t fp_core_spills = method->GetFpSpillMask();
-  size_t spill_count = __builtin_popcount(core_spills);
-  size_t fp_spill_count = __builtin_popcount(fp_core_spills);
+  size_t spill_count = POPCOUNT(core_spills);
+  size_t fp_spill_count = POPCOUNT(fp_core_spills);
   size_t frame_size = method->GetFrameSizeInBytes();
   if (spill_count > 0) {
     // Lowest number spill is farthest away, walk registers and fill into context.
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index cac6cfd..9ccf6c9 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -661,13 +661,61 @@
 TWO_ARG_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
 
 DEFINE_FUNCTION art_quick_lock_object
-    int3
-    int3
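+    // Thin-lock fast path. This code assumes the low 16 bits of the lock word hold the owner
+    // thread id, the recursion count lives above bit 16, and the top two bits flag
+    // non-thin (fat lock / hash) states.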
+    testl %edi, %edi                      // Null check object/rdi.
+    jz   .Lslow_lock
+.Lretry_lock:
+    movl LOCK_WORD_OFFSET(%edi), %ecx     // ecx := lock word.
+    test LITERAL(0xC0000000), %ecx        // Test the 2 high bits.
+    jne  .Lslow_lock                      // Slow path if either of the two high bits are set.
+    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
+    test %ecx, %ecx
+    jnz  .Lalready_thin                   // Lock word contains a thin lock.
+    // unlocked case - %edx holds thread id with count of 0
+    xor  %eax, %eax                       // eax == 0 for comparison with lock word in cmpxchg
+    lock cmpxchg  %edx, LOCK_WORD_OFFSET(%edi)
+    jnz  .Lretry_lock                     // cmpxchg failed, retry
+    ret
+.Lalready_thin:
+    cmpw %cx, %dx                         // do we hold the lock already?
+    jne  .Lslow_lock
+    addl LITERAL(65536), %ecx             // increment recursion count
+    test LITERAL(0xC0000000), %ecx        // overflowed if either of top two bits are set
+    jne  .Lslow_lock                      // count overflowed so go slow
+    movl %ecx, LOCK_WORD_OFFSET(%edi)     // update lockword, cmpxchg not necessary as we hold lock
+    ret
+.Lslow_lock:
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
+    movq %rsp, %rdx                       // pass SP
+    call PLT_SYMBOL(artLockObjectFromCode)  // artLockObjectFromCode(object, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME    // restore frame up to return address
+    RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object
 
 DEFINE_FUNCTION art_quick_unlock_object
-    int3
-    int3
+    testl %edi, %edi                      // null check object/edi
+    jz   .Lslow_unlock
+    movl LOCK_WORD_OFFSET(%edi), %ecx     // ecx := lock word
+    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
+    test %ecx, %ecx
+    jb   .Lslow_unlock                    // lock word contains a monitor
+    cmpw %cx, %dx                         // does the thread id match?
+    jne  .Lslow_unlock
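+    // If the recursion count is non-zero (lock word >= 1 << 16), decrement it; otherwise clear the word.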
+    cmpl LITERAL(65536), %ecx
+    jae  .Lrecursive_thin_unlock
+    movl LITERAL(0), LOCK_WORD_OFFSET(%edi)
+    ret
+.Lrecursive_thin_unlock:
+    subl LITERAL(65536), %ecx
+    mov  %ecx, LOCK_WORD_OFFSET(%edi)
+    ret
+.Lslow_unlock:
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
+    movq %rsp, %rdx                       // pass SP
+    call PLT_SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME    // restore frame up to return address
+    RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object
 
 DEFINE_FUNCTION art_quick_is_assignable
diff --git a/runtime/base/bit_vector.cc b/runtime/base/bit_vector.cc
index 12c0352..3df5101 100644
--- a/runtime/base/bit_vector.cc
+++ b/runtime/base/bit_vector.cc
@@ -201,7 +201,7 @@
 uint32_t BitVector::NumSetBits() const {
   uint32_t count = 0;
   for (uint32_t word = 0; word < storage_size_; word++) {
-    count += __builtin_popcount(storage_[word]);
+    count += POPCOUNT(storage_[word]);
   }
   return count;
 }
@@ -331,10 +331,10 @@
 
   uint32_t count = 0u;
   for (uint32_t word = 0u; word < word_end; word++) {
-    count += __builtin_popcount(storage[word]);
+    count += POPCOUNT(storage[word]);
   }
   if (partial_word_bits != 0u) {
-    count += __builtin_popcount(storage[word_end] & ~(0xffffffffu << partial_word_bits));
+    count += POPCOUNT(storage[word_end] & ~(0xffffffffu << partial_word_bits));
   }
   return count;
 }
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index 6c53563..df88794 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -66,6 +66,9 @@
     SirtRef<mirror::DexCache> dex_cache(Thread::Current(), declaring_class->GetDexCache());
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_string = ResolveString(dex_file, string_idx, dex_cache);
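+    // Successful resolution must have populated the dex cache entry.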
+    if (resolved_string != nullptr) {
+      DCHECK_EQ(dex_cache->GetResolvedString(string_idx), resolved_string);
+    }
   }
   return resolved_string;
 }
@@ -80,6 +83,9 @@
     SirtRef<mirror::ClassLoader> class_loader(self, declaring_class->GetClassLoader());
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_type = ResolveType(dex_file, type_idx, dex_cache, class_loader);
+    if (resolved_type != nullptr) {
+      DCHECK_EQ(dex_cache->GetResolvedType(type_idx), resolved_type);
+    }
   }
   return resolved_type;
 }
@@ -94,6 +100,9 @@
     SirtRef<mirror::ClassLoader> class_loader(self, declaring_class->GetClassLoader());
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_type = ResolveType(dex_file, type_idx, dex_cache, class_loader);
+    if (resolved_type != nullptr) {
+      DCHECK_EQ(dex_cache->GetResolvedType(type_idx), resolved_type);
+    }
   }
   return resolved_type;
 }
@@ -110,6 +119,9 @@
     SirtRef<mirror::ClassLoader> class_loader(self, declaring_class->GetClassLoader());
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_method = ResolveMethod(dex_file, method_idx, dex_cache, class_loader, referrer, type);
+    if (resolved_method != nullptr) {
+      DCHECK_EQ(dex_cache->GetResolvedMethod(method_idx), resolved_method);
+    }
   }
   return resolved_method;
 }
@@ -126,6 +138,9 @@
     SirtRef<mirror::ClassLoader> class_loader(self, declaring_class->GetClassLoader());
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_field = ResolveField(dex_file, field_idx, dex_cache, class_loader, is_static);
+    if (resolved_field != nullptr) {
+      DCHECK_EQ(dex_cache->GetResolvedField(field_idx), resolved_field);
+    }
   }
   return resolved_field;
 }
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 723e32c..79d3690 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -57,6 +57,9 @@
 class ScratchFile {
  public:
   ScratchFile() {
+    // ANDROID_DATA needs to be set
+    CHECK_NE(static_cast<char*>(nullptr), getenv("ANDROID_DATA")) <<
+        "Are you subclassing RuntimeTest?";
     filename_ = getenv("ANDROID_DATA");
     filename_ += "/TmpFile-XXXXXX";
     int fd = mkstemp(&filename_[0]);
diff --git a/runtime/dex_instruction-inl.h b/runtime/dex_instruction-inl.h
index 207b0b6..ad9491f 100644
--- a/runtime/dex_instruction-inl.h
+++ b/runtime/dex_instruction-inl.h
@@ -24,6 +24,68 @@
 //------------------------------------------------------------------------------
 // VRegA
 //------------------------------------------------------------------------------
+inline bool Instruction::HasVRegA() const {
+  switch (FormatOf(Opcode())) {
+    case k10t: return true;
+    case k10x: return true;
+    case k11n: return true;
+    case k11x: return true;
+    case k12x: return true;
+    case k20t: return true;
+    case k21c: return true;
+    case k21h: return true;
+    case k21s: return true;
+    case k21t: return true;
+    case k22b: return true;
+    case k22c: return true;
+    case k22s: return true;
+    case k22t: return true;
+    case k22x: return true;
+    case k23x: return true;
+    case k30t: return true;
+    case k31c: return true;
+    case k31i: return true;
+    case k31t: return true;
+    case k32x: return true;
+    case k35c: return true;
+    case k3rc: return true;
+    case k51l: return true;
+    default: return false;
+  }
+}
+
+inline int32_t Instruction::VRegA() const {
+  switch (FormatOf(Opcode())) {
+    case k10t: return VRegA_10t();
+    case k10x: return VRegA_10x();
+    case k11n: return VRegA_11n();
+    case k11x: return VRegA_11x();
+    case k12x: return VRegA_12x();
+    case k20t: return VRegA_20t();
+    case k21c: return VRegA_21c();
+    case k21h: return VRegA_21h();
+    case k21s: return VRegA_21s();
+    case k21t: return VRegA_21t();
+    case k22b: return VRegA_22b();
+    case k22c: return VRegA_22c();
+    case k22s: return VRegA_22s();
+    case k22t: return VRegA_22t();
+    case k22x: return VRegA_22x();
+    case k23x: return VRegA_23x();
+    case k30t: return VRegA_30t();
+    case k31c: return VRegA_31c();
+    case k31i: return VRegA_31i();
+    case k31t: return VRegA_31t();
+    case k32x: return VRegA_32x();
+    case k35c: return VRegA_35c();
+    case k3rc: return VRegA_3rc();
+    case k51l: return VRegA_51l();
+    default:
+      LOG(FATAL) << "Tried to access vA of instruction " << Name() << " which has no A operand.";
+      exit(EXIT_FAILURE);
+  }
+}
+
 inline int8_t Instruction::VRegA_10t(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k10t);
   return static_cast<int8_t>(InstAA(inst_data));
@@ -147,6 +209,66 @@
 //------------------------------------------------------------------------------
 // VRegB
 //------------------------------------------------------------------------------
+inline bool Instruction::HasVRegB() const {
+  switch (FormatOf(Opcode())) {
+    case k11n: return true;
+    case k12x: return true;
+    case k21c: return true;
+    case k21h: return true;
+    case k21s: return true;
+    case k21t: return true;
+    case k22b: return true;
+    case k22c: return true;
+    case k22s: return true;
+    case k22t: return true;
+    case k22x: return true;
+    case k23x: return true;
+    case k31c: return true;
+    case k31i: return true;
+    case k31t: return true;
+    case k32x: return true;
+    case k35c: return true;
+    case k3rc: return true;
+    case k51l: return true;
+    default: return false;
+  }
+}
+
+inline bool Instruction::HasWideVRegB() const {
+  return FormatOf(Opcode()) == k51l;
+}
+
+inline int32_t Instruction::VRegB() const {
+  switch (FormatOf(Opcode())) {
+    case k11n: return VRegB_11n();
+    case k12x: return VRegB_12x();
+    case k21c: return VRegB_21c();
+    case k21h: return VRegB_21h();
+    case k21s: return VRegB_21s();
+    case k21t: return VRegB_21t();
+    case k22b: return VRegB_22b();
+    case k22c: return VRegB_22c();
+    case k22s: return VRegB_22s();
+    case k22t: return VRegB_22t();
+    case k22x: return VRegB_22x();
+    case k23x: return VRegB_23x();
+    case k31c: return VRegB_31c();
+    case k31i: return VRegB_31i();
+    case k31t: return VRegB_31t();
+    case k32x: return VRegB_32x();
+    case k35c: return VRegB_35c();
+    case k3rc: return VRegB_3rc();
+    case k51l: return VRegB_51l();
+    default:
+      LOG(FATAL) << "Tried to access vB of instruction " << Name() << " which has no B operand.";
+      exit(EXIT_FAILURE);
+  }
+}
+
+inline uint64_t Instruction::WideVRegB() const {
+  return VRegB_51l();
+}
+
 inline int4_t Instruction::VRegB_11n(uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k11n);
   return static_cast<int4_t>((InstB(inst_data) << 28) >> 28);
@@ -246,6 +368,34 @@
 //------------------------------------------------------------------------------
 // VRegC
 //------------------------------------------------------------------------------
+inline bool Instruction::HasVRegC() const {
+  switch (FormatOf(Opcode())) {
+    case k22b: return true;
+    case k22c: return true;
+    case k22s: return true;
+    case k22t: return true;
+    case k23x: return true;
+    case k35c: return true;
+    case k3rc: return true;
+    default: return false;
+  }
+}
+
+inline int32_t Instruction::VRegC() const {
+  switch (FormatOf(Opcode())) {
+    case k22b: return VRegC_22b();
+    case k22c: return VRegC_22c();
+    case k22s: return VRegC_22s();
+    case k22t: return VRegC_22t();
+    case k23x: return VRegC_23x();
+    case k35c: return VRegC_35c();
+    case k3rc: return VRegC_3rc();
+    default:
+      LOG(FATAL) << "Tried to access vC of instruction " << Name() << " which has no C operand.";
+      exit(EXIT_FAILURE);
+  }
+}
+
 inline int8_t Instruction::VRegC_22b() const {
   DCHECK_EQ(FormatOf(Opcode()), k22b);
   return static_cast<int8_t>(Fetch16(1) >> 8);
@@ -281,7 +431,11 @@
   return Fetch16(2);
 }
 
-inline void Instruction::GetArgs(uint32_t arg[5], uint16_t inst_data) const {
+inline bool Instruction::HasVarArgs() const {
+  return FormatOf(Opcode()) == k35c;
+}
+
+inline void Instruction::GetVarArgs(uint32_t arg[5], uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k35c);
 
   /*
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 7546245..0494f22 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -70,121 +70,6 @@
 #undef INSTRUCTION_SIZE
 };
 
-/*
- * Handy macros for helping decode instructions.
- */
-#define FETCH(_offset)      (insns[(_offset)])
-#define FETCH_uint32(_offset)   (fetch_uint32_impl((_offset), insns))
-#define INST_A(_insn)       (((uint16_t)(_insn) >> 8) & 0x0f)
-#define INST_B(_insn)       ((uint16_t)(_insn) >> 12)
-#define INST_AA(_insn)      ((_insn) >> 8)
-
-/* Helper for FETCH_uint32, above. */
-static inline uint32_t fetch_uint32_impl(uint32_t offset, const uint16_t* insns) {
-  return insns[offset] | ((uint32_t) insns[offset+1] << 16);
-}
-
-
-bool Instruction::HasVRegC() const {
-  switch (FormatOf(Opcode())) {
-    case k23x: return true;
-    case k35c: return true;
-    case k3rc: return true;
-    default: return false;
-  }
-}
-
-bool Instruction::HasVRegB() const {
-  switch (FormatOf(Opcode())) {
-    case k12x: return true;
-    case k22b: return true;
-    case k22c: return true;
-    case k22s: return true;
-    case k22t: return true;
-    case k22x: return true;
-    case k23x: return true;
-    case k32x: return true;
-    default: return false;
-  }
-}
-
-bool Instruction::HasVRegA() const {
-  switch (FormatOf(Opcode())) {
-    case k11n: return true;
-    case k11x: return true;
-    case k12x: return true;
-    case k21c: return true;
-    case k21h: return true;
-    case k21s: return true;
-    case k21t: return true;
-    case k22b: return true;
-    case k22c: return true;
-    case k22s: return true;
-    case k22t: return true;
-    case k22x: return true;
-    case k23x: return true;
-    case k31c: return true;
-    case k31i: return true;
-    case k31t: return true;
-    case k32x: return true;
-    case k51l: return true;
-    default: return false;
-  }
-}
-
-int32_t Instruction::VRegC() const {
-  switch (FormatOf(Opcode())) {
-    case k23x: return VRegC_23x();
-    case k35c: return VRegC_35c();
-    case k3rc: return VRegC_3rc();
-    default: LOG(FATAL) << "Tried to access vC of instruction " << Name() <<
-        " which has no C operand.";
-  }
-  return -1;
-}
-
-int32_t Instruction::VRegB() const {
-  switch (FormatOf(Opcode())) {
-    case k12x: return VRegB_12x();
-    case k22b: return VRegB_22b();
-    case k22c: return VRegB_22c();
-    case k22s: return VRegB_22s();
-    case k22t: return VRegB_22t();
-    case k22x: return VRegB_22x();
-    case k23x: return VRegB_23x();
-    case k32x: return VRegB_32x();
-    default: LOG(FATAL) << "Tried to access vB of instruction " << Name() <<
-        " which has no B operand.";
-  }
-  return -1;
-}
-
-int32_t Instruction::VRegA() const {
-  switch (FormatOf(Opcode())) {
-    case k11n: return VRegA_11n();
-    case k11x: return VRegA_11x();
-    case k12x: return VRegA_12x();
-    case k21c: return VRegA_21c();
-    case k21h: return VRegA_21h();
-    case k21s: return VRegA_21s();
-    case k21t: return VRegA_21t();
-    case k22b: return VRegA_22b();
-    case k22c: return VRegA_22c();
-    case k22s: return VRegA_22s();
-    case k22t: return VRegA_22t();
-    case k22x: return VRegA_22x();
-    case k23x: return VRegA_23x();
-    case k31c: return VRegA_31c();
-    case k31i: return VRegA_31i();
-    case k31t: return VRegA_31t();
-    case k32x: return VRegA_32x();
-    case k51l: return VRegA_51l();
-    default: LOG(FATAL) << "Tried to access vA of instruction " << Name() <<
-        " which has no A operand.";
-  }
-  return -1;
-}
-
 int32_t Instruction::GetTargetOffset() const {
   switch (FormatOf(Opcode())) {
     // Cases for conditional branches follow.
@@ -207,145 +92,6 @@
   return  FlagsOf(opcode) & Instruction::kContinue;
 }
 
-void Instruction::Decode(uint32_t &vA, uint32_t &vB, uint64_t &vB_wide, uint32_t &vC, uint32_t arg[]) const {
-  const uint16_t* insns = reinterpret_cast<const uint16_t*>(this);
-  uint16_t insn = *insns;
-  Code opcode = static_cast<Code>(insn & 0xFF);
-
-  switch (FormatOf(opcode)) {
-    case k10x:       // op
-      /* nothing to do; copy the AA bits out for the verifier */
-      vA = INST_AA(insn);
-      break;
-    case k12x:       // op vA, vB
-      vA = INST_A(insn);
-      vB = INST_B(insn);
-      break;
-    case k11n:       // op vA, #+B
-      vA = INST_A(insn);
-      vB = (int32_t) (INST_B(insn) << 28) >> 28;  // sign extend 4-bit value
-      break;
-    case k11x:       // op vAA
-      vA = INST_AA(insn);
-      break;
-    case k10t:       // op +AA
-      vA = (int8_t) INST_AA(insn);              // sign-extend 8-bit value
-      break;
-    case k20t:       // op +AAAA
-      vA = (int16_t) FETCH(1);                   // sign-extend 16-bit value
-      break;
-    case k21c:       // op vAA, thing@BBBB
-    case k22x:       // op vAA, vBBBB
-      vA = INST_AA(insn);
-      vB = FETCH(1);
-      break;
-    case k21s:       // op vAA, #+BBBB
-    case k21t:       // op vAA, +BBBB
-      vA = INST_AA(insn);
-      vB = (int16_t) FETCH(1);                   // sign-extend 16-bit value
-      break;
-    case k21h:       // op vAA, #+BBBB0000[00000000]
-      vA = INST_AA(insn);
-      /*
-       * The value should be treated as right-zero-extended, but we don't
-       * actually do that here. Among other things, we don't know if it's
-       * the top bits of a 32- or 64-bit value.
-       */
-      vB = FETCH(1);
-      break;
-    case k23x:       // op vAA, vBB, vCC
-      vA = INST_AA(insn);
-      vB = FETCH(1) & 0xff;
-      vC = FETCH(1) >> 8;
-      break;
-    case k22b:       // op vAA, vBB, #+CC
-      vA = INST_AA(insn);
-      vB = FETCH(1) & 0xff;
-      vC = (int8_t) (FETCH(1) >> 8);            // sign-extend 8-bit value
-      break;
-    case k22s:       // op vA, vB, #+CCCC
-    case k22t:       // op vA, vB, +CCCC
-      vA = INST_A(insn);
-      vB = INST_B(insn);
-      vC = (int16_t) FETCH(1);                   // sign-extend 16-bit value
-      break;
-    case k22c:       // op vA, vB, thing@CCCC
-      vA = INST_A(insn);
-      vB = INST_B(insn);
-      vC = FETCH(1);
-      break;
-    case k30t:       // op +AAAAAAAA
-      vA = FETCH_uint32(1);                     // signed 32-bit value
-      break;
-    case k31t:       // op vAA, +BBBBBBBB
-    case k31c:       // op vAA, string@BBBBBBBB
-      vA = INST_AA(insn);
-      vB = FETCH_uint32(1);                     // 32-bit value
-      break;
-    case k32x:       // op vAAAA, vBBBB
-      vA = FETCH(1);
-      vB = FETCH(2);
-      break;
-    case k31i:       // op vAA, #+BBBBBBBB
-      vA = INST_AA(insn);
-      vB = FETCH_uint32(1);                     // signed 32-bit value
-      break;
-    case k35c:       // op {vC, vD, vE, vF, vG}, thing@BBBB
-      {
-        /*
-         * Note that the fields mentioned in the spec don't appear in
-         * their "usual" positions here compared to most formats. This
-         * was done so that the field names for the argument count and
-         * reference index match between this format and the corresponding
-         * range formats (3rc and friends).
-         *
-         * Bottom line: The argument count is always in vA, and the
-         * method constant (or equivalent) is always in vB.
-         */
-        uint16_t regList;
-        int count;
-
-        vA = INST_B(insn);  // This is labeled A in the spec.
-        vB = FETCH(1);
-        regList = FETCH(2);
-
-        count = vA;
-
-        /*
-         * Copy the argument registers into the arg[] array, and
-         * also copy the first argument (if any) into vC. (The
-         * DecodedInstruction structure doesn't have separate
-         * fields for {vD, vE, vF, vG}, so there's no need to make
-         * copies of those.) Note that cases 5..2 fall through.
-         */
-        switch (count) {
-        case 5: arg[4] = INST_A(insn);
-        case 4: arg[3] = (regList >> 12) & 0x0f;
-        case 3: arg[2] = (regList >> 8) & 0x0f;
-        case 2: arg[1] = (regList >> 4) & 0x0f;
-        case 1: vC = arg[0] = regList & 0x0f; break;
-        case 0: break;  // Valid, but no need to do anything.
-        default:
-          LOG(ERROR) << "Invalid arg count in 35c (" << count << ")";
-          return;
-        }
-      }
-      break;
-    case k3rc:       // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB
-      vA = INST_AA(insn);
-      vB = FETCH(1);
-      vC = FETCH(2);
-        break;
-    case k51l:       // op vAA, #+BBBBBBBBBBBBBBBB
-      vA = INST_AA(insn);
-      vB_wide = FETCH_uint32(1) | ((uint64_t) FETCH_uint32(3) << 32);
-      break;
-    default:
-      LOG(ERROR) << "Can't decode unexpected format " << FormatOf(opcode) << " (op=" << opcode << ")";
-      return;
-  }
-}
-
 size_t Instruction::SizeInCodeUnitsComplexOpcode() const {
   const uint16_t* insns = reinterpret_cast<const uint16_t*>(this);
   // Handle special NOP encoded variable length sequences.
@@ -549,7 +295,7 @@
       break;
     case k35c: {
       uint32_t arg[5];
-      GetArgs(arg);
+      GetVarArgs(arg);
       switch (Opcode()) {
         case FILLED_NEW_ARRAY:
         {
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index 4352c4a..560e5ff 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -151,8 +151,7 @@
     kVerifyError           = 0x80000,
   };
 
-  // Decodes this instruction, populating its arguments.
-  void Decode(uint32_t &vA, uint32_t &vB, uint64_t &vB_wide, uint32_t &vC, uint32_t arg[]) const;
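+  // Maximum number of registers a var-args (35c-format) instruction can name ({vC, vD, vE, vF, vG}).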
+  static constexpr uint32_t kMaxVarArgRegs = 5;
 
   // Returns the size (in 2 byte code units) of this instruction.
   size_t SizeInCodeUnits() const {
@@ -313,6 +312,9 @@
   bool HasVRegB() const;
   int32_t VRegB() const;
 
+  bool HasWideVRegB() const;
+  uint64_t WideVRegB() const;
+
   int4_t VRegB_11n() const {
     return VRegB_11n(Fetch16(0));
   }
@@ -365,9 +367,10 @@
   uint16_t VRegC_3rc() const;
 
   // Fills the given array with the 'arg' array of the instruction.
-  void GetArgs(uint32_t args[5], uint16_t inst_data) const;
-  void GetArgs(uint32_t args[5]) const {
-    return GetArgs(args, Fetch16(0));
+  bool HasVarArgs() const;
+  void GetVarArgs(uint32_t args[kMaxVarArgRegs], uint16_t inst_data) const;
+  void GetVarArgs(uint32_t args[kMaxVarArgRegs]) const {
+    return GetVarArgs(args, Fetch16(0));
   }
 
   // Returns the opcode field of the instruction. The given "inst_data" parameter must be the first
@@ -549,23 +552,6 @@
 std::ostream& operator<<(std::ostream& os, const Instruction::Flags& flags);
 std::ostream& operator<<(std::ostream& os, const Instruction::VerifyFlag& vflags);
 
-/*
- * Holds the contents of a decoded instruction.
- */
-struct DecodedInstruction {
-  uint32_t vA;
-  uint32_t vB;
-  uint64_t vB_wide;        /* for k51l */
-  uint32_t vC;
-  uint32_t arg[5];         /* vC/D/E/F/G in invoke or filled-new-array */
-  Instruction::Code opcode;
-
-  explicit DecodedInstruction(const Instruction* inst) {
-    inst->Decode(vA, vB, vB_wide, vC, arg);
-    opcode = inst->Opcode();
-  }
-};
-
 }  // namespace art
 
 #endif  // ART_RUNTIME_DEX_INSTRUCTION_H_
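
Note on the API change above: callers no longer build a DecodedInstruction; they read operands through the accessors. A minimal sketch of a migrated call site for a non-range invoke (the function name and surrounding setup are illustrative only, not part of this change):

    // Illustrative: reading a format-35c invoke with the accessor-based API.
    void VisitInvoke35cSketch(const art::Instruction* inst) {
      uint32_t args[art::Instruction::kMaxVarArgRegs];  // at most 5 vregs
      inst->GetVarArgs(args);                           // fills the explicit register list
      int32_t arg_count = inst->VRegA();                // argument count lives in vA
      int32_t method_idx = inst->VRegB();               // method index lives in vB
      for (int32_t i = 0; i < arg_count; ++i) {
        // ... consume args[i] ...
      }
      static_cast<void>(method_idx);
    }
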
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 05912bf..e52a8fb 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -132,8 +132,7 @@
     if (klass == nullptr) {
       return nullptr;
     }
-    gc::Heap* heap = Runtime::Current()->GetHeap();
-    return klass->Alloc<kInstrumented>(self, heap->GetCurrentAllocator());
+    return klass->Alloc<kInstrumented>(self, Runtime::Current()->GetHeap()->GetCurrentAllocator());
   }
   DCHECK(klass != nullptr);
   return klass->Alloc<kInstrumented>(self, allocator_type);
@@ -155,9 +154,11 @@
       return nullptr;
     }
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    return klass->Alloc<kInstrumented>(self, heap->GetCurrentAllocator());
+    // Pass in false since the object can not be finalizable.
+    return klass->Alloc<kInstrumented, false>(self, heap->GetCurrentAllocator());
   }
-  return klass->Alloc<kInstrumented>(self, allocator_type);
+  // Pass in false since the object can not be finalizable.
+  return klass->Alloc<kInstrumented, false>(self, allocator_type);
 }
 
 // Given the context of a calling Method and an initialized class, create an instance.
@@ -169,7 +170,8 @@
                                                                            gc::AllocatorType allocator_type)
     NO_THREAD_SAFETY_ANALYSIS {
   DCHECK(klass != nullptr);
-  return klass->Alloc<kInstrumented>(self, allocator_type);
+  // Pass in false since the object can not be finalizable.
+  return klass->Alloc<kInstrumented, false>(self, allocator_type);
 }
 
 
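The second template argument added to Class::Alloc lets paths that have already established the class is not finalizable skip the finalizer-reference registration at compile time, with a DCHECK guarding debug builds. A hedged sketch of the two caller shapes (klass, self, and allocator_type are placeholders; the first template argument is an arbitrary example value):

    // Default: Alloc() registers a FinalizerReference when klass->IsFinalizable().
    mirror::Object* obj = klass->Alloc<false>(self, allocator_type);

    // Known-non-finalizable path (as in the hunks above): the check is compiled
    // out, and a DCHECK(!IsFinalizable()) catches misuse in debug builds.
    mirror::Object* obj_nf = klass->Alloc<false, false>(self, allocator_type);
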
diff --git a/runtime/gc/accounting/gc_allocator.h b/runtime/gc/accounting/gc_allocator.h
index 4fe9367..7dd7cca 100644
--- a/runtime/gc/accounting/gc_allocator.h
+++ b/runtime/gc/accounting/gc_allocator.h
@@ -73,7 +73,7 @@
 // GCAllocatorImpl<T> if kMeasureGCMemoryOverhead is true, std::allocator<T> otherwise.
 template <typename T>
 class GcAllocator : public TypeStaticIf<kMeasureGcMemoryOverhead, GcAllocatorImpl<T>,
-                                        std::allocator<T> >::value {
+                                        std::allocator<T> >::type {
 };
 
 }  // namespace accounting
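
The ::value to ::type change here tracks the TypeStaticIf rename in runtime/utils.h later in this change and matches the standard-library convention (std::conditional exposes its result as ::type). A tiny self-contained illustration of the selection pattern GcAllocator relies on:

    #include <memory>

    // Selects type A when the condition is true, B otherwise (as in runtime/utils.h).
    template <bool kCondition, typename A, typename B>
    struct TypeStaticIf { typedef A type; };

    template <typename A, typename B>
    struct TypeStaticIf<false, A, B> { typedef B type; };

    // With the flag off, ::type resolves to std::allocator<int>, so the derived
    // class really is an allocator; deriving from the old '::value' spelling
    // would no longer compile after the rename.
    struct ExampleAllocator
        : public TypeStaticIf<false, std::allocator<long>, std::allocator<int> >::type {};
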
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index f113030..a693659 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -584,7 +584,7 @@
   return AllocRun(self, idx);
 }
 
-void* RosAlloc::AllocFromCurrentRunUnlocked(Thread* self, size_t idx) {
+inline void* RosAlloc::AllocFromCurrentRunUnlocked(Thread* self, size_t idx) {
   Run* current_run = current_runs_[idx];
   DCHECK(current_run != nullptr);
   void* slot_addr = current_run->AllocSlot();
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index b25f7ff..9e73f14 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -29,7 +29,9 @@
     case kGcCauseBackground: return "Background";
     case kGcCauseExplicit: return "Explicit";
     case kGcCauseForNativeAlloc: return "NativeAlloc";
-    case kGcCauseCollectorTransition: return" CollectorTransition";
+    case kGcCauseCollectorTransition: return "CollectorTransition";
+    case kGcCauseDisableMovingGc: return "DisableMovingGc";
+    case kGcCauseTrim: return "HeapTrim";
     default:
       LOG(FATAL) << "Unreachable";
   }
diff --git a/runtime/gc/gc_cause.h b/runtime/gc/gc_cause.h
index 7499b9e..10e6667 100644
--- a/runtime/gc/gc_cause.h
+++ b/runtime/gc/gc_cause.h
@@ -35,6 +35,10 @@
   kGcCauseForNativeAlloc,
   // GC triggered for a collector transition.
   kGcCauseCollectorTransition,
+  // Not a real GC cause, used when we disable moving GC (currently for GetPrimitiveArrayCritical).
+  kGcCauseDisableMovingGc,
+  // Not a real GC cause, used when we trim the heap.
+  kGcCauseTrim,
 };
 
 const char* PrettyCause(GcCause cause);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 5d517bb..dab668f 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -498,7 +498,7 @@
   MutexLock mu(self, *gc_complete_lock_);
   ++disable_moving_gc_count_;
   if (IsMovingGc(collector_type_running_)) {
-    WaitForGcToCompleteLocked(self);
+    WaitForGcToCompleteLocked(kGcCauseDisableMovingGc, self);
   }
 }
 
@@ -962,7 +962,7 @@
     // trimming.
     MutexLock mu(self, *gc_complete_lock_);
     // Ensure there is only one GC at a time.
-    WaitForGcToCompleteLocked(self);
+    WaitForGcToCompleteLocked(kGcCauseTrim, self);
     collector_type_running_ = kCollectorTypeHeapTrim;
   }
   uint64_t start_ns = NanoTime();
@@ -1171,7 +1171,7 @@
   SirtRef<mirror::Class> sirt_klass(self, *klass);
   // The allocation failed. If the GC is running, block until it completes, and then retry the
   // allocation.
-  collector::GcType last_gc = WaitForGcToComplete(self);
+  collector::GcType last_gc = WaitForGcToComplete(kGcCauseForAlloc, self);
   if (last_gc != collector::kGcTypeNone) {
     // If we were the default allocator but the allocator changed while we were suspended,
     // abort the allocation.
@@ -1418,7 +1418,7 @@
       ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
       MutexLock mu(self, *gc_complete_lock_);
       // Ensure there is only one GC at a time.
-      WaitForGcToCompleteLocked(self);
+      WaitForGcToCompleteLocked(kGcCauseCollectorTransition, self);
       // If someone else beat us to it and changed the collector before we could, exit.
       // This is safe to do before the suspend all since we set the collector_type_running_ before
       // we exit the loop. If another thread attempts to do the heap transition before we exit,
@@ -1819,7 +1819,7 @@
     ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
     MutexLock mu(self, *gc_complete_lock_);
     // Ensure there is only one GC at a time.
-    WaitForGcToCompleteLocked(self);
+    WaitForGcToCompleteLocked(gc_cause, self);
     compacting_gc = IsMovingGc(collector_type_);
     // GC can be disabled if someone has used GetPrimitiveArrayCritical.
     if (compacting_gc && disable_moving_gc_count_ != 0) {
@@ -2448,13 +2448,13 @@
   }
 }
 
-collector::GcType Heap::WaitForGcToComplete(Thread* self) {
+collector::GcType Heap::WaitForGcToComplete(GcCause cause, Thread* self) {
   ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
   MutexLock mu(self, *gc_complete_lock_);
-  return WaitForGcToCompleteLocked(self);
+  return WaitForGcToCompleteLocked(cause, self);
 }
 
-collector::GcType Heap::WaitForGcToCompleteLocked(Thread* self) {
+collector::GcType Heap::WaitForGcToCompleteLocked(GcCause cause, Thread* self) {
   collector::GcType last_gc_type = collector::kGcTypeNone;
   uint64_t wait_start = NanoTime();
   while (collector_type_running_ != kCollectorTypeNone) {
@@ -2467,7 +2467,8 @@
   uint64_t wait_time = NanoTime() - wait_start;
   total_wait_time_ += wait_time;
   if (wait_time > long_pause_log_threshold_) {
-    LOG(INFO) << "WaitForGcToComplete blocked for " << PrettyDuration(wait_time);
+    LOG(INFO) << "WaitForGcToComplete blocked for " << PrettyDuration(wait_time)
+        << " for cause " << cause;
   }
   return last_gc_type;
 }
@@ -2611,12 +2612,14 @@
   non_moving_space_->ClearGrowthLimit();
 }
 
-void Heap::AddFinalizerReference(Thread* self, mirror::Object* object) {
+void Heap::AddFinalizerReference(Thread* self, mirror::Object** object) {
   ScopedObjectAccess soa(self);
-  ScopedLocalRef<jobject> arg(self->GetJniEnv(), soa.AddLocalReference<jobject>(object));
+  ScopedLocalRef<jobject> arg(self->GetJniEnv(), soa.AddLocalReference<jobject>(*object));
   jvalue args[1];
   args[0].l = arg.get();
   InvokeWithJValues(soa, nullptr, WellKnownClasses::java_lang_ref_FinalizerReference_add, args);
+  // Restore object in case it gets moved.
+  *object = soa.Decode<mirror::Object*>(arg.get());
 }
 
 void Heap::EnqueueClearedReferences() {
@@ -2659,7 +2662,7 @@
     return;
   }
   // Wait for any GCs currently running to finish.
-  if (WaitForGcToComplete(self) == collector::kGcTypeNone) {
+  if (WaitForGcToComplete(kGcCauseBackground, self) == collector::kGcTypeNone) {
     // If we can't run the GC type we wanted to run, find the next appropriate one and try that
     // instead. E.g. can't do partial, so do full instead.
     if (CollectGarbageInternal(next_gc_type_, kGcCauseBackground, false) ==
@@ -2792,7 +2795,7 @@
     // The second watermark is higher than the gc watermark. If you hit this it means you are
     // allocating native objects faster than the GC can keep up with.
     if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_limit_) {
-      if (WaitForGcToComplete(self) != collector::kGcTypeNone) {
+      if (WaitForGcToComplete(kGcCauseForNativeAlloc, self) != collector::kGcTypeNone) {
         // Just finished a GC, attempt to run finalizers.
         RunFinalization(env);
         CHECK(!env->ExceptionCheck());
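AddFinalizerReference now takes mirror::Object** because the upcall into FinalizerReference.add can trigger a moving collection; the callee re-decodes the JNI local reference and writes the (possibly moved) address back through the pointer. A hedged sketch of the caller side (obj and self are placeholders):

    // Illustrative caller pattern (mirrors the Object::Clone hunk in this change).
    mirror::Object* obj = /* freshly allocated, possibly finalizable */ nullptr;
    if (obj != nullptr && obj->GetClass()->IsFinalizable()) {
      // Pass the address of the local: if a moving GC runs during the Java
      // upcall, the new address is written back through the pointer.
      Runtime::Current()->GetHeap()->AddFinalizerReference(self, &obj);
      // 'obj' is valid here even if the object moved.
    }
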
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index d3b5cdc..7eafd34 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -300,7 +300,8 @@
 
   // Blocks the caller until the garbage collector becomes idle and returns the type of GC we
   // waited for.
-  collector::GcType WaitForGcToComplete(Thread* self) LOCKS_EXCLUDED(gc_complete_lock_);
+  collector::GcType WaitForGcToComplete(GcCause cause, Thread* self)
+      LOCKS_EXCLUDED(gc_complete_lock_);
 
   // Update the heap's process state to a new value, may cause compaction to occur.
   void UpdateProcessState(ProcessState process_state);
@@ -380,7 +381,7 @@
     return card_table_.get();
   }
 
-  void AddFinalizerReference(Thread* self, mirror::Object* object);
+  void AddFinalizerReference(Thread* self, mirror::Object** object);
 
   // Returns the number of bytes currently allocated.
   size_t GetBytesAllocated() const {
@@ -641,7 +642,7 @@
 
   // Blocks the caller until the garbage collector becomes idle and returns the type of GC we
   // waited for.
-  collector::GcType WaitForGcToCompleteLocked(Thread* self)
+  collector::GcType WaitForGcToCompleteLocked(GcCause cause, Thread* self)
       EXCLUSIVE_LOCKS_REQUIRED(gc_complete_lock_);
 
   void RequestCollectorTransition(CollectorType desired_collector_type, uint64_t delta_time)
diff --git a/runtime/globals.h b/runtime/globals.h
index e3c54b8..eb52a46 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -20,6 +20,7 @@
 #include <stddef.h>
 #include <stdint.h>
 #include "read_barrier_c.h"
+#include "read_barrier_option.h"
 
 namespace art {
 
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 5660508..da3e8ea 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -85,7 +85,7 @@
     if (is_range) {
       vregC = inst->VRegC_3rc();
     } else {
-      inst->GetArgs(arg, inst_data);
+      inst->GetVarArgs(arg, inst_data);
     }
 
     // Handle receiver apart since it's not part of the shorty.
@@ -224,7 +224,7 @@
   if (is_range) {
     vregC = inst->VRegC_3rc();
   } else {
-    inst->GetArgs(arg);
+    inst->GetVarArgs(arg);
   }
   const bool is_primitive_int_component = componentClass->IsPrimitiveInt();
   for (int32_t i = 0; i < length; ++i) {
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index bc8d348..65799cd 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -27,11 +27,11 @@
 namespace art {
 namespace mirror {
 
-template<VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline size_t Array::SizeOf() {
   // This is safe from overflow because the array was already allocated, so we know it's sane.
   size_t component_size =
-      GetClass<kVerifyFlags, kDoReadBarrier>()->template GetComponentSize<kDoReadBarrier>();
+      GetClass<kVerifyFlags, kReadBarrierOption>()->template GetComponentSize<kReadBarrierOption>();
   // Don't need to check this since we already check this in GetClass.
   int32_t component_count =
       GetLength<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis)>();
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 92f0e67..eead4eb 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -42,7 +42,8 @@
                                  const SirtRef<IntArray>& dimensions)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   size_t SizeOf() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   int32_t GetLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index f61a01d..71f0210 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -411,7 +411,7 @@
 
   static void SetClass(Class* java_lang_reflect_ArtMethod);
 
-  template <bool kDoReadBarrier = true>
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   static Class* GetJavaLangReflectArtMethod() {
     // This does not need a RB because it is a root.
     return java_lang_reflect_ArtMethod_;
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 8a1f383..a556a1c 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -470,12 +470,21 @@
   DCHECK_GE(this->object_size_, sizeof(Object));
 }
 
-template <bool kIsInstrumented>
+template<bool kIsInstrumented, bool kCheckAddFinalizer>
 inline Object* Class::Alloc(Thread* self, gc::AllocatorType allocator_type) {
   CheckObjectAlloc();
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  return heap->AllocObjectWithAllocator<kIsInstrumented, false>(self, this, this->object_size_,
-                                                                allocator_type, VoidFunctor());
+  const bool add_finalizer = kCheckAddFinalizer && IsFinalizable();
+  if (!kCheckAddFinalizer) {
+    DCHECK(!IsFinalizable());
+  }
+  mirror::Object* obj =
+      heap->AllocObjectWithAllocator<kIsInstrumented, false>(self, this, this->object_size_,
+                                                             allocator_type, VoidFunctor());
+  if (add_finalizer && LIKELY(obj != nullptr)) {
+    heap->AddFinalizerReference(self, &obj);
+  }
+  return obj;
 }
 
 inline Object* Class::AllocObject(Thread* self) {
@@ -492,17 +501,17 @@
   VisitStaticFieldsReferences<kVisitClass>(this, visitor);
 }
 
-template<bool kDoReadBarrier>
+template<ReadBarrierOption kReadBarrierOption>
 bool Class::IsArtFieldClass() {
-  Class* java_lang_Class = GetClass<kVerifyNone, kDoReadBarrier>();
+  Class* java_lang_Class = GetClass<kVerifyNone, kReadBarrierOption>();
   Class* java_lang_reflect_ArtField =
-      java_lang_Class->GetInstanceField(0)->GetClass<kVerifyNone, kDoReadBarrier>();
+      java_lang_Class->GetInstanceField(0)->GetClass<kVerifyNone, kReadBarrierOption>();
   return this == java_lang_reflect_ArtField;
 }
 
-template<bool kDoReadBarrier>
+template<ReadBarrierOption kReadBarrierOption>
 bool Class::IsArtMethodClass() {
-  return this == ArtMethod::GetJavaLangReflectArtMethod<kDoReadBarrier>();
+  return this == ArtMethod::GetJavaLangReflectArtMethod<kReadBarrierOption>();
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 64a849b..662303e 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -257,7 +257,7 @@
     for (Class* c = this; c != NULL; c = c->GetSuperClass()) {
       count += c->NumReferenceInstanceFieldsDuringLinking();
     }
-    CHECK_EQ((size_t)__builtin_popcount(new_reference_offsets), count);
+    CHECK_EQ((size_t)POPCOUNT(new_reference_offsets), count);
   }
   // Not called within a transaction.
   SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, reference_instance_offsets_),
@@ -268,7 +268,7 @@
   if (new_reference_offsets != CLASS_WALK_SUPER) {
     // Sanity check that the number of bits set in the reference offset bitmap
     // agrees with the number of references
-    CHECK_EQ((size_t)__builtin_popcount(new_reference_offsets),
+    CHECK_EQ((size_t)POPCOUNT(new_reference_offsets),
              NumReferenceStaticFieldsDuringLinking());
   }
   // Not called within a transaction.
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 23211c2..00ecead 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -360,9 +360,10 @@
     return depth;
   }
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsArrayClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetComponentType<kVerifyFlags, kDoReadBarrier>() != NULL;
+    return GetComponentType<kVerifyFlags, kReadBarrierOption>() != NULL;
   }
 
   bool IsClassClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -371,19 +372,20 @@
 
   bool IsThrowableClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<bool kDoReadBarrier = true>
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsArtFieldClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<bool kDoReadBarrier = true>
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsArtMethodClass();
 
   static MemberOffset ComponentTypeOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Class, component_type_);
   }
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Class* GetComponentType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject<Class, kVerifyFlags, kDoReadBarrier>(ComponentTypeOffset());
+    return GetFieldObject<Class, kVerifyFlags, kReadBarrierOption>(ComponentTypeOffset());
   }
 
   void SetComponentType(Class* new_component_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -393,10 +395,10 @@
     SetFieldObject<false, false>(ComponentTypeOffset(), new_component_type);
   }
 
-  template<bool kDoReadBarrier = true>
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   size_t GetComponentSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return Primitive::ComponentSize(
-        GetComponentType<kDefaultVerifyFlags, kDoReadBarrier>()->GetPrimitiveType());
+        GetComponentType<kDefaultVerifyFlags, kReadBarrierOption>()->GetPrimitiveType());
   }
 
   bool IsObjectClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -412,7 +414,7 @@
   }
 
   // Creates a raw object instance but does not invoke the default constructor.
-  template <bool kIsInstrumented>
+  template<bool kIsInstrumented, bool kCheckAddFinalizer = true>
   ALWAYS_INLINE Object* Alloc(Thread* self, gc::AllocatorType allocator_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -427,7 +429,8 @@
     return IsClassClass() || IsArrayClass();
   }
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   uint32_t SizeOf() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, class_size_));
   }
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index c70a08d..064fe30 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -34,9 +34,9 @@
 namespace art {
 namespace mirror {
 
-template<VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline Class* Object::GetClass() {
-  return GetFieldObject<Class, kVerifyFlags, kDoReadBarrier>(
+  return GetFieldObject<Class, kVerifyFlags, kReadBarrierOption>(
       OFFSET_OF_OBJECT_MEMBER(Object, klass_));
 }
 
@@ -105,7 +105,8 @@
 inline Object* Object::GetReadBarrierPointer() {
 #ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
   DCHECK(kUseBakerOrBrooksReadBarrier);
-  return GetFieldObject<Object, kVerifyNone, false>(OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_), false);
+  return GetFieldObject<Object, kVerifyNone, kWithoutReadBarrier>(
+      OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_));
 #else
   LOG(FATAL) << "Unreachable";
   return nullptr;
@@ -118,7 +119,7 @@
   // We don't mark the card as this occurs as part of object allocation. Not all objects have
   // backing cards, such as large objects.
   SetFieldObjectWithoutWriteBarrier<false, false, kVerifyNone>(
-      OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_), rb_ptr, false);
+      OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_), rb_ptr);
 #else
   LOG(FATAL) << "Unreachable";
 #endif
@@ -181,17 +182,17 @@
   return klass->IsAssignableFrom(GetClass<kVerifyFlags>());
 }
 
-template<VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsClass() {
-  Class* java_lang_Class =
-      GetClass<kVerifyFlags, kDoReadBarrier>()->template GetClass<kVerifyFlags, kDoReadBarrier>();
-  return GetClass<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis), kDoReadBarrier>() ==
-      java_lang_Class;
+  Class* java_lang_Class = GetClass<kVerifyFlags, kReadBarrierOption>()->
+      template GetClass<kVerifyFlags, kReadBarrierOption>();
+  return GetClass<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis),
+      kReadBarrierOption>() == java_lang_Class;
 }
 
-template<VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline Class* Object::AsClass() {
-  DCHECK((IsClass<kVerifyFlags, kDoReadBarrier>()));
+  DCHECK((IsClass<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<Class*>(this);
 }
 
@@ -208,15 +209,16 @@
   return down_cast<ObjectArray<T>*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsArrayInstance() {
-  return GetClass<kVerifyFlags, kDoReadBarrier>()->
-      template IsArrayClass<kVerifyFlags, kDoReadBarrier>();
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->
+      template IsArrayClass<kVerifyFlags, kReadBarrierOption>();
 }
 
-template<VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsArtField() {
-  return GetClass<kVerifyFlags, kDoReadBarrier>()->template IsArtFieldClass<kDoReadBarrier>();
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->
+      template IsArtFieldClass<kReadBarrierOption>();
 }
 
 template<VerifyObjectFlags kVerifyFlags>
@@ -225,9 +227,10 @@
   return down_cast<ArtField*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsArtMethod() {
-  return GetClass<kVerifyFlags, kDoReadBarrier>()->template IsArtMethodClass<kDoReadBarrier>();
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->
+      template IsArtMethodClass<kReadBarrierOption>();
 }
 
 template<VerifyObjectFlags kVerifyFlags>
@@ -247,9 +250,9 @@
   return down_cast<Reference*>(this);
 }
 
-template<VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline Array* Object::AsArray() {
-  DCHECK((IsArrayInstance<kVerifyFlags, kDoReadBarrier>()));
+  DCHECK((IsArrayInstance<kVerifyFlags, kReadBarrierOption>()));
   return down_cast<Array*>(this);
 }
 
@@ -375,21 +378,23 @@
   return GetClass<kVerifyFlags>()->IsPhantomReferenceClass();
 }
 
-template<VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline size_t Object::SizeOf() {
   size_t result;
   constexpr auto kNewFlags = static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis);
-  if (IsArrayInstance<kVerifyFlags, kDoReadBarrier>()) {
-    result = AsArray<kNewFlags, kDoReadBarrier>()->template SizeOf<kNewFlags, kDoReadBarrier>();
-  } else if (IsClass<kNewFlags, kDoReadBarrier>()) {
-    result = AsClass<kNewFlags, kDoReadBarrier>()->template SizeOf<kNewFlags, kDoReadBarrier>();
+  if (IsArrayInstance<kVerifyFlags, kReadBarrierOption>()) {
+    result = AsArray<kNewFlags, kReadBarrierOption>()->
+        template SizeOf<kNewFlags, kReadBarrierOption>();
+  } else if (IsClass<kNewFlags, kReadBarrierOption>()) {
+    result = AsClass<kNewFlags, kReadBarrierOption>()->
+        template SizeOf<kNewFlags, kReadBarrierOption>();
   } else {
-    result = GetClass<kNewFlags, kDoReadBarrier>()->GetObjectSize();
+    result = GetClass<kNewFlags, kReadBarrierOption>()->GetObjectSize();
   }
   DCHECK_GE(result, sizeof(Object))
-      << " class=" << PrettyTypeOf(GetClass<kNewFlags, kDoReadBarrier>());
-  DCHECK(!(IsArtField<kNewFlags, kDoReadBarrier>())  || result == sizeof(ArtField));
-  DCHECK(!(IsArtMethod<kNewFlags, kDoReadBarrier>()) || result == sizeof(ArtMethod));
+      << " class=" << PrettyTypeOf(GetClass<kNewFlags, kReadBarrierOption>());
+  DCHECK(!(IsArtField<kNewFlags, kReadBarrierOption>())  || result == sizeof(ArtField));
+  DCHECK(!(IsArtMethod<kNewFlags, kReadBarrierOption>()) || result == sizeof(ArtMethod));
   return result;
 }
 
@@ -532,14 +537,15 @@
   return QuasiAtomic::Cas64(old_value, new_value, addr);
 }
 
-template<class T, VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier, bool kIsVolatile>
+template<class T, VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption,
+         bool kIsVolatile>
 inline T* Object::GetFieldObject(MemberOffset field_offset) {
   if (kVerifyFlags & kVerifyThis) {
     VerifyObject(this);
   }
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
   HeapReference<T>* objref_addr = reinterpret_cast<HeapReference<T>*>(raw_addr);
-  T* result = ReadBarrier::Barrier<T, kDoReadBarrier>(this, field_offset, objref_addr);
+  T* result = ReadBarrier::Barrier<T, kReadBarrierOption>(this, field_offset, objref_addr);
   if (kIsVolatile) {
     QuasiAtomic::MembarLoadLoad();  // Ensure loads don't re-order.
   }
@@ -549,9 +555,9 @@
   return result;
 }
 
-template<class T, VerifyObjectFlags kVerifyFlags, bool kDoReadBarrier>
+template<class T, VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline T* Object::GetFieldObjectVolatile(MemberOffset field_offset) {
-  return GetFieldObject<T, kVerifyFlags, kDoReadBarrier, true>(field_offset);
+  return GetFieldObject<T, kVerifyFlags, kReadBarrierOption, true>(field_offset);
 }
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags,
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 2cd71a0..2f775bc 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -92,9 +92,7 @@
     heap->WriteBarrierEveryFieldOf(dest);
   }
   if (c->IsFinalizable()) {
-    SirtRef<Object> sirt_dest(self, dest);
-    heap->AddFinalizerReference(self, dest);
-    return sirt_dest.get();
+    heap->AddFinalizerReference(self, &dest);
   }
   return dest;
 }
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index cf28b18..442909d 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -67,7 +67,8 @@
     return OFFSET_OF_OBJECT_MEMBER(Object, klass_);
   }
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Class* GetClass() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -86,7 +87,8 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool InstanceOf(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   size_t SizeOf() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   Object* Clone(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -113,9 +115,11 @@
   void Wait(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void Wait(Thread* self, int64_t timeout, int32_t nanos) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Class* AsClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -123,9 +127,11 @@
   template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ObjectArray<T>* AsObjectArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsArrayInstance() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Array* AsArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -158,12 +164,14 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   Throwable* AsThrowable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsArtMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ArtMethod* AsArtMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kDoReadBarrier = true>
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsArtField() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ArtField* AsArtField() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -185,12 +193,12 @@
 
   // Accessor for Java type fields.
   template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
-      bool kDoReadBarrier = true, bool kIsVolatile = false>
+      ReadBarrierOption kReadBarrierOption = kWithReadBarrier, bool kIsVolatile = false>
   T* GetFieldObject(MemberOffset field_offset) ALWAYS_INLINE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
-      bool kDoReadBarrier = true>
+      ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   T* GetFieldObjectVolatile(MemberOffset field_offset) ALWAYS_INLINE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/oat.cc b/runtime/oat.cc
index c1a48e9..4b4b3d0 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -22,7 +22,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '2', '2', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '2', '3', '\0' };
 
 OatHeader::OatHeader() {
   memset(this, 0, sizeof(*this));
diff --git a/runtime/quick/inline_method_analyser.cc b/runtime/quick/inline_method_analyser.cc
index 8bd8dba..d8fc277 100644
--- a/runtime/quick/inline_method_analyser.cc
+++ b/runtime/quick/inline_method_analyser.cc
@@ -174,26 +174,24 @@
     return false;
   }
 
-  uint32_t return_reg = return_instruction->VRegA_11x();
+  int32_t return_reg = return_instruction->VRegA_11x();
   DCHECK_LT(return_reg, code_item->registers_size_);
 
-  uint32_t vA, vB, dummy;
-  uint64_t dummy_wide;
-  instruction->Decode(vA, vB, dummy_wide, dummy, nullptr);
+  int32_t const_value = instruction->VRegB();
   if (instruction->Opcode() == Instruction::CONST_HIGH16) {
-    vB <<= 16;
+    const_value <<= 16;
   }
-  DCHECK_LT(vA, code_item->registers_size_);
-  if (vA != return_reg) {
+  DCHECK_LT(instruction->VRegA(), code_item->registers_size_);
+  if (instruction->VRegA() != return_reg) {
     return false;  // Not returning the value set by const?
   }
-  if (return_opcode == Instruction::RETURN_OBJECT && vB != 0) {
+  if (return_opcode == Instruction::RETURN_OBJECT && const_value != 0) {
     return false;  // Returning non-null reference constant?
   }
   if (result != nullptr) {
     result->opcode = kInlineOpNonWideConst;
     result->flags = kInlineSpecial;
-    result->d.data = static_cast<uint64_t>(vB);
+    result->d.data = static_cast<uint64_t>(const_value);
   }
   return true;
 }
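
The analyser hunk above shows the accessor-based replacement for Decode(): the constant and its destination register are read directly from the instruction, with CONST_HIGH16 widened by a shift. A hedged restatement as a standalone snippet (instruction is an Instruction* for a const-format opcode):

    // Illustrative: recover the value a CONST* instruction materializes.
    int32_t const_value = instruction->VRegB();           // immediate from vB
    if (instruction->Opcode() == Instruction::CONST_HIGH16) {
      const_value <<= 16;                                 // HIGH16 holds the top bits
    }
    int32_t dest_reg = instruction->VRegA();              // register receiving the value
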
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index ea2f830..88e2f8f 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -23,17 +23,18 @@
 
 namespace art {
 
-template <typename MirrorType, bool kDoReadBarrier>
+template <typename MirrorType, ReadBarrierOption kReadBarrierOption>
 inline MirrorType* ReadBarrier::Barrier(
     mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) {
   // Unused for now.
   UNUSED(obj);
   UNUSED(offset);
   UNUSED(ref_addr);
-  if (kDoReadBarrier && kUseBakerReadBarrier) {
+  const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
+  if (with_read_barrier && kUseBakerReadBarrier) {
     // To be implemented.
     return ref_addr->AsMirrorPtr();
-  } else if (kDoReadBarrier && kUseBrooksReadBarrier) {
+  } else if (with_read_barrier && kUseBrooksReadBarrier) {
     // To be implemented.
     return ref_addr->AsMirrorPtr();
   } else {
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index 6f59004..73c3d43 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -33,7 +33,7 @@
 
 class ReadBarrier {
  public:
-  template <typename MirrorType, bool kDoReadBarrier = true>
+  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE static MirrorType* Barrier(
       mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/read_barrier_option.h b/runtime/read_barrier_option.h
new file mode 100644
index 0000000..7de8b8a
--- /dev/null
+++ b/runtime/read_barrier_option.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_READ_BARRIER_OPTION_H_
+#define ART_RUNTIME_READ_BARRIER_OPTION_H_
+namespace art {
+
+// Options for performing a read barrier or not.
+enum ReadBarrierOption {
+  kWithReadBarrier,     // Perform a read barrier.
+  kWithoutReadBarrier,  // Don't perform a read barrier.
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_READ_BARRIER_OPTION_H_
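The new header replaces the bare bool kDoReadBarrier template parameter used across mirror/ and ReadBarrier::Barrier with a named enum, so call sites state intent instead of passing an anonymous true/false. A minimal sketch of the resulting call-site shape (the helper name is illustrative, not part of this change):

    // Illustrative helper templated on the option; with kWithoutReadBarrier the
    // barrier branch in ReadBarrier::Barrier is constant-false and folds away,
    // exactly like the old bool parameter but self-documenting at call sites.
    template <art::ReadBarrierOption kReadBarrierOption = art::kWithReadBarrier>
    art::mirror::Class* GetKlassSketch(art::mirror::Object* obj) {
      return obj->GetClass<art::kVerifyNone, kReadBarrierOption>();
    }

    // Usage:
    //   GetKlassSketch<art::kWithoutReadBarrier>(obj);  // skip the barrier
    //   GetKlassSketch(obj);                            // default: with barrier
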
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 20df78e..027feee 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -160,7 +160,7 @@
   Trace::Shutdown();
 
   // Make sure to let the GC complete if it is running.
-  heap_->WaitForGcToComplete(self);
+  heap_->WaitForGcToComplete(gc::kGcCauseBackground, self);
   heap_->DeleteThreadPool();
 
   // Make sure our internal threads are dead before we start tearing down things they're using.
@@ -1010,8 +1010,8 @@
                              (1 << art::arm::S27) | (1 << art::arm::S28) | (1 << art::arm::S29) |
                              (1 << art::arm::S30) | (1 << art::arm::S31);
     uint32_t fp_spills = type == kSaveAll ? fp_all_spills : 0;
-    size_t frame_size = RoundUp((__builtin_popcount(core_spills) /* gprs */ +
-                                 __builtin_popcount(fp_spills) /* fprs */ +
+    size_t frame_size = RoundUp((POPCOUNT(core_spills) /* gprs */ +
+                                 POPCOUNT(fp_spills) /* fprs */ +
                                  1 /* Method* */) * kArmPointerSize, kStackAlignment);
     method->SetFrameSizeInBytes(frame_size);
     method->SetCoreSpillMask(core_spills);
@@ -1024,7 +1024,7 @@
     uint32_t all_spills = (1 << art::mips::S0) | (1 << art::mips::S1);
     uint32_t core_spills = ref_spills | (type == kRefsAndArgs ? arg_spills : 0) |
                            (type == kSaveAll ? all_spills : 0) | (1 << art::mips::RA);
-    size_t frame_size = RoundUp((__builtin_popcount(core_spills) /* gprs */ +
+    size_t frame_size = RoundUp((POPCOUNT(core_spills) /* gprs */ +
                                 (type == kRefsAndArgs ? 0 : 3) + 1 /* Method* */) *
                                 kMipsPointerSize, kStackAlignment);
     method->SetFrameSizeInBytes(frame_size);
@@ -1035,7 +1035,7 @@
     uint32_t arg_spills = (1 << art::x86::ECX) | (1 << art::x86::EDX) | (1 << art::x86::EBX);
     uint32_t core_spills = ref_spills | (type == kRefsAndArgs ? arg_spills : 0) |
                          (1 << art::x86::kNumberOfCpuRegisters);  // fake return address callee save
-    size_t frame_size = RoundUp((__builtin_popcount(core_spills) /* gprs */ +
+    size_t frame_size = RoundUp((POPCOUNT(core_spills) /* gprs */ +
                                  1 /* Method* */) * kX86PointerSize, kStackAlignment);
     method->SetFrameSizeInBytes(frame_size);
     method->SetCoreSpillMask(core_spills);
@@ -1054,8 +1054,8 @@
         (1 << art::x86_64::XMM3) | (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) |
         (1 << art::x86_64::XMM6) | (1 << art::x86_64::XMM7);
     uint32_t fp_spills = (type == kRefsAndArgs ? fp_arg_spills : 0);
-    size_t frame_size = RoundUp((__builtin_popcount(core_spills) /* gprs */ +
-                                 __builtin_popcount(fp_spills) /* fprs */ +
+    size_t frame_size = RoundUp((POPCOUNT(core_spills) /* gprs */ +
+                                 POPCOUNT(fp_spills) /* fprs */ +
                                  1 /* Method* */) * kX86_64PointerSize, kStackAlignment);
     method->SetFrameSizeInBytes(frame_size);
     method->SetCoreSpillMask(core_spills);
@@ -1094,8 +1094,8 @@
                           (1 << art::arm64::D31);
       uint32_t fp_spills = fp_ref_spills | (type == kRefsAndArgs ? fp_arg_spills: 0)
                           | (type == kSaveAll ? fp_all_spills : 0);
-      size_t frame_size = RoundUp((__builtin_popcount(core_spills) /* gprs */ +
-                                   __builtin_popcount(fp_spills) /* fprs */ +
+      size_t frame_size = RoundUp((POPCOUNT(core_spills) /* gprs */ +
+                                   POPCOUNT(fp_spills) /* fprs */ +
                                    1 /* Method* */) * kArm64PointerSize, kStackAlignment);
       method->SetFrameSizeInBytes(frame_size);
       method->SetCoreSpillMask(core_spills);
diff --git a/runtime/stack.h b/runtime/stack.h
index afc4f25..73a823a 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -25,6 +25,7 @@
 #include "instruction_set.h"
 #include "mirror/object.h"
 #include "mirror/object_reference.h"
+#include "utils.h"
 #include "verify_object.h"
 
 #include <stdint.h>
@@ -638,8 +639,8 @@
                            size_t frame_size, int reg, InstructionSet isa) {
     DCHECK_EQ(frame_size & (kStackAlignment - 1), 0U);
     DCHECK_NE(reg, static_cast<int>(kVRegInvalid));
-    int spill_size = __builtin_popcount(core_spills) * GetBytesPerGprSpillLocation(isa)
-        + __builtin_popcount(fp_spills) * GetBytesPerFprSpillLocation(isa)
+    int spill_size = POPCOUNT(core_spills) * GetBytesPerGprSpillLocation(isa)
+        + POPCOUNT(fp_spills) * GetBytesPerFprSpillLocation(isa)
         + sizeof(uint32_t);  // Filler.
     int num_ins = code_item->ins_size_;
     int num_regs = code_item->registers_size_ - num_ins;
diff --git a/runtime/utils.h b/runtime/utils.h
index 4b2f230..14a532e 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -47,7 +47,7 @@
 };
 
 template<typename T>
-static inline bool IsPowerOfTwo(T x) {
+static constexpr bool IsPowerOfTwo(T x) {
   return (x & (x - 1)) == 0;
 }
 
@@ -115,39 +115,46 @@
 }
 
 // A static if which determines whether to return type A or B based on the condition boolean.
-template <const bool condition, typename A, typename B>
+template <bool condition, typename A, typename B>
 struct TypeStaticIf {
-  typedef A value;
+  typedef A type;
 };
 
 // Specialization to handle the false case.
 template <typename A, typename B>
 struct TypeStaticIf<false, A,  B> {
-  typedef B value;
+  typedef B type;
+};
+
+// Type identity.
+template <typename T>
+struct TypeIdentity {
+  typedef T type;
 };
 
 // For rounding integers.
 template<typename T>
-static inline T RoundDown(T x, int n) {
-  DCHECK(IsPowerOfTwo(n));
-  return (x & -n);
+static constexpr T RoundDown(T x, typename TypeIdentity<T>::type n) {
+  return
+      // DCHECK(IsPowerOfTwo(n)) in a form acceptable in a constexpr function:
+      (kIsDebugBuild && !IsPowerOfTwo(n)) ? (LOG(FATAL) << n << " isn't a power of 2", T(0))
+      : (x & -n);
 }
 
 template<typename T>
-static inline T RoundUp(T x, int n) {
+static constexpr T RoundUp(T x, typename TypeIdentity<T>::type n) {
   return RoundDown(x + n - 1, n);
 }
 
 // For aligning pointers.
 template<typename T>
-static inline T* AlignDown(T* x, int n) {
-  CHECK(IsPowerOfTwo(n));
-  return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(x) & -static_cast<uintptr_t>(n));
+static inline T* AlignDown(T* x, uintptr_t n) {
+  return reinterpret_cast<T*>(RoundDown(reinterpret_cast<uintptr_t>(x), n));
 }
 
 template<typename T>
-static inline T* AlignUp(T* x, int n) {
-  return AlignDown(reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(x) + static_cast<uintptr_t>(n - 1)), n);
+static inline T* AlignUp(T* x, uintptr_t n) {
+  return reinterpret_cast<T*>(RoundUp(reinterpret_cast<uintptr_t>(x), n));
 }
 
 // Implementation is from "Hacker's Delight" by Henry S. Warren, Jr.,
@@ -162,33 +169,25 @@
   return x + 1;
 }
 
-// Implementation is from "Hacker's Delight" by Henry S. Warren, Jr.,
-// figure 5-2, page 66, where the function is called pop.
-static inline int CountOneBits(uint32_t x) {
-  x = x - ((x >> 1) & 0x55555555);
-  x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
-  x = (x + (x >> 4)) & 0x0F0F0F0F;
-  x = x + (x >> 8);
-  x = x + (x >> 16);
-  return static_cast<int>(x & 0x0000003F);
+template<typename T>
+static constexpr int CLZ(T x) {
+  return (sizeof(T) == sizeof(uint32_t))
+      ? __builtin_clz(x)
+      : __builtin_clzll(x);
 }
 
 template<typename T>
-static inline int CLZ(T x) {
-  if (sizeof(T) == sizeof(uint32_t)) {
-    return __builtin_clz(x);
-  } else {
-    return __builtin_clzll(x);
-  }
+static constexpr int CTZ(T x) {
+  return (sizeof(T) == sizeof(uint32_t))
+      ? __builtin_ctz(x)
+      : __builtin_ctzll(x);
 }
 
 template<typename T>
-static inline int CTZ(T x) {
-  if (sizeof(T) == sizeof(uint32_t)) {
-    return __builtin_ctz(x);
-  } else {
-    return __builtin_ctzll(x);
-  }
+static constexpr int POPCOUNT(T x) {
+  return (sizeof(T) == sizeof(uint32_t))
+      ? __builtin_popcount(x)
+      : __builtin_popcountll(x);
 }
 
 static inline uint32_t PointerToLowMemUInt32(const void* p) {
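Two details in the utils.h hunks above are easy to miss: the bit helpers become single-expression constexpr wrappers (so they can feed static_asserts and other compile-time contexts), and RoundDown/RoundUp take the alignment through TypeIdentity<T>::type, which keeps the second argument out of template deduction so only x determines T. A small hedged sketch, assuming the definitions above are in scope:

    // Compile-time use of the constexpr helpers.
    static_assert(POPCOUNT(0xF0u) == 4, "four bits set");
    static_assert(RoundUp(13u, 8u) == 16u, "rounded up to the next multiple of 8");

    // TypeIdentity makes the alignment a non-deduced context: mixing a size_t
    // value with an int literal still deduces T = size_t instead of failing.
    size_t aligned = RoundUp(static_cast<size_t>(13), 8);
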
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 91170f0..03ceed3 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -649,54 +649,53 @@
 }
 
 bool MethodVerifier::VerifyInstruction(const Instruction* inst, uint32_t code_offset) {
-  DecodedInstruction dec_insn(inst);
   bool result = true;
   switch (inst->GetVerifyTypeArgumentA()) {
     case Instruction::kVerifyRegA:
-      result = result && CheckRegisterIndex(dec_insn.vA);
+      result = result && CheckRegisterIndex(inst->VRegA());
       break;
     case Instruction::kVerifyRegAWide:
-      result = result && CheckWideRegisterIndex(dec_insn.vA);
+      result = result && CheckWideRegisterIndex(inst->VRegA());
       break;
   }
   switch (inst->GetVerifyTypeArgumentB()) {
     case Instruction::kVerifyRegB:
-      result = result && CheckRegisterIndex(dec_insn.vB);
+      result = result && CheckRegisterIndex(inst->VRegB());
       break;
     case Instruction::kVerifyRegBField:
-      result = result && CheckFieldIndex(dec_insn.vB);
+      result = result && CheckFieldIndex(inst->VRegB());
       break;
     case Instruction::kVerifyRegBMethod:
-      result = result && CheckMethodIndex(dec_insn.vB);
+      result = result && CheckMethodIndex(inst->VRegB());
       break;
     case Instruction::kVerifyRegBNewInstance:
-      result = result && CheckNewInstance(dec_insn.vB);
+      result = result && CheckNewInstance(inst->VRegB());
       break;
     case Instruction::kVerifyRegBString:
-      result = result && CheckStringIndex(dec_insn.vB);
+      result = result && CheckStringIndex(inst->VRegB());
       break;
     case Instruction::kVerifyRegBType:
-      result = result && CheckTypeIndex(dec_insn.vB);
+      result = result && CheckTypeIndex(inst->VRegB());
       break;
     case Instruction::kVerifyRegBWide:
-      result = result && CheckWideRegisterIndex(dec_insn.vB);
+      result = result && CheckWideRegisterIndex(inst->VRegB());
       break;
   }
   switch (inst->GetVerifyTypeArgumentC()) {
     case Instruction::kVerifyRegC:
-      result = result && CheckRegisterIndex(dec_insn.vC);
+      result = result && CheckRegisterIndex(inst->VRegC());
       break;
     case Instruction::kVerifyRegCField:
-      result = result && CheckFieldIndex(dec_insn.vC);
+      result = result && CheckFieldIndex(inst->VRegC());
       break;
     case Instruction::kVerifyRegCNewArray:
-      result = result && CheckNewArray(dec_insn.vC);
+      result = result && CheckNewArray(inst->VRegC());
       break;
     case Instruction::kVerifyRegCType:
-      result = result && CheckTypeIndex(dec_insn.vC);
+      result = result && CheckTypeIndex(inst->VRegC());
       break;
     case Instruction::kVerifyRegCWide:
-      result = result && CheckWideRegisterIndex(dec_insn.vC);
+      result = result && CheckWideRegisterIndex(inst->VRegC());
       break;
   }
   switch (inst->GetVerifyExtraFlags()) {
@@ -709,11 +708,14 @@
     case Instruction::kVerifySwitchTargets:
       result = result && CheckSwitchTargets(code_offset);
       break;
-    case Instruction::kVerifyVarArg:
-      result = result && CheckVarArgRegs(dec_insn.vA, dec_insn.arg);
+    case Instruction::kVerifyVarArg: {
+      uint32_t args[Instruction::kMaxVarArgRegs];
+      inst->GetVarArgs(args);
+      result = result && CheckVarArgRegs(inst->VRegA(), args);
       break;
+    }
     case Instruction::kVerifyVarArgRange:
-      result = result && CheckVarArgRangeRegs(dec_insn.vA, dec_insn.vC);
+      result = result && CheckVarArgRangeRegs(inst->VRegA(), inst->VRegC());
       break;
     case Instruction::kVerifyError:
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "unexpected opcode " << inst->Name();
@@ -1009,7 +1011,7 @@
 }
 
 bool MethodVerifier::CheckVarArgRegs(uint32_t vA, uint32_t arg[]) {
-  if (vA > 5) {
+  if (vA > Instruction::kMaxVarArgRegs) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid arg count (" << vA << ") in non-range invoke)";
     return false;
   }
@@ -3075,7 +3077,7 @@
   size_t params_size = params == NULL ? 0 : params->Size();
   uint32_t arg[5];
   if (!is_range) {
-    inst->GetArgs(arg);
+    inst->GetVarArgs(arg);
   }
   for (size_t param_index = 0; param_index < params_size; param_index++) {
     if (actual_args >= expected_args) {
@@ -3189,7 +3191,7 @@
   size_t params_size = params == NULL ? 0 : params->Size();
   uint32_t arg[5];
   if (!is_range) {
-    inst->GetArgs(arg);
+    inst->GetVarArgs(arg);
   }
   size_t actual_args = 1;
   for (size_t param_index = 0; param_index < params_size; param_index++) {
@@ -3255,7 +3257,7 @@
       uint32_t arg_count = (is_range) ? inst->VRegA_3rc() : inst->VRegA_35c();
       uint32_t arg[5];
       if (!is_range) {
-        inst->GetArgs(arg);
+        inst->GetVarArgs(arg);
       }
       for (size_t ui = 0; ui < arg_count; ui++) {
         uint32_t get_reg = is_range ? inst->VRegC_3rc() + ui : arg[ui];
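For the invoke/filled-new-array verification above, the two encodings reduce to the pattern below: range forms give a contiguous register window starting at vC, while non-range forms give an explicit list of at most kMaxVarArgRegs registers. Sketch adapted from the hunk above (is_range and inst are as in the surrounding code):

    uint32_t arg[Instruction::kMaxVarArgRegs];
    uint32_t arg_count = is_range ? inst->VRegA_3rc() : inst->VRegA_35c();
    if (!is_range) {
      inst->GetVarArgs(arg);                               // explicit register list
    }
    for (uint32_t ui = 0; ui < arg_count; ++ui) {
      uint32_t get_reg = is_range ? inst->VRegC_3rc() + ui  // contiguous window
                                  : arg[ui];               // listed register
      // ... verify get_reg ...
    }
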
diff --git a/runtime/vmap_table.h b/runtime/vmap_table.h
index 2fbaebe..9821753 100644
--- a/runtime/vmap_table.h
+++ b/runtime/vmap_table.h
@@ -99,7 +99,7 @@
       }
       matches++;
     }
-    CHECK_LT(vmap_offset - matches, static_cast<uint32_t>(__builtin_popcount(spill_mask)));
+    CHECK_LT(vmap_offset - matches, static_cast<uint32_t>(POPCOUNT(spill_mask)));
     uint32_t spill_shifts = 0;
     while (matches != (vmap_offset + 1)) {
       DCHECK_NE(spill_mask, 0u);