Merge "Fix a lock level violation with background compaction enabled."
diff --git a/Android.mk b/Android.mk
index 01819ee..fe631d9 100644
--- a/Android.mk
+++ b/Android.mk
@@ -449,9 +449,7 @@
 use-art-full:
 	adb root && sleep 3
 	adb shell stop
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.dex
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.oat
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.art
+	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-flags ""
 	adb shell setprop dalvik.vm.image-dex2oat-flags ""
 	adb shell setprop persist.sys.dalvik.vm.lib.1 libart.so
@@ -461,9 +459,7 @@
 use-art-smart:
 	adb root && sleep 3
 	adb shell stop
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.dex
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.oat
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.art
+	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-flags "--compiler-filter=interpret-only"
 	adb shell setprop dalvik.vm.image-dex2oat-flags ""
 	adb shell setprop persist.sys.dalvik.vm.lib.1 libart.so
@@ -473,9 +469,7 @@
 use-art-interpret-only:
 	adb root && sleep 3
 	adb shell stop
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.dex
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.oat
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.art
+	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-flags "--compiler-filter=interpret-only"
 	adb shell setprop dalvik.vm.image-dex2oat-flags "--compiler-filter=interpret-only"
 	adb shell setprop persist.sys.dalvik.vm.lib.1 libart.so
@@ -485,9 +479,7 @@
 use-art-verify-none:
 	adb root && sleep 3
 	adb shell stop
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.dex
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.oat
-	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.art
+	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-flags "--compiler-filter=verify-none"
 	adb shell setprop dalvik.vm.image-dex2oat-flags "--compiler-filter=verify-none"
 	adb shell setprop persist.sys.dalvik.vm.lib.1 libart.so
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 6135571..188ddb5 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -110,6 +110,12 @@
 DALVIKVM_FLAGS := -Xcompiler-option --compiler-backend=Optimizing
 endif
 
+#
+# Used to change the default GC. Valid values are CMS, SS, GSS. The default is CMS.
+#
+ART_DEFAULT_GC_TYPE ?= CMS
+ART_DEFAULT_GC_TYPE_CFLAGS := -DART_DEFAULT_GC_TYPE_IS_$(ART_DEFAULT_GC_TYPE)
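+# Example (hypothetical invocation; exact build targets may differ):
+#   ART_DEFAULT_GC_TYPE=GSS make libart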
+
 LLVM_ROOT_PATH := external/llvm
 # Don't fail a dalvik minimal host build.
 -include $(LLVM_ROOT_PATH)/llvm.mk
@@ -237,6 +243,7 @@
 
 ART_HOST_CFLAGS := $(art_cflags) -DANDROID_SMP=1 -DART_BASE_ADDRESS=$(LIBART_IMG_HOST_BASE_ADDRESS)
 ART_HOST_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=default
+ART_HOST_CFLAGS += $(ART_DEFAULT_GC_TYPE_CFLAGS)
 
 ART_TARGET_CFLAGS := $(art_cflags) -DART_TARGET -DART_BASE_ADDRESS=$(LIBART_IMG_TARGET_BASE_ADDRESS)
 ifeq ($(TARGET_CPU_SMP),true)
@@ -244,6 +251,7 @@
 else
   ART_TARGET_CFLAGS += -DANDROID_SMP=0
 endif
+ART_TARGET_CFLAGS += $(ART_DEFAULT_GC_TYPE_CFLAGS)
 
 # DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES is set in ../build/core/dex_preopt.mk based on
 # the TARGET_CPU_VARIANT
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 89c642d..d9d392f 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -46,7 +46,7 @@
   // (1 << kNullCheckElimination) |
   // (1 << kClassInitCheckElimination) |
   // (1 << kPromoteRegs) |
-  (1 << kTrackLiveTemps) |        // FIXME: disable until liveness issue fixed.
+  // (1 << kTrackLiveTemps) |
   // (1 << kSafeOptimizations) |
   // (1 << kBBOpt) |
   // (1 << kMatch) |
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index fc6af29..8fcb09b 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -139,12 +139,25 @@
 }
 
 RegLocation Mir2Lir::LoadValue(RegLocation rl_src, RegisterClass op_kind) {
-  rl_src = EvalLoc(rl_src, op_kind, false);
-  if (IsInexpensiveConstant(rl_src) || rl_src.location != kLocPhysReg) {
-    LoadValueDirect(rl_src, rl_src.reg);
-    rl_src.location = kLocPhysReg;
-    MarkLive(rl_src);
+  rl_src = UpdateLoc(rl_src);
+  if (rl_src.location == kLocPhysReg) {
+    if (!RegClassMatches(op_kind, rl_src.reg)) {
+      // Wrong register class, realloc, copy and transfer ownership.
+      RegStorage new_reg = AllocTypedTemp(rl_src.fp, op_kind);
+      OpRegCopy(new_reg, rl_src.reg);
+      // Associate the old sreg with the new register and clobber the old register.
+      GetRegInfo(new_reg)->SetSReg(GetRegInfo(rl_src.reg)->SReg());
+      Clobber(rl_src.reg);
+      rl_src.reg = new_reg;
+    }
+    return rl_src;
   }
+
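+  // Not currently in a register: allocate a temp of the requested class and load from the
+  // home location.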
+  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
+  rl_src.reg = AllocTypedTemp(rl_src.fp, op_kind);
+  LoadValueDirect(rl_src, rl_src.reg);
+  rl_src.location = kLocPhysReg;
+  MarkLive(rl_src);
   return rl_src;
 }
 
@@ -203,12 +216,26 @@
 
 RegLocation Mir2Lir::LoadValueWide(RegLocation rl_src, RegisterClass op_kind) {
   DCHECK(rl_src.wide);
-  rl_src = EvalLoc(rl_src, op_kind, false);
-  if (IsInexpensiveConstant(rl_src) || rl_src.location != kLocPhysReg) {
-    LoadValueDirectWide(rl_src, rl_src.reg);
-    rl_src.location = kLocPhysReg;
-    MarkLive(rl_src);
+  rl_src = UpdateLocWide(rl_src);
+  if (rl_src.location == kLocPhysReg) {
+    if (!RegClassMatches(op_kind, rl_src.reg)) {
+      // Wrong register class, realloc, copy and transfer ownership.
+      RegStorage new_regs = AllocTypedTempWide(rl_src.fp, op_kind);
+      OpRegCopyWide(new_regs, rl_src.reg);
+      // Associate the old sreg with the new register and clobber the old register.
+      GetRegInfo(new_regs)->SetSReg(GetRegInfo(rl_src.reg)->SReg());
+      Clobber(rl_src.reg);
+      rl_src.reg = new_regs;
+    }
+    return rl_src;
   }
+
+  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
+  DCHECK_NE(GetSRegHi(rl_src.s_reg_low), INVALID_SREG);
+  rl_src.reg = AllocTypedTempWide(rl_src.fp, op_kind);
+  LoadValueDirectWide(rl_src, rl_src.reg);
+  rl_src.location = kLocPhysReg;
+  MarkLive(rl_src);
   return rl_src;
 }
 
@@ -233,7 +260,11 @@
     if (IsLive(rl_src.reg) ||
         IsPromoted(rl_src.reg) ||
         (rl_dest.location == kLocPhysReg)) {
-      // Src is live or promoted or Dest has assigned reg.
+      /*
+       * If src reg[s] are tied to the original Dalvik vreg via liveness or promotion, we
+       * can't repurpose them.  Similarly, if the dest reg[s] are tied to Dalvik vregs via
+       * promotion, we can't just re-assign.  In these cases, we have to copy.
+       */
       rl_dest = EvalLoc(rl_dest, kAnyReg, false);
       OpRegCopyWide(rl_dest.reg, rl_src.reg);
     } else {
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index f5d71c4..2973e14 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -25,10 +25,10 @@
 
 /* Mark a temp register as dead.  Does not affect allocation state. */
 inline void Mir2Lir::ClobberBody(RegisterInfo* p) {
-  if (p->IsTemp()) {
+  DCHECK(p->IsTemp());
+  if (!p->IsDead()) {
     DCHECK(!(p->IsLive() && p->IsDirty()))  << "Live & dirty temp in clobber";
-    p->SetIsLive(false);
-    p->SetSReg(INVALID_SREG);
+    p->MarkDead();
     p->ResetDefBody();
     if (p->IsWide()) {
       p->SetIsWide(false);
@@ -36,8 +36,7 @@
         // Register pair - deal with the other half.
         p = GetRegInfo(p->Partner());
         p->SetIsWide(false);
-        p->SetIsLive(false);
-        p->SetSReg(INVALID_SREG);
+        p->MarkDead();
         p->ResetDefBody();
       }
     }
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 0ffd189..77119a4 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -979,7 +979,7 @@
   }
 
   // Free temp registers and reset redundant store tracking.
-  ClobberAllRegs();
+  ClobberAllTemps();
 
   if (bb->block_type == kEntryBlock) {
     ResetRegPool();
@@ -994,7 +994,7 @@
   for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
     ResetRegPool();
     if (cu_->disable_opt & (1 << kTrackLiveTemps)) {
-      ClobberAllRegs();
+      ClobberAllTemps();
       // Reset temp allocation to minimize differences when A/B testing.
       reg_pool_->ResetNextTemp();
     }
@@ -1074,7 +1074,7 @@
   // Free temp registers and reset redundant store tracking.
   ResetRegPool();
   ResetDefTracking();
-  ClobberAllRegs();
+  ClobberAllTemps();
 
   return GenSpecialCase(bb, mir, special);
 }
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index c5125a2..3016cd1 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -291,6 +291,20 @@
      *    0x0000ffff for 512-bit view of ymm1   // future expansion, if needed
      *    0xffffffff for 1024-bit view of ymm1  // future expansion, if needed
      *
+     * The "liveness" of a register is handled in a similar way.  The liveness_ storage is
+     * held in the widest member of an aliased set.  Note, though, that for a temp register to be
+     * reused as live, it must both be marked live and the associated SReg() must match the
+     * desired s_reg.  This gets a little complicated when dealing with aliased registers.  All
+     * members of an aliased set will share the same liveness flags, but each will individually
+     * maintain s_reg_.  In this way we can know that at least one member of an
+     * aliased set is live, but will only fully match on the appropriate alias view.  For example,
+     * if Arm d1 is live as a double and has s_reg_ set to Dalvik v8 (which also implies v9
+     * because it is wide), its aliases s2 and s3 will show as live, but will have
+     * s_reg_ == INVALID_SREG.  An attempt to later AllocLiveReg() of v9 with a single-precision
+     * view will fail because although s3's liveness bit is set, its s_reg_ will not match v9.
+     * This will cause all members of the aliased set to be clobbered and AllocLiveReg() will
+     * report that v9 is currently not live as a single (which is what we want).
+     *
      * NOTE: the x86 usage is still somewhat in flux.  There are competing notions of how
      * to treat xmm registers:
      *     1. Treat them all as 128-bits wide, but denote how much data used via bytes field.
@@ -319,14 +333,21 @@
       bool InUse() { return (storage_mask_ & master_->used_storage_) != 0; }
       void MarkInUse() { master_->used_storage_ |= storage_mask_; }
       void MarkFree() { master_->used_storage_ &= ~storage_mask_; }
+      // No part of the containing storage is live in this view.
+      bool IsDead() { return (master_->liveness_ & storage_mask_) == 0; }
+      // Liveness of this view matches.  Note: not equivalent to !IsDead().
+      bool IsLive() { return (master_->liveness_ & storage_mask_) == storage_mask_; }
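+      // Example: a 64-bit master whose low 32-bit view alone is live is neither IsDead() nor
+      // IsLive() when queried through the wide view.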
+      void MarkLive() { master_->liveness_ |= storage_mask_; }
+      void MarkDead() {
+        master_->liveness_ &= ~storage_mask_;
+        SetSReg(INVALID_SREG);
+      }
       RegStorage GetReg() { return reg_; }
       void SetReg(RegStorage reg) { reg_ = reg; }
       bool IsTemp() { return is_temp_; }
       void SetIsTemp(bool val) { is_temp_ = val; }
       bool IsWide() { return wide_value_; }
       void SetIsWide(bool val) { wide_value_ = val; }
-      bool IsLive() { return live_; }
-      void SetIsLive(bool val) { live_ = val; }
       bool IsDirty() { return dirty_; }
       void SetIsDirty(bool val) { dirty_ = val; }
       RegStorage Partner() { return partner_; }
@@ -336,7 +357,17 @@
       uint64_t DefUseMask() { return def_use_mask_; }
       void SetDefUseMask(uint64_t def_use_mask) { def_use_mask_ = def_use_mask; }
       RegisterInfo* Master() { return master_; }
-      void SetMaster(RegisterInfo* master) { master_ = master; }
+      void SetMaster(RegisterInfo* master) {
+        master_ = master;
+        if (master != this) {
+          master_->aliased_ = true;
+          DCHECK(alias_chain_ == nullptr);
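+          // Prepend this view to the master's singly-linked chain of aliases.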
+          alias_chain_ = master_->alias_chain_;
+          master_->alias_chain_ = this;
+        }
+      }
+      bool IsAliased() { return aliased_; }
+      RegisterInfo* GetAliasChain() { return alias_chain_; }
       uint32_t StorageMask() { return storage_mask_; }
       void SetStorageMask(uint32_t storage_mask) { storage_mask_ = storage_mask; }
       LIR* DefStart() { return def_start_; }
@@ -350,16 +381,18 @@
       RegStorage reg_;
       bool is_temp_;               // Can allocate as temp?
       bool wide_value_;            // Holds a Dalvik wide value (either itself, or part of a pair).
-      bool live_;                  // Is there an associated SSA name?
       bool dirty_;                 // If live, is it dirty?
+      bool aliased_;               // Is this the master for other aliased RegisterInfo's?
       RegStorage partner_;         // If wide_value, other reg of pair or self if 64-bit register.
       int s_reg_;                  // Name of live value.
       uint64_t def_use_mask_;      // Resources for this element.
       uint32_t used_storage_;      // 1 bit per 4 bytes of storage. Unused by aliases.
+      uint32_t liveness_;          // 1 bit per 4 bytes of storage. Unused by aliases.
       RegisterInfo* master_;       // Pointer to controlling storage mask.
       uint32_t storage_mask_;      // Track allocation of sub-units.
       LIR *def_start_;             // Starting inst in last def sequence.
       LIR *def_end_;               // Ending inst in last def sequence.
+      RegisterInfo* alias_chain_;  // Chain of aliased registers.
     };
 
     class RegisterPool {
@@ -598,8 +631,8 @@
     void DumpRegPools();
     /* Mark a temp register as dead.  Does not affect allocation state. */
     void Clobber(RegStorage reg);
-    void ClobberSRegBody(GrowableArray<RegisterInfo*>* regs, int s_reg);
     void ClobberSReg(int s_reg);
+    void ClobberAliases(RegisterInfo* info);
     int SRegToPMap(int s_reg);
     void RecordCorePromotion(RegStorage reg, int s_reg);
     RegStorage AllocPreservedCoreReg(int s_reg);
@@ -630,7 +663,7 @@
     void ResetDefLoc(RegLocation rl);
     void ResetDefLocWide(RegLocation rl);
     void ResetDefTracking();
-    void ClobberAllRegs();
+    void ClobberAllTemps();
     void FlushSpecificReg(RegisterInfo* info);
     void FlushAllRegs();
     bool RegClassMatches(int reg_class, RegStorage reg);
@@ -648,9 +681,9 @@
     RegLocation UpdateRawLoc(RegLocation loc);
 
     /**
-     * @brief Used to load register location into a typed temporary or pair of temporaries.
+     * @brief Used to prepare a register location to receive a wide value.
      * @see EvalLoc
-     * @param loc The register location to load from.
+     * @param loc the location where the value will be stored.
      * @param reg_class Type of register needed.
      * @param update Whether the liveness information should be updated.
      * @return Returns the properly typed temporary in physical register pairs.
@@ -658,8 +691,8 @@
     RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update);
 
     /**
-     * @brief Used to load register location into a typed temporary.
-     * @param loc The register location to load from.
+     * @brief Used to prepare a register location to receive a value.
+     * @param loc the location where the value will be stored.
      * @param reg_class Type of register needed.
      * @param update Whether the liveness information should be updated.
      * @return Returns the properly typed temporary in physical register.
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 76553af..bcc077b 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -39,8 +39,9 @@
 }
 
 Mir2Lir::RegisterInfo::RegisterInfo(RegStorage r, uint64_t mask)
-  : reg_(r), is_temp_(false), wide_value_(false), live_(false),
-    dirty_(false), partner_(r), s_reg_(INVALID_SREG), def_use_mask_(mask), master_(this) {
+  : reg_(r), is_temp_(false), wide_value_(false), dirty_(false), aliased_(false), partner_(r),
+    s_reg_(INVALID_SREG), def_use_mask_(mask), master_(this), def_start_(nullptr),
+    def_end_(nullptr), alias_chain_(nullptr) {
   switch (r.StorageSize()) {
     case 0: storage_mask_ = 0xffffffff; break;
     case 4: storage_mask_ = 0x00000001; break;
@@ -51,6 +52,7 @@
     case 128: storage_mask_ = 0xffffffff; break;
   }
   used_storage_ = r.Valid() ? ~storage_mask_ : storage_mask_;
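+  // Mirror used_storage_: a valid register starts with its liveness bits clear (i.e. dead).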
+  liveness_ = used_storage_;
 }
 
 Mir2Lir::RegisterPool::RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena,
@@ -65,9 +67,13 @@
     next_sp_reg_(0), dp_regs_(arena, dp_regs.size()), next_dp_reg_(0), m2l_(m2l)  {
   // Initialize the fast lookup map.
   m2l_->reginfo_map_.Reset();
-  m2l_->reginfo_map_.Resize(RegStorage::kMaxRegs);
-  for (unsigned i = 0; i < RegStorage::kMaxRegs; i++) {
-    m2l_->reginfo_map_.Insert(nullptr);
+  if (kIsDebugBuild) {
+    m2l_->reginfo_map_.Resize(RegStorage::kMaxRegs);
+    for (unsigned i = 0; i < RegStorage::kMaxRegs; i++) {
+      m2l_->reginfo_map_.Insert(nullptr);
+    }
+  } else {
+    m2l_->reginfo_map_.SetSize(RegStorage::kMaxRegs);
   }
 
   // Construct the register pool.
@@ -138,24 +144,43 @@
 }
 
 void Mir2Lir::Clobber(RegStorage reg) {
-  if (reg.IsPair()) {
-    ClobberBody(GetRegInfo(reg.GetLow()));
-    ClobberBody(GetRegInfo(reg.GetHigh()));
+  if (UNLIKELY(reg.IsPair())) {
+    DCHECK(!GetRegInfo(reg.GetLow())->IsAliased());
+    Clobber(reg.GetLow());
+    DCHECK(!GetRegInfo(reg.GetHigh())->IsAliased());
+    Clobber(reg.GetHigh());
   } else {
-    ClobberBody(GetRegInfo(reg));
+    RegisterInfo* info = GetRegInfo(reg);
+    if (info->IsTemp() && !info->IsDead()) {
+      ClobberBody(info);
+      if (info->IsAliased()) {
+        ClobberAliases(info);
+      } else {
+        RegisterInfo* master = info->Master();
+        if (info != master) {
+          ClobberBody(info->Master());
+        }
+      }
+    }
   }
 }
 
-void Mir2Lir::ClobberSRegBody(GrowableArray<RegisterInfo*>* regs, int s_reg) {
-  GrowableArray<RegisterInfo*>::Iterator it(regs);
-  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
-    if ((info->SReg() == s_reg)  ||
-        (info->IsWide() && (GetRegInfo(info->Partner())->SReg() == s_reg))) {
-      // NOTE: a single s_reg may appear multiple times, so we can't short-circuit.
-      if (info->IsTemp()) {
-        info->SetIsLive(false);
+void Mir2Lir::ClobberAliases(RegisterInfo* info) {
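+  // Walk the master's alias chain and kill any alias views that still name an s_reg.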
+  for (RegisterInfo* alias = info->GetAliasChain(); alias != nullptr;
+       alias = alias->GetAliasChain()) {
+    DCHECK(!alias->IsAliased());  // Only the master should be marked as aliased.
+    if (alias->SReg() != INVALID_SREG) {
+      alias->SetSReg(INVALID_SREG);
+      alias->ResetDefBody();
+      if (alias->IsWide()) {
+        alias->SetIsWide(false);
+        if (alias->GetReg() != alias->Partner()) {
+          RegisterInfo* p = GetRegInfo(alias->Partner());
+          p->SetIsWide(false);
+          p->MarkDead();
+          p->ResetDefBody();
+        }
       }
-      info->ResetDefBody();
     }
   }
 }
@@ -173,15 +198,18 @@
  */
 void Mir2Lir::ClobberSReg(int s_reg) {
   if (s_reg != INVALID_SREG) {
-    /* Reset live temp tracking sanity checker */
-    if (kIsDebugBuild) {
-      if (s_reg == live_sreg_) {
-        live_sreg_ = INVALID_SREG;
+    if (kIsDebugBuild && s_reg == live_sreg_) {
+      live_sreg_ = INVALID_SREG;
+    }
+    GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_);
+    for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) {
+      if (info->SReg() == s_reg) {
+        ClobberBody(info);
+        if (info->IsAliased()) {
+          ClobberAliases(info);
+        }
       }
     }
-    ClobberSRegBody(&reg_pool_->core_regs_, s_reg);
-    ClobberSRegBody(&reg_pool_->sp_regs_, s_reg);
-    ClobberSRegBody(&reg_pool_->dp_regs_, s_reg);
   }
 }
 
@@ -296,9 +324,14 @@
     if (next >= num_regs)
       next = 0;
     RegisterInfo* info = regs.Get(next);
+    // Try to allocate a register that doesn't hold a live value.
     if (info->IsTemp() && !info->InUse() && !info->IsLive()) {
       Clobber(info->GetReg());
       info->MarkInUse();
+      /*
+       * NOTE: "wideness" is an attribute of how the container is used, not its physical size.
+       * The caller will set wideness as appropriate.
+       */
       info->SetIsWide(false);
       *next_temp = next + 1;
       return info->GetReg();
@@ -306,11 +339,14 @@
     next++;
   }
   next = *next_temp;
+  // No free non-live regs.  Anything we can kill?
   for (int i = 0; i< num_regs; i++) {
     if (next >= num_regs)
       next = 0;
     RegisterInfo* info = regs.Get(next);
     if (info->IsTemp() && !info->InUse()) {
+      // Got one.  Kill it.
+      ClobberSReg(info->SReg());
       Clobber(info->GetReg());
       info->MarkInUse();
       info->SetIsWide(false);
@@ -367,39 +403,47 @@
     reg = FindLiveReg(wide ? reg_pool_->dp_regs_ : reg_pool_->sp_regs_, s_reg);
   }
   if (!reg.Valid() && (reg_class != kFPReg)) {
+    // TODO: add 64-bit core pool similar to above.
     reg = FindLiveReg(reg_pool_->core_regs_, s_reg);
   }
   if (reg.Valid()) {
-    if (wide && reg.Is32Bit() && !reg.IsFloat()) {
-      // Only allow reg pairs for Core.
+    if (wide && !reg.IsFloat() && !Is64BitInstructionSet(cu_->instruction_set)) {
+      // Only allow reg pairs for core regs on 32-bit targets.
       RegStorage high_reg = FindLiveReg(reg_pool_->core_regs_, s_reg + 1);
       if (high_reg.Valid()) {
-        RegisterInfo* info_lo = GetRegInfo(reg);
-        RegisterInfo* info_hi = GetRegInfo(high_reg);
-        if (info_lo->IsTemp()) {
-          info_lo->MarkInUse();
-        }
-        if (info_hi->IsTemp()) {
-          info_hi->MarkInUse();
-        }
         reg = RegStorage::MakeRegPair(reg, high_reg);
         MarkWide(reg);
       } else {
-        // Only half available - clobber.
-        Clobber(reg);
+        // Only half available.
         reg = RegStorage::InvalidReg();
       }
     }
-    if (reg.Valid() && !reg.IsPair()) {
+    if (reg.Valid() && (wide != GetRegInfo(reg)->IsWide())) {
+      // Width mismatch - don't try to reuse.
+      reg = RegStorage::InvalidReg();
+    }
+  }
+  if (reg.Valid()) {
+    if (reg.IsPair()) {
+      RegisterInfo* info_low = GetRegInfo(reg.GetLow());
+      RegisterInfo* info_high = GetRegInfo(reg.GetHigh());
+      if (info_low->IsTemp()) {
+        info_low->MarkInUse();
+      }
+      if (info_high->IsTemp()) {
+        info_high->MarkInUse();
+      }
+    } else {
       RegisterInfo* info = GetRegInfo(reg);
       if (info->IsTemp()) {
         info->MarkInUse();
       }
     }
-    if (reg.Valid() && (wide != GetRegInfo(reg)->IsWide())) {
-      // Width mismatch - don't try to reuse.
-      Clobber(reg);
-      reg = RegStorage::InvalidReg();
+  } else {
+    // Either not found, or something didn't match up. Clobber to prevent any stale instances.
+    ClobberSReg(s_reg);
+    if (wide) {
+      ClobberSReg(s_reg + 1);
     }
   }
   return reg;
@@ -424,6 +468,7 @@
   if (reg.IsPair()) {
     RegisterInfo* p_lo = GetRegInfo(reg.GetLow());
     RegisterInfo* p_hi = GetRegInfo(reg.GetHigh());
+    DCHECK_EQ(p_lo->IsLive(), p_hi->IsLive());
     res = p_lo->IsLive() || p_hi->IsLive();
   } else {
     RegisterInfo* p = GetRegInfo(reg);
@@ -482,13 +527,13 @@
     RegisterInfo* p_lo = GetRegInfo(reg.GetLow());
     RegisterInfo* p_hi = GetRegInfo(reg.GetHigh());
     p_lo->MarkInUse();
-    p_lo->SetIsLive(false);
+    p_lo->MarkDead();
     p_hi->MarkInUse();
-    p_hi->SetIsLive(false);
+    p_hi->MarkDead();
   } else {
     RegisterInfo* p = GetRegInfo(reg);
     p->MarkInUse();
-    p->SetIsLive(false);
+    p->MarkDead();
   }
 }
 
@@ -567,6 +612,18 @@
         info_hi->ResetDefBody();
       }
       rl.reg = rl.reg.GetLow();
+    } else {
+      /*
+       * TODO: If not a pair, we can't just drop the high register.  On some targets, we may be
+       * able to re-cast the 64-bit register as 32 bits, so it might be worthwhile to revisit
+       * this code.  Will probably want to make this a virtual function.
+       */
+      // Can't narrow 64-bit register.  Clobber.
+      if (GetRegInfo(rl.reg)->IsTemp()) {
+        Clobber(rl.reg);
+        FreeTemp(rl.reg);
+      }
+      rl.location = kLocDalvikFrame;
     }
   }
   rl.wide = false;
@@ -606,13 +663,10 @@
   }
 }
 
-void Mir2Lir::ClobberAllRegs() {
+void Mir2Lir::ClobberAllTemps() {
   GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_);
   for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) {
-    info->SetIsLive(false);
-    info->SetSReg(INVALID_SREG);
-    info->ResetDefBody();
-    info->SetIsWide(false);
+    ClobberBody(info);
   }
 }
 
@@ -667,11 +721,10 @@
 void Mir2Lir::FlushAllRegs() {
   GrowableArray<RegisterInfo*>::Iterator it(&tempreg_info_);
   for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
-    if (info->IsLive() && info->IsDirty()) {
+    if (info->IsDirty() && info->IsLive()) {
       FlushSpecificReg(info);
     }
-    DCHECK(info->IsTemp());
-    info->SetIsLive(false);
+    info->MarkDead();
     info->SetSReg(INVALID_SREG);
     info->ResetDefBody();
     info->SetIsWide(false);
@@ -697,12 +750,12 @@
   if (s_reg != INVALID_SREG) {
     ClobberSReg(s_reg);
     if (info->IsTemp()) {
-      info->SetIsLive(true);
+      info->MarkLive();
     }
   } else {
     // Can't be live if no associated s_reg.
     DCHECK(info->IsTemp());
-    info->SetIsLive(false);
+    info->MarkDead();
   }
   info->SetSReg(s_reg);
 }
@@ -901,9 +954,8 @@
   /* If already in registers, we can assume proper form.  Right reg class? */
   if (loc.location == kLocPhysReg) {
     if (!RegClassMatches(reg_class, loc.reg)) {
-      /* Wrong register class.  Reallocate and copy */
+      // Wrong register class.  Reallocate and transfer ownership.
       RegStorage new_regs = AllocTypedTempWide(loc.fp, reg_class);
-      OpRegCopyWide(new_regs, loc.reg);
       // Associate the old sreg with the new register and clobber the old register.
       GetRegInfo(new_regs)->SetSReg(GetRegInfo(loc.reg)->SReg());
       Clobber(loc.reg);
@@ -935,9 +987,8 @@
 
   if (loc.location == kLocPhysReg) {
     if (!RegClassMatches(reg_class, loc.reg)) {
-      /* Wrong register class.  Realloc, copy and transfer ownership */
+      // Wrong register class.  Reallocate and transfer ownership.
       RegStorage new_reg = AllocTypedTemp(loc.fp, reg_class);
-      OpRegCopy(new_reg, loc.reg);
       // Associate the old sreg with the new register and clobber the old register.
       GetRegInfo(new_reg)->SetSReg(GetRegInfo(loc.reg)->SReg());
       Clobber(loc.reg);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index f7fcf19..9648312 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -521,6 +521,19 @@
     void Materialize();
 
     /*
+     * Mir2Lir's UpdateLoc() looks to see if the Dalvik value is currently live in any temp register
+     * without regard to data type.  In practice, this can result in UpdateLoc returning a
+     * location record for a Dalvik float value in a core register, and vice versa.  For targets
+     * that can inexpensively move data between core and float registers, this can often be a win.
+     * However, for x86 this is generally not a win.  These variants of UpdateLoc()
+     * take a register class argument - and will return an in-register location record only if
+     * the value is live in a temp register of the correct class.  Additionally, if the value is in
+     * a temp register of the wrong register class, it will be clobbered.
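+     * For example (illustrative): if a Dalvik float value sits in a core temp, UpdateLocTyped()
+     * with kFPReg will clobber and free that temp and return an in-memory (kLocDalvikFrame)
+     * location, forcing a subsequent reload into a float register.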
+     */
+    RegLocation UpdateLocTyped(RegLocation loc, int reg_class);
+    RegLocation UpdateLocWideTyped(RegLocation loc, int reg_class);
+
+    /*
      * @brief Analyze MIR before generating code, to prepare for the code generation.
      */
     void AnalyzeMIR();
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index d1c2e70..22e554e 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -173,7 +173,8 @@
    * If the result's location is in memory, then we do not need to do anything
    * more since the fstp has already placed the correct value in memory.
    */
-  RegLocation rl_result = is_double ? UpdateLocWide(rl_dest) : UpdateLoc(rl_dest);
+  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest, kFPReg) :
+      UpdateLocTyped(rl_dest, kFPReg);
   if (rl_result.location == kLocPhysReg) {
     /*
      * We already know that the result is in a physical register but do not know if it is the
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index ce5766f..b6e0841 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1054,7 +1054,7 @@
     int32_t val_hi = High32Bits(val);
     FlushAllRegs();
     LockCallTemps();  // Prepare for explicit register usage.
-    rl_src1 = UpdateLocWide(rl_src1);
+    rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
     bool src1_in_reg = rl_src1.location == kLocPhysReg;
     int displacement = SRegOffset(rl_src1.s_reg_low);
 
@@ -1100,8 +1100,8 @@
 
   FlushAllRegs();
   LockCallTemps();  // Prepare for explicit register usage.
-  rl_src1 = UpdateLocWide(rl_src1);
-  rl_src2 = UpdateLocWide(rl_src2);
+  rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
+  rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
 
   // At this point, the VRs are in their home locations.
   bool src1_in_reg = rl_src1.location == kLocPhysReg;
@@ -1196,7 +1196,7 @@
   if (rl_src.location == kLocPhysReg) {
     // Both operands are in registers.
     // But we must ensure that rl_src is in pair
-    rl_src = EvalLocWide(rl_src, kCoreReg, true);
+    rl_src = LoadValueWide(rl_src, kCoreReg);
     if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
       // The registers are the same, so we would clobber it before the use.
       RegStorage temp_reg = AllocTemp();
@@ -1227,12 +1227,12 @@
 }
 
 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
-  rl_dest = UpdateLocWide(rl_dest);
+  rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
   if (rl_dest.location == kLocPhysReg) {
     // Ensure we are in a register pair
     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
 
-    rl_src = UpdateLocWide(rl_src);
+    rl_src = UpdateLocWideTyped(rl_src, kCoreReg);
     GenLongRegOrMemOp(rl_result, rl_src, op);
     StoreFinalValueWide(rl_dest, rl_result);
     return;
@@ -1285,7 +1285,7 @@
     rl_result = ForceTempWide(rl_result);
 
     // Perform the operation using the RHS.
-    rl_src2 = UpdateLocWide(rl_src2);
+    rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
     GenLongRegOrMemOp(rl_result, rl_src2, op);
 
     // And now record that the result is in the temp.
@@ -1296,8 +1296,8 @@
   // It wasn't in registers, so it better be in memory.
   DCHECK((rl_dest.location == kLocDalvikFrame) ||
          (rl_dest.location == kLocCompilerTemp));
-  rl_src1 = UpdateLocWide(rl_src1);
-  rl_src2 = UpdateLocWide(rl_src2);
+  rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
+  rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
 
   // Get one of the source operands into temporary register.
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
@@ -1731,7 +1731,7 @@
   int64_t val = mir_graph_->ConstantValueWide(rl_src);
   int32_t val_lo = Low32Bits(val);
   int32_t val_hi = High32Bits(val);
-  rl_dest = UpdateLocWide(rl_dest);
+  rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
 
   // Can we just do this into memory?
   if ((rl_dest.location == kLocDalvikFrame) ||
@@ -1779,8 +1779,8 @@
   int64_t val = mir_graph_->ConstantValueWide(rl_src2);
   int32_t val_lo = Low32Bits(val);
   int32_t val_hi = High32Bits(val);
-  rl_dest = UpdateLocWide(rl_dest);
-  rl_src1 = UpdateLocWide(rl_src1);
+  rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
+  rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
 
   // Can we do this directly into the destination registers?
   if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
@@ -2070,7 +2070,7 @@
 
   if (unary) {
     rl_lhs = LoadValue(rl_lhs, kCoreReg);
-    rl_result = UpdateLoc(rl_dest);
+    rl_result = UpdateLocTyped(rl_dest, kCoreReg);
     rl_result = EvalLoc(rl_dest, kCoreReg, true);
     OpRegReg(op, rl_result.reg, rl_lhs.reg);
   } else {
@@ -2080,7 +2080,7 @@
       LoadValueDirectFixed(rl_rhs, t_reg);
       if (is_two_addr) {
         // Can we do this directly into memory?
-        rl_result = UpdateLoc(rl_dest);
+        rl_result = UpdateLocTyped(rl_dest, kCoreReg);
         rl_rhs = LoadValue(rl_rhs, kCoreReg);
         if (rl_result.location != kLocPhysReg) {
           // Okay, we can do this into memory
@@ -2104,12 +2104,12 @@
       // Multiply is 3 operand only (sort of).
       if (is_two_addr && op != kOpMul) {
         // Can we do this directly into memory?
-        rl_result = UpdateLoc(rl_dest);
+        rl_result = UpdateLocTyped(rl_dest, kCoreReg);
         if (rl_result.location == kLocPhysReg) {
           // Ensure res is in a core reg
           rl_result = EvalLoc(rl_dest, kCoreReg, true);
           // Can we do this from memory directly?
-          rl_rhs = UpdateLoc(rl_rhs);
+          rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg);
           if (rl_rhs.location != kLocPhysReg) {
             OpRegMem(op, rl_result.reg, rl_rhs);
             StoreFinalValue(rl_dest, rl_result);
@@ -2137,8 +2137,8 @@
         }
       } else {
         // Try to use reg/memory instructions.
-        rl_lhs = UpdateLoc(rl_lhs);
-        rl_rhs = UpdateLoc(rl_rhs);
+        rl_lhs = UpdateLocTyped(rl_lhs, kCoreReg);
+        rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg);
         // We can't optimize with FP registers.
         if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
           // Something is difficult, so fall back to the standard case.
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 0c61439..c401baf 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -854,7 +854,7 @@
       }
     } else {
       // Runtime start index.
-      rl_start = UpdateLoc(rl_start);
+      rl_start = UpdateLocTyped(rl_start, kCoreReg);
       if (rl_start.location == kLocPhysReg) {
         // Handle "start index < 0" case.
         OpRegReg(kOpXor, rs_rBX, rs_rBX);
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 5326c2b..a4e1255 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -904,4 +904,30 @@
   }
 }
 
+RegLocation X86Mir2Lir::UpdateLocTyped(RegLocation loc, int reg_class) {
+  loc = UpdateLoc(loc);
+  if ((loc.location == kLocPhysReg) && (loc.fp != loc.reg.IsFloat())) {
+    if (GetRegInfo(loc.reg)->IsTemp()) {
+      Clobber(loc.reg);
+      FreeTemp(loc.reg);
+      loc.reg = RegStorage::InvalidReg();
+      loc.location = kLocDalvikFrame;
+    }
+  }
+  return loc;
+}
+
+RegLocation X86Mir2Lir::UpdateLocWideTyped(RegLocation loc, int reg_class) {
+  loc = UpdateLocWide(loc);
+  if ((loc.location == kLocPhysReg) && (loc.fp != loc.reg.IsFloat())) {
+    if (GetRegInfo(loc.reg)->IsTemp()) {
+      Clobber(loc.reg);
+      FreeTemp(loc.reg);
+      loc.reg = RegStorage::InvalidReg();
+      loc.location = kLocDalvikFrame;
+    }
+  }
+  return loc;
+}
+
 }  // namespace art
diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h
index df5aa7b..979f516 100644
--- a/compiler/dex/reg_storage.h
+++ b/compiler/dex/reg_storage.h
@@ -22,14 +22,14 @@
 
 /*
  * 16-bit representation of the physical register container holding a Dalvik value.
- * The encoding allows up to 32 physical elements per storage class, and supports eight
+ * The encoding allows up to 64 physical elements per storage class, and supports eight
  * register container shapes.
  *
- * [V] [D] [HHHHH] [SSS] [F] [LLLLL]
+ * [V] [HHHHH] [SSS] [F] [LLLLLL]
  *
- * [LLLLL]
+ * [LLLLLL]
  *  Physical register number for the low or solo register.
- *    0..31
+ *    0..63
  *
  * [F]
 *  Describes type of the [LLLLLL] register.
@@ -51,19 +51,13 @@
  *  Physical register number of the high register (valid only for register pair).
  *    0..31
  *
- * [D]
- *  Describes type of the [HHHHH] register (valid only for register pair).
- *    0: Core
- *    1: Floating point
- *
  * [V]
  *    0 -> Invalid
  *    1 -> Valid
  *
  * Note that in all non-invalid cases, we can determine if the storage is floating point
- * by testing bit 6.  Though a mismatch appears to be permitted by the format, the [F][D] values
- * from each half of a pair must match (this allows the high and low regs of a pair to be more
- * easily individually manipulated).
+ * by testing bit 7.  Note also that a register pair is effectively limited to a pair of
+ * physical register numbers in the 0..31 range.
  *
  * On some target architectures, the same underlying physical register container can be given
  * different views.  For example, Arm's 32-bit single-precision floating point registers
@@ -82,30 +76,30 @@
     kValidMask     = 0x8000,
     kValid         = 0x8000,
     kInvalid       = 0x0000,
-    kShapeMask     = 0x01c0,
-    k32BitSolo     = 0x0040,
-    k64BitSolo     = 0x0080,
-    k64BitPair     = 0x00c0,
-    k128BitSolo    = 0x0100,
-    k256BitSolo    = 0x0140,
-    k512BitSolo    = 0x0180,
-    k1024BitSolo   = 0x01c0,
-    k64BitMask     = 0x0180,
-    k64Bits        = 0x0080,
-    kShapeTypeMask = 0x01e0,
-    kFloatingPoint = 0x0020,
+    kShapeMask     = 0x0380,
+    k32BitSolo     = 0x0080,
+    k64BitSolo     = 0x0100,
+    k64BitPair     = 0x0180,
+    k128BitSolo    = 0x0200,
+    k256BitSolo    = 0x0280,
+    k512BitSolo    = 0x0300,
+    k1024BitSolo   = 0x0380,
+    k64BitMask     = 0x0300,
+    k64Bits        = 0x0100,
+    kShapeTypeMask = 0x03c0,
+    kFloatingPoint = 0x0040,
     kCoreRegister  = 0x0000,
   };
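+  // Example (illustrative): FP double d0 as a 64-bit solo register encodes as
+  // kValid | k64BitSolo | kFloatingPoint | 0 == 0x8140.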
 
-  static const uint16_t kRegValMask  = 0x01ff;  // Num, type and shape.
-  static const uint16_t kRegTypeMask = 0x003f;  // Num and type.
-  static const uint16_t kRegNumMask  = 0x001f;  // Num only.
+  static const uint16_t kRegValMask  = 0x03ff;     // Num, type and shape.
+  static const uint16_t kRegTypeMask = 0x007f;     // Num and type.
+  static const uint16_t kRegNumMask  = 0x003f;     // Num only.
+  static const uint16_t kHighRegNumMask = 0x001f;  // 0..31 for high reg
   static const uint16_t kMaxRegs     = kRegValMask + 1;
-  // TODO: deprecate use of kInvalidRegVal and speed up GetReg().
-  static const uint16_t kInvalidRegVal = 0x01ff;
-  static const uint16_t kHighRegShift = 9;
-  static const uint16_t kShapeMaskShift = 6;
-  static const uint16_t kHighRegMask = (kRegTypeMask << kHighRegShift);
+  // TODO: deprecate use of kInvalidRegVal and speed up GetReg().  Rely on valid bit instead.
+  static const uint16_t kInvalidRegVal = 0x03ff;
+  static const uint16_t kHighRegShift = 10;
+  static const uint16_t kHighRegMask = (kHighRegNumMask << kHighRegShift);
 
   // Reg is [F][LLLLL], will override any existing shape and use rs_kind.
   RegStorage(RegStorageKind rs_kind, int reg) {
@@ -116,7 +110,9 @@
   RegStorage(RegStorageKind rs_kind, int low_reg, int high_reg) {
     DCHECK_EQ(rs_kind, k64BitPair);
     DCHECK_EQ(low_reg & kFloatingPoint, high_reg & kFloatingPoint);
-    reg_ = kValid | rs_kind | ((high_reg & kRegTypeMask) << kHighRegShift) | (low_reg & kRegTypeMask);
+    DCHECK_LE(high_reg & kRegNumMask, kHighRegNumMask) << "High reg must be in 0..31";
+    reg_ = kValid | rs_kind | ((high_reg & kHighRegNumMask) << kHighRegShift) |
+        (low_reg & kRegTypeMask);
   }
   constexpr explicit RegStorage(uint16_t val) : reg_(val) {}
   RegStorage() : reg_(kInvalid) {}
@@ -206,7 +202,7 @@
   // Retrieve the most significant register of a pair.
   int GetHighReg() const {
     DCHECK(IsPair());
-    return k32BitSolo | ((reg_ & kHighRegMask) >> kHighRegShift);
+    return k32BitSolo | ((reg_ & kHighRegMask) >> kHighRegShift) | (reg_ & kFloatingPoint);
   }
 
   // Create a stand-alone RegStorage from the high reg of a pair.
@@ -217,7 +213,7 @@
 
   void SetHighReg(int reg) {
     DCHECK(IsPair());
-    reg_ = (reg_ & ~kHighRegMask) | ((reg & kRegTypeMask) << kHighRegShift);
+    reg_ = (reg_ & ~kHighRegMask) | ((reg & kHighRegNumMask) << kHighRegShift);
   }
 
   // Return the register number of low or solo.
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index fb909a8..d03b99f 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -62,18 +62,15 @@
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (X0) in interpreter ABI.
-      // FIXME IPx used by VIXL - this is unsafe.
       __ JumpTo(Arm64ManagedRegister::FromCoreRegister(X0), Offset(offset.Int32Value()),
           Arm64ManagedRegister::FromCoreRegister(IP1));
 
       break;
     case kJniAbi:  // Load via Thread* held in JNIEnv* in first argument (X0).
-
       __ LoadRawPtr(Arm64ManagedRegister::FromCoreRegister(IP1),
                       Arm64ManagedRegister::FromCoreRegister(X0),
                       Offset(JNIEnvExt::SelfOffset().Int32Value()));
 
-      // FIXME IPx used by VIXL - this is unsafe.
       __ JumpTo(Arm64ManagedRegister::FromCoreRegister(IP1), Offset(offset.Int32Value()),
                 Arm64ManagedRegister::FromCoreRegister(IP0));
 
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index b4bb979..f486b3c 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -50,11 +50,11 @@
 }
 
 void Arm64Assembler::GetCurrentThread(ManagedRegister tr) {
-  ___ Mov(reg_x(tr.AsArm64().AsCoreRegister()), reg_x(TR1));
+  ___ Mov(reg_x(tr.AsArm64().AsCoreRegister()), reg_x(ETR));
 }
 
 void Arm64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) {
-  StoreToOffset(TR1, SP, offset.Int32Value());
+  StoreToOffset(ETR, SP, offset.Int32Value());
 }
 
 // See Arm64 PCS Section 5.2.2.1.
@@ -79,11 +79,13 @@
     // VIXL macro-assembler handles all variants.
     ___ Add(reg_x(rd), reg_x(rn), value);
   } else {
-    // ip1 = rd + value
-    // rd = cond ? ip1 : rn
-    CHECK_NE(rn, IP1);
-    ___ Add(reg_x(IP1), reg_x(rn), value);
-    ___ Csel(reg_x(rd), reg_x(IP1), reg_x(rd), COND_OP(cond));
+    // temp = rd + value
+    // rd = cond ? temp : rn
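+    // Have VIXL pick a scratch register (IP0/IP1 by default), excluding rd and rn.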
+    vixl::UseScratchRegisterScope temps(vixl_masm_);
+    temps.Exclude(reg_x(rd), reg_x(rn));
+    vixl::Register temp = temps.AcquireX();
+    ___ Add(temp, reg_x(rn), value);
+    ___ Csel(reg_x(rd), temp, reg_x(rd), COND_OP(cond));
   }
 }
 
@@ -162,7 +164,7 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadImmediate(scratch.AsCoreRegister(), imm);
-  StoreToOffset(scratch.AsCoreRegister(), TR1, offs.Int32Value());
+  StoreToOffset(scratch.AsCoreRegister(), ETR, offs.Int32Value());
 }
 
 void Arm64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> tr_offs,
@@ -171,13 +173,14 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsCoreRegister()) << scratch;
   AddConstant(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
-  StoreToOffset(scratch.AsCoreRegister(), TR1, tr_offs.Int32Value());
+  StoreToOffset(scratch.AsCoreRegister(), ETR, tr_offs.Int32Value());
 }
 
 void Arm64Assembler::StoreStackPointerToThread64(ThreadOffset<8> tr_offs) {
-  // Arm64 does not support: "str sp, [dest]" therefore we use IP1 as a temp reg.
-  ___ Mov(reg_x(IP1), reg_x(SP));
-  StoreToOffset(IP1, TR1, tr_offs.Int32Value());
+  vixl::UseScratchRegisterScope temps(vixl_masm_);
+  vixl::Register temp = temps.AcquireX();
+  ___ Mov(temp, reg_x(SP));
+  ___ Str(temp, MEM_OP(reg_x(ETR), tr_offs.Int32Value()));
 }
 
 void Arm64Assembler::StoreSpanning(FrameOffset dest_off, ManagedRegister m_source,
@@ -195,12 +198,14 @@
   if ((cond == AL) || (cond == NV)) {
     ___ Mov(reg_x(dest), value);
   } else {
-    // ip1 = value
-    // rd = cond ? ip1 : rd
+    // temp = value
+    // rd = cond ? temp : rd
     if (value != 0) {
-      CHECK_NE(dest, IP1);
-      ___ Mov(reg_x(IP1), value);
-      ___ Csel(reg_x(dest), reg_x(IP1), reg_x(dest), COND_OP(cond));
+      vixl::UseScratchRegisterScope temps(vixl_masm_);
+      temps.Exclude(reg_x(dest));
+      vixl::Register temp = temps.AcquireX();
+      ___ Mov(temp, value);
+      ___ Csel(reg_x(dest), temp, reg_x(dest), COND_OP(cond));
     } else {
       ___ Csel(reg_x(dest), reg_x(XZR), reg_x(dest), COND_OP(cond));
     }
@@ -276,7 +281,7 @@
 }
 
 void Arm64Assembler::LoadFromThread64(ManagedRegister m_dst, ThreadOffset<8> src, size_t size) {
-  return Load(m_dst.AsArm64(), TR1, src.Int32Value(), size);
+  return Load(m_dst.AsArm64(), ETR, src.Int32Value(), size);
 }
 
 void Arm64Assembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) {
@@ -298,13 +303,16 @@
   Arm64ManagedRegister dst = m_dst.AsArm64();
   Arm64ManagedRegister base = m_base.AsArm64();
   CHECK(dst.IsCoreRegister() && base.IsCoreRegister());
-  LoadFromOffset(dst.AsCoreRegister(), base.AsCoreRegister(), offs.Int32Value());
+  // Remove dst and base from the temp list - higher level API uses IP1, IP0.
+  vixl::UseScratchRegisterScope temps(vixl_masm_);
+  temps.Exclude(reg_x(dst.AsCoreRegister()), reg_x(base.AsCoreRegister()));
+  ___ Ldr(reg_x(dst.AsCoreRegister()), MEM_OP(reg_x(base.AsCoreRegister()), offs.Int32Value()));
 }
 
 void Arm64Assembler::LoadRawPtrFromThread64(ManagedRegister m_dst, ThreadOffset<8> offs) {
   Arm64ManagedRegister dst = m_dst.AsArm64();
   CHECK(dst.IsCoreRegister()) << dst;
-  LoadFromOffset(dst.AsCoreRegister(), TR1, offs.Int32Value());
+  LoadFromOffset(dst.AsCoreRegister(), ETR, offs.Int32Value());
 }
 
 // Copying routines.
@@ -342,7 +350,7 @@
                                           ManagedRegister m_scratch) {
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadFromOffset(scratch.AsCoreRegister(), TR1, tr_offs.Int32Value());
+  LoadFromOffset(scratch.AsCoreRegister(), ETR, tr_offs.Int32Value());
   StoreToOffset(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
 }
 
@@ -352,7 +360,7 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
-  StoreToOffset(scratch.AsCoreRegister(), TR1, tr_offs.Int32Value());
+  StoreToOffset(scratch.AsCoreRegister(), ETR, tr_offs.Int32Value());
 }
 
 void Arm64Assembler::CopyRef(FrameOffset dest, FrameOffset src,
@@ -511,7 +519,10 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(base.IsCoreRegister()) << base;
   CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadFromOffset(scratch.AsCoreRegister(), base.AsCoreRegister(), offs.Int32Value());
+  // Remove base and scratch from the temp list - higher level API uses IP1, IP0.
+  vixl::UseScratchRegisterScope temps(vixl_masm_);
+  temps.Exclude(reg_x(base.AsCoreRegister()), reg_x(scratch.AsCoreRegister()));
+  ___ Ldr(reg_x(scratch.AsCoreRegister()), MEM_OP(reg_x(base.AsCoreRegister()), offs.Int32Value()));
   ___ Br(reg_x(scratch.AsCoreRegister()));
 }
 
@@ -595,13 +606,17 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   Arm64Exception *current_exception = new Arm64Exception(scratch, stack_adjust);
   exception_blocks_.push_back(current_exception);
-  LoadFromOffset(scratch.AsCoreRegister(), TR1, Thread::ExceptionOffset<8>().Int32Value());
+  LoadFromOffset(scratch.AsCoreRegister(), ETR, Thread::ExceptionOffset<8>().Int32Value());
   ___ Cmp(reg_x(scratch.AsCoreRegister()), 0);
   ___ B(current_exception->Entry(), COND_OP(NE));
 }
 
 void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) {
-    // Bind exception poll entry.
+  vixl::UseScratchRegisterScope temps(vixl_masm_);
+  temps.Exclude(reg_x(exception->scratch_.AsCoreRegister()));
+  vixl::Register temp = temps.AcquireX();
+
+  // Bind exception poll entry.
   ___ Bind(exception->Entry());
   if (exception->stack_adjust_ != 0) {  // Fix up the frame.
     DecreaseFrameSize(exception->stack_adjust_);
@@ -609,12 +624,14 @@
   // Pass exception object as argument.
   // Don't care about preserving X0 as this won't return.
   ___ Mov(reg_x(X0), reg_x(exception->scratch_.AsCoreRegister()));
-  LoadFromOffset(IP1, TR1, QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value());
+  ___ Ldr(temp, MEM_OP(reg_x(ETR), QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value()));
 
-  // FIXME: Temporary fix for TR (XSELF).
-  ___ Mov(reg_x(TR), reg_x(TR1));
+  // Move ETR (callee-saved) back to TR (caller-saved). We use ETR on calls
+  // to external functions that might trash TR. We do not need the original
+  // X19 saved in BuildFrame().
+  ___ Mov(reg_x(TR), reg_x(ETR));
 
-  ___ Blr(reg_x(IP1));
+  ___ Blr(temp);
   // Call should never return.
   ___ Brk();
 }
@@ -634,8 +651,10 @@
   CHECK_EQ(callee_save_regs.size(), kCalleeSavedRegsSize);
   ___ PushCalleeSavedRegisters();
 
-  // FIXME: Temporary fix for TR (XSELF).
-  ___ Mov(reg_x(TR1), reg_x(TR));
+  // Move TR (caller-saved) to ETR (callee-saved). The original X19 has been
+  // saved by PushCalleeSavedRegisters(). This way we make sure that TR is not
+  // trashed by native code.
+  ___ Mov(reg_x(ETR), reg_x(TR));
 
   // Increase frame to required size - must be at least space to push Method*.
   CHECK_GT(frame_size, kCalleeSavedRegsSize * kFramePointerSize);
@@ -681,8 +700,10 @@
   size_t adjust = frame_size - (kCalleeSavedRegsSize * kFramePointerSize);
   DecreaseFrameSize(adjust);
 
-  // FIXME: Temporary fix for TR (XSELF).
-  ___ Mov(reg_x(TR), reg_x(TR1));
+  // We move ETR (callee-saved) back to TR (caller-saved), since TR might have
+  // been trashed in the native call. The original X19 (ETR) is restored as
+  // part of PopCalleeSavedRegisters().
+  ___ Mov(reg_x(TR), reg_x(ETR));
 
   // Pop callee saved and return to LR.
   ___ PopCalleeSavedRegisters();
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 97fb93a..583150c 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -85,6 +85,7 @@
   vixl_masm_(new vixl::MacroAssembler(vixl_buf_, kBufferSizeArm64)) {}
 
   virtual ~Arm64Assembler() {
+    delete vixl_masm_;
     delete[] vixl_buf_;
   }
 
@@ -237,8 +238,8 @@
   // Vixl buffer.
   byte* vixl_buf_;
 
-  // Unique ptr - vixl assembler.
-  UniquePtr<vixl::MacroAssembler> vixl_masm_;
+  // Vixl assembler.
+  vixl::MacroAssembler* vixl_masm_;
 
   // List of exception blocks to generate at the end of the code cache.
   std::vector<Arm64Exception*> exception_blocks_;
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 3529c27..e90006e 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -764,7 +764,7 @@
 
   for (int i = 0; i < argc; i++) {
     const StringPiece option(argv[i]);
-    bool log_options = false;
+    const bool log_options = false;
     if (log_options) {
       LOG(INFO) << "dex2oat: option[" << i << "]=" << argv[i];
     }
@@ -957,7 +957,9 @@
   bool image = (!image_filename.empty());
   if (!image && boot_image_filename.empty()) {
     boot_image_filename += GetAndroidRoot();
-    boot_image_filename += "/framework/boot.art";
+    boot_image_filename += "/framework/boot-";
+    boot_image_filename += GetInstructionSetString(instruction_set);
+    boot_image_filename += ".art";
   }
   std::string boot_image_option;
   if (!boot_image_filename.empty()) {
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 412a052..fc60c02 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1548,7 +1548,7 @@
   }
   UniquePtr<Runtime> runtime(Runtime::Current());
   // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
-  // give it away now and then switch to a more managable ScopedObjectAccess.
+  // give it away now and then switch to a more manageable ScopedObjectAccess.
   Thread::Current()->TransitionFromRunnableToSuspended(kNative);
   ScopedObjectAccess soa(Thread::Current());
   gc::Heap* heap = Runtime::Current()->GetHeap();
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 8079460..f7cb254 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -443,35 +443,32 @@
     DELIVER_PENDING_EXCEPTION
 .endm
 
-// FIXME: Temporary fix for TR(XSELF).
 .macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
-    mov x0, x19                        // pass Thread::Current
+    mov x0, xSELF                     // pass Thread::Current
     mov x1, sp                        // pass SP
     b   \cxx_name                     // \cxx_name(Thread*, SP)
 END \c_name
 .endm
 
-// FIXME: Temporary fix for TR(XSELF).
 .macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context.
-    mov x1, x19                       // pass Thread::Current.
+    mov x1, xSELF                     // pass Thread::Current.
     mov x2, sp                        // pass SP.
     b   \cxx_name                     // \cxx_name(arg, Thread*, SP).
     brk 0
 END \c_name
 .endm
 
-// FIXME: Temporary fix for TR(XSELF).
 .macro TWO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
-    mov x2, x19                       // pass Thread::Current
+    mov x2, xSELF                     // pass Thread::Current
     mov x3, sp                        // pass SP
     b   \cxx_name                     // \cxx_name(arg1, arg2, Thread*, SP)
     brk 0
@@ -991,7 +988,6 @@
      * failure.
      */
     .extern artHandleFillArrayDataFromCode
-// TODO: xSELF -> x19.
 ENTRY art_quick_handle_fill_data
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // Save callee saves in case exception allocation triggers GC.
     mov    x2, xSELF                       // Pass Thread::Current.
@@ -1166,12 +1162,7 @@
     brk 0                         // Unreached.
 END art_quick_aput_obj
 
-UNIMPLEMENTED art_quick_initialize_static_storage
-UNIMPLEMENTED art_quick_initialize_type
-UNIMPLEMENTED art_quick_initialize_type_and_verify_access
-
 // Macro to facilitate adding new allocation entrypoints.
-// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
@@ -1186,7 +1177,6 @@
 .endm
 
 // Macro to facilitate adding new array allocation entrypoints.
-// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
@@ -1244,6 +1234,16 @@
 END \name
 .endm
 
+    /*
+     * Entry from managed code when static storage is uninitialized. This stub will run the class
+     * initializer and deliver an exception on error. On success the static storage base is
+     * returned.
+     */
+TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+
+UNIMPLEMENTED art_quick_initialize_type
+UNIMPLEMENTED art_quick_initialize_type_and_verify_access
+
 ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
 ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
 ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
@@ -1273,8 +1273,13 @@
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_set64_static
 
-
-UNIMPLEMENTED art_quick_resolve_string
+    /*
+     * Entry from managed code to resolve a string. This stub will allocate a String and deliver
+     * an exception on error. On success the String is returned. x0 holds the referring method,
+     * w1 holds the string index. The fast-path check for a hit in the strings cache has already
+     * been performed.
+     */
+TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALL_ALLOC_ENTRYPOINTS
@@ -1293,7 +1298,7 @@
     mov     x2, xSELF                   // pass Thread::Current
     mov     x3, sp                      // pass SP
     bl      artQuickProxyInvokeHandler  // (Method* proxy method, receiver, Thread*, SP)
-    ldr  xSELF, [sp, #200]              // Restore self pointer.
+    ldr     xSELF, [sp, #200]           // Restore self pointer.
     ldr     x2, [xSELF, THREAD_EXCEPTION_OFFSET]
     cbnz    x2, .Lexception_in_proxy    // handle exception if one is pending
     RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME_NO_D0 // keep d0
@@ -1308,14 +1313,13 @@
 
 ENTRY art_quick_resolution_trampoline
     SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
-    mov x19, x0           // save the called method
     mov x2, xSELF
     mov x3, sp
     bl artQuickResolutionTrampoline  // (called, receiver, Thread*, SP)
-    mov x9, x0            // Remember returned code pointer in x9.
-    mov x0, x19           // Restore the method, before x19 is restored to on-call value
+    cbz x0, 1f
+    mov x9, x0              // Remember returned code pointer in x9.
+    ldr x0, [sp, #0]        // artQuickResolutionTrampoline puts called method in *SP.
     RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
-    cbz x9, 1f
     br x9
 1:
     RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
diff --git a/runtime/arch/arm64/registers_arm64.h b/runtime/arch/arm64/registers_arm64.h
index 2503918..ea346e0 100644
--- a/runtime/arch/arm64/registers_arm64.h
+++ b/runtime/arch/arm64/registers_arm64.h
@@ -56,8 +56,8 @@
   X29 = 29,
   X30 = 30,
   X31 = 31,
-  TR  = 18,     // ART Thread Register - Needs to be one of the callee saved regs.
-  TR1 = 19,     // FIXME!
+  TR  = 18,     // ART Thread Register - Managed Runtime (Caller Saved Reg)
+  ETR = 19,     // ART Thread Register - External Calls  (Callee Saved Reg)
   IP0 = 16,     // Used as scratch by VIXL.
   IP1 = 17,     // Used as scratch by ART JNI Assembler.
   FP  = 29,
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 4438f25..86f52aa 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -51,6 +51,11 @@
     }
   }
 
+  // Helper function needed since TEST_F makes a new class.
+  Thread::tls_ptr_sized_values* GetTlsPtr(Thread* self) {
+    return &self->tlsPtr_;
+  }
+
   size_t Invoke3(size_t arg0, size_t arg1, size_t arg2, uintptr_t code, Thread* self) {
     // Push a transition back into managed code onto the linked list in thread.
     ManagedStack fragment;
@@ -727,13 +732,6 @@
 #endif
 }
 
-
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
-extern "C" void art_quick_alloc_object_rosalloc(void);
-extern "C" void art_quick_alloc_object_resolved_rosalloc(void);
-extern "C" void art_quick_alloc_object_initialized_rosalloc(void);
-#endif
-
 TEST_F(StubTest, AllocObject) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
@@ -751,13 +749,12 @@
   // Play with it...
 
   EXPECT_FALSE(self->IsExceptionPending());
-
   {
     // Use an arbitrary method from c to use as referrer
     size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex()),    // type_idx
                             reinterpret_cast<size_t>(c->GetVirtualMethod(0)),  // arbitrary
                             0U,
-                            reinterpret_cast<uintptr_t>(&art_quick_alloc_object_rosalloc),
+                            reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocObject),
                             self);
 
     EXPECT_FALSE(self->IsExceptionPending());
@@ -771,7 +768,7 @@
     // We can use nullptr in the second argument as we do not need a method here (not used in
     // resolved/initialized cases)
     size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr), 0U,
-                            reinterpret_cast<uintptr_t>(&art_quick_alloc_object_resolved_rosalloc),
+                            reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocObjectResolved),
                             self);
 
     EXPECT_FALSE(self->IsExceptionPending());
@@ -785,7 +782,7 @@
     // We can use nullptr in the second argument as we do not need a method here (not used in
     // resolved/initialized cases)
     size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr), 0U,
-                            reinterpret_cast<uintptr_t>(&art_quick_alloc_object_initialized_rosalloc),
+                            reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocObjectInitialized),
                             self);
 
     EXPECT_FALSE(self->IsExceptionPending());
@@ -827,7 +824,7 @@
         sirt_refs.push_back(ref);
       }
     }
-    LOG(DEBUG) << "Used " << sirt_refs.size() << " arrays to fill space.";
+    LOG(INFO) << "Used " << sirt_refs.size() << " arrays to fill space.";
 
     // Allocate simple objects till it fails.
     while (!self->IsExceptionPending()) {
@@ -842,7 +839,7 @@
     self->ClearException();
 
     size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr), 0U,
-                            reinterpret_cast<uintptr_t>(&art_quick_alloc_object_initialized_rosalloc),
+                            reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocObjectInitialized),
                             self);
 
     EXPECT_TRUE(self->IsExceptionPending());
@@ -866,12 +863,6 @@
 #endif
 }
 
-
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
-extern "C" void art_quick_alloc_array_rosalloc(void);
-extern "C" void art_quick_alloc_array_resolved_rosalloc(void);
-#endif
-
 TEST_F(StubTest, AllocObjectArray) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
@@ -902,7 +893,7 @@
     size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex()),    // type_idx
                             reinterpret_cast<size_t>(c_obj->GetVirtualMethod(0)),  // arbitrary
                             10U,
-                            reinterpret_cast<uintptr_t>(&art_quick_alloc_array_rosalloc),
+                            reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocArray),
                             self);
 
     EXPECT_FALSE(self->IsExceptionPending());
@@ -917,7 +908,7 @@
     // We can use nullptr in the second argument as we do not need a method here (not used in
     // resolved/initialized cases)
     size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr), 10U,
-                            reinterpret_cast<uintptr_t>(&art_quick_alloc_array_resolved_rosalloc),
+                            reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocArrayResolved),
                             self);
 
     EXPECT_FALSE(self->IsExceptionPending()) << PrettyTypeOf(self->GetException(nullptr));
@@ -937,7 +928,7 @@
   {
     size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr),
                             GB,  // that should fail...
-                            reinterpret_cast<uintptr_t>(&art_quick_alloc_array_resolved_rosalloc),
+                            reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocArrayResolved),
                             self);
 
     EXPECT_TRUE(self->IsExceptionPending());
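
Note on the stub_test.cc hunks above: the test no longer hard-codes the rosalloc stubs but dispatches through the thread-local entrypoint table, so it exercises whichever allocator the runtime was configured with (this is also why StubTest is made a friend of Thread further down). A minimal sketch of the pattern, reusing the GetTlsPtr helper added above; type_idx and referrer are placeholder values:

    // Look up the installed allocation stub instead of naming a concrete one.
    Thread* self = Thread::Current();
    uintptr_t alloc_entry =
        reinterpret_cast<uintptr_t>(GetTlsPtr(self)->quick_entrypoints.pAllocObject);
    // Works unchanged whether the runtime selected rosalloc, bump-pointer, etc.
    size_t result = Invoke3(type_idx, referrer, 0U, alloc_entry, self);
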
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index c4461fa..6944278 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -286,17 +286,18 @@
 // and the "-verbose:" command line argument.
 struct LogVerbosity {
   bool class_linker;  // Enabled with "-verbose:class".
-  bool verifier;
   bool compiler;
-  bool heap;
   bool gc;
+  bool heap;
   bool jdwp;
   bool jni;
   bool monitor;
+  bool profiler;
+  bool signals;
   bool startup;
   bool third_party_jni;  // Enabled with "-verbose:third-party-jni".
   bool threads;
-  bool signals;
+  bool verifier;
 };
 
 extern LogVerbosity gLogVerbosity;
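
Each flag in LogVerbosity gates one VLOG stream; the macro is roughly of this shape (a sketch, not the verbatim definition in this header):

    // Assumed shape: VLOG(jdwp) expands to a log statement that is silent
    // unless -verbose:jdwp set gLogVerbosity.jdwp at startup.
    #define VLOG(module) if (::art::gLogVerbosity.module) LOG(INFO)

This is why the LOG(DEBUG) call sites throughout this change become VLOG(jdwp), VLOG(signals), VLOG(profiler), and so on: the messages remain available but are off by default.
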
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 1efd2e0..22a0e22 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -595,17 +595,17 @@
 void Dbg::GcDidFinish() {
   if (gDdmHpifWhen != HPIF_WHEN_NEVER) {
     ScopedObjectAccess soa(Thread::Current());
-    LOG(DEBUG) << "Sending heap info to DDM";
+    VLOG(jdwp) << "Sending heap info to DDM";
     DdmSendHeapInfo(gDdmHpifWhen);
   }
   if (gDdmHpsgWhen != HPSG_WHEN_NEVER) {
     ScopedObjectAccess soa(Thread::Current());
-    LOG(DEBUG) << "Dumping heap to DDM";
+    VLOG(jdwp) << "Dumping heap to DDM";
     DdmSendHeapSegments(false);
   }
   if (gDdmNhsgWhen != HPSG_WHEN_NEVER) {
     ScopedObjectAccess soa(Thread::Current());
-    LOG(DEBUG) << "Dumping native heap to DDM";
+    VLOG(jdwp) << "Dumping native heap to DDM";
     DdmSendHeapSegments(true);
   }
 }
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 283faa2..4d7fd0a 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -35,13 +35,6 @@
 // Static fault manger object accessed by signal handler.
 FaultManager fault_manager;
 
-extern "C" {
-void art_sigsegv_fault() {
-  // Set a breakpoint here to be informed when a SIGSEGV is unhandled by ART.
-  LOG(ERROR)<< "Caught unknown SIGSEGV in ART fault handler";
-}
-}
-
 // Signal handler called on SIGSEGV.
 static void art_fault_handler(int sig, siginfo_t* info, void* context) {
   fault_manager.HandleFault(sig, info, context);
@@ -67,11 +60,15 @@
 }
 
 void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
-  LOG(DEBUG) << "Handling fault";
+  // BE CAREFUL ALLOCATING HERE, INCLUDING USING LOG(...)
+  //
+  // If malloc calls abort, it will be holding its lock.
+  // If the handler tries to call malloc, it will deadlock.
+  VLOG(signals) << "Handling fault";
   if (IsInGeneratedCode(context, true)) {
-    LOG(DEBUG) << "in generated code, looking for handler";
+    VLOG(signals) << "in generated code, looking for handler";
     for (const auto& handler : generated_code_handlers_) {
-      LOG(DEBUG) << "invoking Action on handler " << handler;
+      VLOG(signals) << "invoking Action on handler " << handler;
       if (handler->Action(sig, info, context)) {
         return;
       }
@@ -82,10 +79,7 @@
       return;
     }
   }
-
-  // Allow the user to catch this problem with a simple breakpoint in art_sigsegv_fault.
-  art_sigsegv_fault();
-
+  LOG(ERROR) << "Caught unknown SIGSEGV in ART fault handler";
   oldaction_.sa_sigaction(sig, info, context);
 }
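
The warning added to HandleFault above deserves emphasis: LOG can allocate, and the fault may have been raised while this very thread held the allocator lock. An illustrative (non-ART) failure sequence:

    // T: malloc() takes the heap lock, then dereferences a bad pointer -> SIGSEGV.
    // T: art_fault_handler() runs on the same thread.
    // T: LOG(...) allocates -> operator new -> malloc() -> blocks on the heap
    //    lock that T already holds. Deadlock.
    // VLOG(signals) avoids this on the common path because it is off by default.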
 
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 188b6b3..a58df8e 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -39,9 +39,6 @@
 
 void ReferenceProcessor::DisableSlowPath(Thread* self) {
   slow_path_enabled_ = false;
-  // Set to null so that GetReferent knows to not attempt to use the callback for seeing if
-  // referents are marked.
-  process_references_args_.is_marked_callback_ = nullptr;
   condition_.Broadcast(self);
 }
 
@@ -57,13 +54,17 @@
   }
   MutexLock mu(self, lock_);
   while (slow_path_enabled_) {
+    mirror::Object* const referent = reference->GetReferent();
+    // If the referent has been cleared, return null.
+    if (referent == nullptr) {
+      return nullptr;
+    }
     // Try to see if the referent is already marked by using the is_marked_callback. We can return
     // it to the mutator as long as the GC is not preserving references. If the GC is
     // preserving references, the mutator could take a white field and move it somewhere else
     // in the heap causing corruption since this field would get swept.
     IsMarkedCallback* const is_marked_callback = process_references_args_.is_marked_callback_;
     if (!preserving_references_ && is_marked_callback != nullptr) {
-      mirror::Object* const referent = reference->GetReferent();
       mirror::Object* const obj = is_marked_callback(referent, process_references_args_.arg_);
       // If it's null it means not marked, but it could become marked if the referent is reachable
       // by finalizer referents. So we can not return in this case and must block.
@@ -107,14 +108,9 @@
     process_references_args_.is_marked_callback_ = is_marked_callback;
     process_references_args_.mark_callback_ = mark_object_callback;
     process_references_args_.arg_ = arg;
+    CHECK_EQ(slow_path_enabled_, concurrent) << "Slow path must be enabled iff concurrent";
   }
-  if (concurrent) {
-    MutexLock mu(self, lock_);
-    CHECK(slow_path_enabled_) << "Slow path must be enabled for concurrent reference processing";
-    timings->StartSplit("ProcessReferences");
-  } else {
-    timings->StartSplit("(Paused)ProcessReferences");
-  }
+  timings->StartSplit(concurrent ? "ProcessReferences" : "(Paused)ProcessReferences");
   // Unless required to clear soft references with white references, preserve some white referents.
   if (!clear_soft_references) {
     TimingLogger::ScopedSplit split(concurrent ? "PreserveSomeSoftReferences" :
@@ -125,7 +121,6 @@
     // References with a marked referent are removed from the list.
     soft_reference_queue_.PreserveSomeSoftReferences(&PreserveSoftReferenceCallback,
                                                      &process_references_args_);
-
     process_mark_stack_callback(arg);
     if (concurrent) {
       StopPreservingReferences(self);
@@ -158,10 +153,17 @@
   DCHECK(weak_reference_queue_.IsEmpty());
   DCHECK(finalizer_reference_queue_.IsEmpty());
   DCHECK(phantom_reference_queue_.IsEmpty());
-  if (concurrent) {
+  {
     MutexLock mu(self, lock_);
-    // Done processing, disable the slow path and broadcast to the waiters.
-    DisableSlowPath(self);
+    // Need to always do this since the next GC may be concurrent. Doing this only in the
+    // concurrent case could result in a stale is_marked_callback_ being called before the
+    // reference processing starts, since there is a small window of time where
+    // slow_path_enabled_ is true but the callback isn't set yet.
+    process_references_args_.is_marked_callback_ = nullptr;
+    if (concurrent) {
+      // Done processing, disable the slow path and broadcast to the waiters.
+      DisableSlowPath(self);
+    }
   }
   timings->EndSplit();
 }
@@ -172,7 +174,7 @@
                                                 IsMarkedCallback is_marked_callback, void* arg) {
   // klass can be the class of the old object if the visitor already updated the class of ref.
   DCHECK(klass->IsReferenceClass());
-  mirror::Object* referent = ref->GetReferent();
+  mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
   if (referent != nullptr) {
     mirror::Object* forward_address = is_marked_callback(referent, arg);
     // Null means that the object is not currently marked.
@@ -219,4 +221,3 @@
 
 }  // namespace gc
 }  // namespace art
-
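
The window described by the new comment in reference_processor.cc, spelled out (a sketch; T_gc is the GC thread, T_mut a mutator calling GetReferent):

    // T_gc:  slow_path_enabled_ = true;          // next concurrent GC begins
    // T_mut: GetReferent() takes the slow path and reads is_marked_callback_
    // T_gc:  is_marked_callback_ = callback;     // installed just too late
    //
    // If a previous collection left a stale callback installed, T_mut would
    // invoke it against a collector that is no longer running. Nulling the
    // callback at the end of every collection, concurrent or not, closes this.
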
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index d2bd9a4..caacef5 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -109,7 +109,7 @@
                                           void* arg) {
   while (!IsEmpty()) {
     mirror::Reference* ref = DequeuePendingReference();
-    mirror::Object* referent = ref->GetReferent();
+    mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
     if (referent != nullptr) {
       mirror::Object* forward_address = preserve_callback(referent, arg);
       if (forward_address == nullptr) {
@@ -136,7 +136,7 @@
                                                 void* arg) {
   while (!IsEmpty()) {
     mirror::FinalizerReference* ref = DequeuePendingReference()->AsFinalizerReference();
-    mirror::Object* referent = ref->GetReferent();
+    mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
     if (referent != nullptr) {
       mirror::Object* forward_address = is_marked_callback(referent, arg);
       // If the referent isn't marked, mark it and update the
@@ -164,7 +164,7 @@
   ReferenceQueue cleared;
   while (!IsEmpty()) {
     mirror::Reference* ref = DequeuePendingReference();
-    mirror::Object* referent = ref->GetReferent();
+    mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
     if (referent != nullptr) {
       mirror::Object* forward_address = preserve_callback(referent, arg);
       if (forward_address == nullptr) {
@@ -180,4 +180,3 @@
 
 }  // namespace gc
 }  // namespace art
-
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 858582e..a7795e6 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -144,8 +144,8 @@
   std::string error_msg;
   bool is_system = false;
   if (FindImageFilename(image_location, image_isa, &image_filename, &is_system)) {
-    ImageSpace* space = ImageSpace::Init(image_filename.c_str(), image_location,
-                                         !is_system, &error_msg);
+    ImageSpace* space = ImageSpace::Init(image_filename.c_str(), image_location, !is_system,
+                                         &error_msg);
     if (space != nullptr) {
       return space;
     }
diff --git a/runtime/image.h b/runtime/image.h
index ce2bc58..abe1ad8 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -91,7 +91,7 @@
   static std::string GetOatLocationFromImageLocation(const std::string& image) {
     std::string oat_filename = image;
     if (oat_filename.length() <= 3) {
-      return oat_filename + ".oat";
+      oat_filename += ".oat";
     } else {
       oat_filename.replace(oat_filename.length() - 3, 3, "oat");
     }
diff --git a/runtime/indirect_reference_table-inl.h b/runtime/indirect_reference_table-inl.h
new file mode 100644
index 0000000..1a28347
--- /dev/null
+++ b/runtime/indirect_reference_table-inl.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INDIRECT_REFERENCE_TABLE_INL_H_
+#define ART_RUNTIME_INDIRECT_REFERENCE_TABLE_INL_H_
+
+#include "indirect_reference_table.h"
+
+#include "verify_object-inl.h"
+
+namespace art {
+namespace mirror {
+class Object;
+}  // namespace mirror
+
+// Verifies that the indirect table lookup is valid.
+// Returns "false" if something looks bad.
+inline bool IndirectReferenceTable::GetChecked(IndirectRef iref) const {
+  if (UNLIKELY(iref == nullptr)) {
+    LOG(WARNING) << "Attempt to look up NULL " << kind_;
+    return false;
+  }
+  if (UNLIKELY(GetIndirectRefKind(iref) == kSirtOrInvalid)) {
+    LOG(ERROR) << "JNI ERROR (app bug): invalid " << kind_ << " " << iref;
+    AbortIfNoCheckJNI();
+    return false;
+  }
+  const int topIndex = segment_state_.parts.topIndex;
+  int idx = ExtractIndex(iref);
+  if (UNLIKELY(idx >= topIndex)) {
+    LOG(ERROR) << "JNI ERROR (app bug): accessed stale " << kind_ << " "
+               << iref << " (index " << idx << " in a table of size " << topIndex << ")";
+    AbortIfNoCheckJNI();
+    return false;
+  }
+  if (UNLIKELY(table_[idx] == nullptr)) {
+    LOG(ERROR) << "JNI ERROR (app bug): accessed deleted " << kind_ << " " << iref;
+    AbortIfNoCheckJNI();
+    return false;
+  }
+  if (UNLIKELY(!CheckEntry("use", iref, idx))) {
+    return false;
+  }
+  return true;
+}
+
+// Make sure that the entry at "idx" is correctly paired with "iref".
+inline bool IndirectReferenceTable::CheckEntry(const char* what, IndirectRef iref, int idx) const {
+  const mirror::Object* obj = table_[idx];
+  IndirectRef checkRef = ToIndirectRef(obj, idx);
+  if (UNLIKELY(checkRef != iref)) {
+    LOG(ERROR) << "JNI ERROR (app bug): attempt to " << what
+               << " stale " << kind_ << " " << iref
+               << " (should be " << checkRef << ")";
+    AbortIfNoCheckJNI();
+    return false;
+  }
+  return true;
+}
+
+inline mirror::Object* IndirectReferenceTable::Get(IndirectRef iref) const {
+  if (!GetChecked(iref)) {
+    return kInvalidIndirectRefObject;
+  }
+  mirror::Object* obj = table_[ExtractIndex(iref)];
+  if (LIKELY(obj != kClearedJniWeakGlobal)) {
+    VerifyObject(obj);
+  }
+  return obj;
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_INDIRECT_REFERENCE_TABLE_INL_H_
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index 987df91..b81e43a 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -14,7 +14,8 @@
  * limitations under the License.
  */
 
-#include "indirect_reference_table.h"
+#include "indirect_reference_table-inl.h"
+
 #include "jni_internal.h"
 #include "reference_table.h"
 #include "runtime.h"
@@ -53,7 +54,7 @@
   return os;
 }
 
-static void AbortMaybe() {
+void IndirectReferenceTable::AbortIfNoCheckJNI() {
   // If -Xcheck:jni is on, it'll give a more detailed error before aborting.
   if (!Runtime::Current()->GetJavaVM()->check_jni) {
     // Otherwise, we want to abort rather than hand back a bad reference.
@@ -67,12 +68,23 @@
   CHECK_LE(initialCount, maxCount);
   CHECK_NE(desiredKind, kSirtOrInvalid);
 
-  table_ = reinterpret_cast<mirror::Object**>(malloc(initialCount * sizeof(const mirror::Object*)));
-  CHECK(table_ != NULL);
-  memset(table_, 0xd1, initialCount * sizeof(const mirror::Object*));
+  std::string error_str;
+  const size_t initial_bytes = initialCount * sizeof(const mirror::Object*);
+  const size_t table_bytes = maxCount * sizeof(const mirror::Object*);
+  table_mem_map_.reset(MemMap::MapAnonymous("indirect ref table", nullptr, table_bytes,
+                                            PROT_READ | PROT_WRITE, false, &error_str));
+  CHECK(table_mem_map_.get() != nullptr) << error_str;
 
-  slot_data_ = reinterpret_cast<IndirectRefSlot*>(calloc(initialCount, sizeof(IndirectRefSlot)));
-  CHECK(slot_data_ != NULL);
+  table_ = reinterpret_cast<mirror::Object**>(table_mem_map_->Begin());
+  CHECK(table_ != nullptr);
+  memset(table_, 0xd1, initial_bytes);
+
+  const size_t slot_bytes = maxCount * sizeof(IndirectRefSlot);
+  slot_mem_map_.reset(MemMap::MapAnonymous("indirect ref table slots", nullptr, slot_bytes,
+                                           PROT_READ | PROT_WRITE, false, &error_str));
+  CHECK(slot_mem_map_.get() != nullptr) << error_str;
+  slot_data_ = reinterpret_cast<IndirectRefSlot*>(slot_mem_map_->Begin());
+  CHECK(slot_data_ != nullptr);
 
   segment_state_.all = IRT_FIRST_SEGMENT;
   alloc_entries_ = initialCount;
@@ -81,25 +93,6 @@
 }
 
 IndirectReferenceTable::~IndirectReferenceTable() {
-  free(table_);
-  free(slot_data_);
-  table_ = NULL;
-  slot_data_ = NULL;
-  alloc_entries_ = max_entries_ = -1;
-}
-
-// Make sure that the entry at "idx" is correctly paired with "iref".
-bool IndirectReferenceTable::CheckEntry(const char* what, IndirectRef iref, int idx) const {
-  const mirror::Object* obj = table_[idx];
-  IndirectRef checkRef = ToIndirectRef(obj, idx);
-  if (UNLIKELY(checkRef != iref)) {
-    LOG(ERROR) << "JNI ERROR (app bug): attempt to " << what
-               << " stale " << kind_ << " " << iref
-               << " (should be " << checkRef << ")";
-    AbortMaybe();
-    return false;
-  }
-  return true;
 }
 
 IndirectRef IndirectReferenceTable::Add(uint32_t cookie, mirror::Object* obj) {
@@ -127,20 +120,6 @@
     }
     DCHECK_GT(newSize, alloc_entries_);
 
-    table_ = reinterpret_cast<mirror::Object**>(realloc(table_, newSize * sizeof(mirror::Object*)));
-    slot_data_ = reinterpret_cast<IndirectRefSlot*>(realloc(slot_data_,
-                                                            newSize * sizeof(IndirectRefSlot)));
-    if (table_ == NULL || slot_data_ == NULL) {
-      LOG(FATAL) << "JNI ERROR (app bug): unable to expand "
-                 << kind_ << " table (from "
-                 << alloc_entries_ << " to " << newSize
-                 << ", max=" << max_entries_ << ")\n"
-                 << MutatorLockedDumpable<IndirectReferenceTable>(*this);
-    }
-
-    // Clear the newly-allocated slot_data_ elements.
-    memset(slot_data_ + alloc_entries_, 0, (newSize - alloc_entries_) * sizeof(IndirectRefSlot));
-
     alloc_entries_ = newSize;
   }
 
@@ -185,55 +164,6 @@
   }
 }
 
-// Verifies that the indirect table lookup is valid.
-// Returns "false" if something looks bad.
-bool IndirectReferenceTable::GetChecked(IndirectRef iref) const {
-  if (UNLIKELY(iref == NULL)) {
-    LOG(WARNING) << "Attempt to look up NULL " << kind_;
-    return false;
-  }
-  if (UNLIKELY(GetIndirectRefKind(iref) == kSirtOrInvalid)) {
-    LOG(ERROR) << "JNI ERROR (app bug): invalid " << kind_ << " " << iref;
-    AbortMaybe();
-    return false;
-  }
-
-  int topIndex = segment_state_.parts.topIndex;
-  int idx = ExtractIndex(iref);
-  if (UNLIKELY(idx >= topIndex)) {
-    LOG(ERROR) << "JNI ERROR (app bug): accessed stale " << kind_ << " "
-               << iref << " (index " << idx << " in a table of size " << topIndex << ")";
-    AbortMaybe();
-    return false;
-  }
-
-  if (UNLIKELY(table_[idx] == NULL)) {
-    LOG(ERROR) << "JNI ERROR (app bug): accessed deleted " << kind_ << " " << iref;
-    AbortMaybe();
-    return false;
-  }
-
-  if (UNLIKELY(!CheckEntry("use", iref, idx))) {
-    return false;
-  }
-
-  return true;
-}
-
-static int Find(mirror::Object* direct_pointer, int bottomIndex, int topIndex,
-                mirror::Object** table) {
-  for (int i = bottomIndex; i < topIndex; ++i) {
-    if (table[i] == direct_pointer) {
-      return i;
-    }
-  }
-  return -1;
-}
-
-bool IndirectReferenceTable::ContainsDirectPointer(mirror::Object* direct_pointer) const {
-  return Find(direct_pointer, 0, segment_state_.parts.topIndex, table_) != -1;
-}
-
 // Removes an object. We extract the table offset bits from "iref"
 // and zap the corresponding entry, leaving a hole if it's not at the top.
 // If the entry is not between the current top index and the bottom index
@@ -346,15 +276,4 @@
   ReferenceTable::Dump(os, entries);
 }
 
-mirror::Object* IndirectReferenceTable::Get(IndirectRef iref) const {
-  if (!GetChecked(iref)) {
-    return kInvalidIndirectRefObject;
-  }
-  mirror::Object* obj = table_[ExtractIndex(iref)];;
-  if (obj != kClearedJniWeakGlobal) {
-    VerifyObject(obj);
-  }
-  return obj;
-}
-
 }  // namespace art
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index a2de726..f365acc 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -24,6 +24,7 @@
 
 #include "base/logging.h"
 #include "base/mutex.h"
+#include "mem_map.h"
 #include "object_callbacks.h"
 #include "offsets.h"
 
@@ -72,7 +73,7 @@
  * To make everything fit nicely in 32-bit integers, the maximum size of
  * the table is capped at 64K.
  *
- * None of the table functions are synchronized.
+ * Only SynchronizedGet is synchronized.
  */
 
 /*
@@ -191,11 +192,6 @@
  * and local refs to improve performance.  A large circular buffer might
  * reduce the amortized cost of adding global references.
  *
- * TODO: if we can guarantee that the underlying storage doesn't move,
- * e.g. by using oversized mmap regions to handle expanding tables, we may
- * be able to avoid having to synchronize lookups.  Might make sense to
- * add a "synchronized lookup" call that takes the mutex as an argument,
- * and either locks or doesn't lock based on internal details.
  */
 union IRTSegmentState {
   uint32_t          all;
@@ -234,7 +230,7 @@
     }
   }
 
-  mirror::Object** table_;
+  mirror::Object** const table_;
   size_t i_;
   size_t capacity_;
 };
@@ -267,10 +263,15 @@
    *
    * Returns kInvalidIndirectRefObject if iref is invalid.
    */
-  mirror::Object* Get(IndirectRef iref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::Object* Get(IndirectRef iref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      ALWAYS_INLINE;
 
-  // TODO: remove when we remove work_around_app_jni_bugs support.
-  bool ContainsDirectPointer(mirror::Object* direct_pointer) const;
+  // Synchronized get which reads a reference, acquiring a lock if necessary.
+  mirror::Object* SynchronizedGet(Thread* /*self*/, ReaderWriterMutex* /*mutex*/,
+                                  IndirectRef iref) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return Get(iref);
+  }
 
   /*
    * Remove an existing entry.
@@ -351,6 +352,9 @@
     }
   }
 
+  // Abort if check_jni is not enabled.
+  static void AbortIfNoCheckJNI();
+
   /* extra debugging checks */
   bool GetChecked(IndirectRef) const;
   bool CheckEntry(const char*, IndirectRef, int) const;
@@ -358,6 +362,10 @@
   /* semi-public - read/write by jni down calls */
   IRTSegmentState segment_state_;
 
+  // Mem map where we store the indirect refs.
+  UniquePtr<MemMap> table_mem_map_;
+  // Mem map where we store the extended debugging info.
+  UniquePtr<MemMap> slot_mem_map_;
   /* bottom of the stack */
   mirror::Object** table_;
   /* bit mask, ORed into all irefs */
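
Net effect of the indirect_reference_table changes: the table is now backed by an anonymous mmap sized for maxCount up front, so table_ never moves and Add no longer needs realloc; growth is just the alloc_entries_ bound update left behind in Add. That address stability is what makes the lock-free SynchronizedGet sound, and what lets jni_internal.h below drop GUARDED_BY(globals_lock) from globals. A sketch of the allocation, following the constructor above:

    std::string error_msg;
    const size_t table_bytes = max_count * sizeof(mirror::Object*);  // max_count: table capacity
    UniquePtr<MemMap> map(MemMap::MapAnonymous("indirect ref table", nullptr, table_bytes,
                                               PROT_READ | PROT_WRITE, false, &error_msg));
    CHECK(map.get() != nullptr) << error_msg;
    mirror::Object** table = reinterpret_cast<mirror::Object**>(map->Begin());
    // Pages for the full capacity are reserved here; a later "grow" only raises
    // the logical bound, so concurrent readers never observe a moved table.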
diff --git a/runtime/indirect_reference_table_test.cc b/runtime/indirect_reference_table_test.cc
index 9b42e59..449817a 100644
--- a/runtime/indirect_reference_table_test.cc
+++ b/runtime/indirect_reference_table_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "indirect_reference_table.h"
+#include "indirect_reference_table-inl.h"
 
 #include "common_runtime_test.h"
 #include "mirror/object-inl.h"
diff --git a/runtime/jdwp/jdwp_adb.cc b/runtime/jdwp/jdwp_adb.cc
index cbf35be..fe91bb6 100644
--- a/runtime/jdwp/jdwp_adb.cc
+++ b/runtime/jdwp/jdwp_adb.cc
@@ -362,7 +362,7 @@
       }
 
       if (wake_pipe_[0] >= 0 && FD_ISSET(wake_pipe_[0], &readfds)) {
-        LOG(DEBUG) << "Got wake-up signal, bailing out of select";
+        VLOG(jdwp) << "Got wake-up signal, bailing out of select";
         goto fail;
       }
       if (control_sock_ >= 0 && FD_ISSET(control_sock_, &readfds)) {
@@ -385,7 +385,7 @@
           if (errno != EINTR) {
             goto fail;
           }
-          LOG(DEBUG) << "+++ EINTR hit";
+          VLOG(jdwp) << "+++ EINTR hit";
           return true;
         } else if (readCount == 0) {
           /* EOF hit -- far end went away */
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 223b7a1..4e2b0f8 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -280,7 +280,7 @@
   if (found) {
     Dbg::ManageDeoptimization();
   } else {
-    LOG(DEBUG) << StringPrintf("Odd: no match when removing event reqId=0x%04x", requestId);
+    LOG(WARNING) << StringPrintf("Odd: no match when removing event reqId=0x%04x", requestId);
   }
 }
 
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index f480256..2419ca6 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -573,7 +573,7 @@
  */
 int64_t JdwpState::LastDebuggerActivity() {
   if (!Dbg::IsDebuggerActive()) {
-    LOG(DEBUG) << "no active debugger";
+    LOG(WARNING) << "no active debugger";
     return -1;
   }
 
diff --git a/runtime/jdwp/jdwp_socket.cc b/runtime/jdwp/jdwp_socket.cc
index 3f5546e..4a80957 100644
--- a/runtime/jdwp/jdwp_socket.cc
+++ b/runtime/jdwp/jdwp_socket.cc
@@ -416,7 +416,7 @@
         if (listenSock >= 0) {
           LOG(ERROR) << "Exit wake set, but not exiting?";
         } else {
-          LOG(DEBUG) << "Got wake-up signal, bailing out of select";
+          VLOG(jdwp) << "Got wake-up signal, bailing out of select";
         }
         goto fail;
       }
@@ -442,7 +442,7 @@
           if (errno != EINTR) {
             goto fail;
           }
-          LOG(DEBUG) << "+++ EINTR hit";
+          VLOG(jdwp) << "+++ EINTR hit";
           return true;
         } else if (readCount == 0) {
           /* EOF hit -- far end went away */
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index e6a35d0..915f2c9 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -29,6 +29,7 @@
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
 #include "gc/accounting/card_table-inl.h"
+#include "indirect_reference_table-inl.h"
 #include "interpreter/interpreter.h"
 #include "jni.h"
 #include "mirror/art_field-inl.h"
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index ec911b2..cdf3c47 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -116,7 +116,8 @@
 
   // JNI global references.
   ReaderWriterMutex globals_lock DEFAULT_MUTEX_ACQUIRED_AFTER;
-  IndirectReferenceTable globals GUARDED_BY(globals_lock);
+  // Not guarded by globals_lock since we sometimes use SynchronizedGet in Thread::DecodeJObject.
+  IndirectReferenceTable globals;
 
   Mutex libraries_lock DEFAULT_MUTEX_ACQUIRED_AFTER;
   Libraries* libraries GUARDED_BY(libraries_lock);
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 14fc25c..778b9e5 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -987,9 +987,6 @@
     // Our local reference for the survivor is invalid because the survivor
     // gets a new local reference...
     EXPECT_EQ(JNIInvalidRefType, env_->GetObjectRefType(inner2));
-    // ...but the survivor should be in the local reference table.
-    JNIEnvExt* env = reinterpret_cast<JNIEnvExt*>(env_);
-    EXPECT_TRUE(env->locals.ContainsDirectPointer(inner2_direct_pointer));
 
     env_->PopLocalFrame(NULL);
   }
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index a556a1c..d454ae8 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -33,8 +33,14 @@
 namespace art {
 namespace mirror {
 
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline uint32_t Class::GetObjectSize() {
-  DCHECK(!IsVariableSize()) << " class=" << PrettyTypeOf(this);
+  if (kIsDebugBuild) {
+    // Use a local variable as (D)CHECK can't handle the comma between
+    // the two template params.
+    bool is_variable_size = IsVariableSize<kVerifyFlags, kReadBarrierOption>();
+    CHECK(!is_variable_size) << " class=" << PrettyTypeOf(this);
+  }
   return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, object_size_));
 }
 
@@ -514,6 +520,13 @@
   return this == ArtMethod::GetJavaLangReflectArtMethod<kReadBarrierOption>();
 }
 
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
+inline bool Class::IsClassClass() {
+  Class* java_lang_Class = GetClass<kVerifyFlags, kReadBarrierOption>()->
+      template GetClass<kVerifyFlags, kReadBarrierOption>();
+  return this == java_lang_Class;
+}
+
 }  // namespace mirror
 }  // namespace art
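
The kIsDebugBuild workaround in GetObjectSize above is about the preprocessor, not templates: a bare comma inside a macro argument list splits it into two arguments. A sketch of the failure and the fix:

    // Would not compile: the preprocessor hands CHECK two "arguments",
    //   !IsVariableSize<kVerifyFlags   and   kReadBarrierOption>()
    // CHECK(!IsVariableSize<kVerifyFlags, kReadBarrierOption>());
    bool is_variable_size = IsVariableSize<kVerifyFlags, kReadBarrierOption>();
    CHECK(!is_variable_size);  // the comma is now outside the macro invocation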
 
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 662303e..ff63782 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -315,11 +315,6 @@
                          ClassHelper(klass2).GetDescriptor());
 }
 
-bool Class::IsClassClass() {
-  Class* java_lang_Class = GetClass()->GetClass();
-  return this == java_lang_Class;
-}
-
 bool Class::IsStringClass() const {
   return this == String::GetJavaLangString();
 }
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 00ecead..1f393db 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -366,6 +366,8 @@
     return GetComponentType<kVerifyFlags, kReadBarrierOption>() != NULL;
   }
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsClassClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsStringClass() const;
@@ -423,10 +425,13 @@
   Object* AllocNonMovableObject(Thread* self)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsVariableSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Classes and arrays vary in size, and so the object_size_ field cannot
     // be used to get their instance size
-    return IsClassClass() || IsArrayClass();
+    return IsClassClass<kVerifyFlags, kReadBarrierOption>() ||
+        IsArrayClass<kVerifyFlags, kReadBarrierOption>();
   }
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
@@ -443,6 +448,8 @@
   void SetClassSize(uint32_t new_class_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   uint32_t GetObjectSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetObjectSize(uint32_t new_object_size) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 064fe30..a2072a2 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -389,7 +389,8 @@
     result = AsClass<kNewFlags, kReadBarrierOption>()->
         template SizeOf<kNewFlags, kReadBarrierOption>();
   } else {
-    result = GetClass<kNewFlags, kReadBarrierOption>()->GetObjectSize();
+    result = GetClass<kNewFlags, kReadBarrierOption>()->
+        template GetObjectSize<kNewFlags, kReadBarrierOption>();
   }
   DCHECK_GE(result, sizeof(Object))
       << " class=" << PrettyTypeOf(GetClass<kNewFlags, kReadBarrierOption>());
diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h
index cf65d20..0b6e759 100644
--- a/runtime/mirror/reference.h
+++ b/runtime/mirror/reference.h
@@ -42,8 +42,10 @@
     return OFFSET_OF_OBJECT_MEMBER(Reference, referent_);
   }
 
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Object* GetReferent() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObjectVolatile<Object>(ReferentOffset());
+    return GetFieldObjectVolatile<Object, kDefaultVerifyFlags, kReadBarrierOption>(
+        ReferentOffset());
   }
   template<bool kTransactionActive>
   void SetReferent(Object* referent) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
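
The new kReadBarrierOption parameter on GetReferent exists for the GC-internal callers above (reference_queue.cc, reference_processor.cc): they need the raw referent field, since reading through a read barrier could mark or forward a referent the collector is still deciding whether to preserve. A sketch of the two flavours:

    mirror::Object* raw = ref->GetReferent<kWithoutReadBarrier>();  // GC internals: raw field value
    mirror::Object* obj = ref->GetReferent();                       // default kWithReadBarrier: mutator use
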
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 1008491..f541633 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -58,12 +58,12 @@
     Runtime* runtime = Runtime::Current();
     JavaVMExt* vm = runtime->GetJavaVM();
     if (!vm->check_jni) {
-      LOG(DEBUG) << "Late-enabling -Xcheck:jni";
+      VLOG(jni) << "Late-enabling -Xcheck:jni";
       vm->SetCheckJniEnabled(true);
       // There's only one thread running at this point, so only one JNIEnv to fix up.
       Thread::Current()->GetJniEnv()->SetCheckJniEnabled(true);
     } else {
-      LOG(DEBUG) << "Not late-enabling -Xcheck:jni (already on)";
+      VLOG(jni) << "Not late-enabling -Xcheck:jni (already on)";
     }
     debug_flags &= ~DEBUG_ENABLE_CHECKJNI;
   }
diff --git a/runtime/oat.cc b/runtime/oat.cc
index c7dda71..a1f4fd0 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -22,7 +22,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '2', '5', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '2', '7', '\0' };
 
 OatHeader::OatHeader() {
   memset(this, 0, sizeof(*this));
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 1562527..8c18dff 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -181,8 +181,16 @@
   parallel_gc_threads_ = sysconf(_SC_NPROCESSORS_CONF) - 1;
   // Only the main GC thread, no workers.
   conc_gc_threads_ = 0;
-  // Default is CMS which is Sticky + Partial + Full CMS GC.
+  // The default GC type is set in makefiles.
+#if ART_DEFAULT_GC_TYPE_IS_CMS
   collector_type_ = gc::kCollectorTypeCMS;
+#elif ART_DEFAULT_GC_TYPE_IS_SS
+  collector_type_ = gc::kCollectorTypeSS;
+#elif ART_DEFAULT_GC_TYPE_IS_GSS
+  collector_type_ = gc::kCollectorTypeGSS;
+#else
+#error "ART default GC type must be set"
+#endif
   // If background_collector_type_ is kCollectorTypeNone, it defaults to the collector_type_ after
   // parsing options.
   background_collector_type_ = gc::kCollectorTypeNone;
@@ -223,16 +231,17 @@
 
 //  gLogVerbosity.class_linker = true;  // TODO: don't check this in!
 //  gLogVerbosity.compiler = true;  // TODO: don't check this in!
-//  gLogVerbosity.verifier = true;  // TODO: don't check this in!
-//  gLogVerbosity.heap = true;  // TODO: don't check this in!
 //  gLogVerbosity.gc = true;  // TODO: don't check this in!
+//  gLogVerbosity.heap = true;  // TODO: don't check this in!
 //  gLogVerbosity.jdwp = true;  // TODO: don't check this in!
 //  gLogVerbosity.jni = true;  // TODO: don't check this in!
 //  gLogVerbosity.monitor = true;  // TODO: don't check this in!
+//  gLogVerbosity.profiler = true;  // TODO: don't check this in!
+//  gLogVerbosity.signals = true;  // TODO: don't check this in!
 //  gLogVerbosity.startup = true;  // TODO: don't check this in!
 //  gLogVerbosity.third_party_jni = true;  // TODO: don't check this in!
 //  gLogVerbosity.threads = true;  // TODO: don't check this in!
-//  gLogVerbosity.signals = true;  // TODO: don't check this in!
+//  gLogVerbosity.verifier = true;  // TODO: don't check this in!
 
   method_trace_ = false;
   method_trace_file_ = "/data/method-trace-file.bin";
@@ -254,7 +263,7 @@
 #ifdef HAVE_ANDROID_OS
   {
     char buf[PROP_VALUE_MAX];
-    property_get("dalvik.vm.implicit_checks", buf, "null,stack");
+    property_get("dalvik.vm.implicit_checks", buf, "none");
     std::string checks(buf);
     std::vector<std::string> checkvec;
     Split(checks, ',', checkvec);
@@ -445,28 +454,30 @@
       for (size_t i = 0; i < verbose_options.size(); ++i) {
         if (verbose_options[i] == "class") {
           gLogVerbosity.class_linker = true;
-        } else if (verbose_options[i] == "verifier") {
-          gLogVerbosity.verifier = true;
         } else if (verbose_options[i] == "compiler") {
           gLogVerbosity.compiler = true;
-        } else if (verbose_options[i] == "heap") {
-          gLogVerbosity.heap = true;
         } else if (verbose_options[i] == "gc") {
           gLogVerbosity.gc = true;
+        } else if (verbose_options[i] == "heap") {
+          gLogVerbosity.heap = true;
         } else if (verbose_options[i] == "jdwp") {
           gLogVerbosity.jdwp = true;
         } else if (verbose_options[i] == "jni") {
           gLogVerbosity.jni = true;
         } else if (verbose_options[i] == "monitor") {
           gLogVerbosity.monitor = true;
+        } else if (verbose_options[i] == "profiler") {
+          gLogVerbosity.profiler = true;
+        } else if (verbose_options[i] == "signals") {
+          gLogVerbosity.signals = true;
         } else if (verbose_options[i] == "startup") {
           gLogVerbosity.startup = true;
         } else if (verbose_options[i] == "third-party-jni") {
           gLogVerbosity.third_party_jni = true;
         } else if (verbose_options[i] == "threads") {
           gLogVerbosity.threads = true;
-        } else if (verbose_options[i] == "signals") {
-           gLogVerbosity.signals = true;
+        } else if (verbose_options[i] == "verifier") {
+          gLogVerbosity.verifier = true;
         } else {
           Usage("Unknown -verbose option %s\n", verbose_options[i].c_str());
           return false;
@@ -671,7 +682,9 @@
 
   if (compiler_callbacks_ == nullptr && image_.empty()) {
     image_ += GetAndroidRoot();
-    image_ += "/framework/boot.art";
+    image_ += "/framework/boot-";
+    image_ += GetInstructionSetString(image_isa_);
+    image_ += ".art";
   }
   if (heap_growth_limit_ == 0) {
     heap_growth_limit_ = heap_maximum_size_;
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index 7b117f4..6e33f9d 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -147,7 +147,7 @@
 
       startup_delay = 0;
 
-      LOG(DEBUG) << "Delaying profile start for " << delay_secs << " secs";
+      VLOG(profiler) << "Delaying profile start for " << delay_secs << " secs";
       MutexLock mu(self, profiler->wait_lock_);
       profiler->period_condition_.TimedWait(self, delay_secs * 1000, 0);
 
@@ -167,7 +167,7 @@
     uint64_t end_us = start_us + profiler->duration_s_ * UINT64_C(1000000);
     uint64_t now_us = start_us;
 
-    LOG(DEBUG) << "Starting profiling run now for " << PrettyDuration((end_us - start_us) * 1000);
+    VLOG(profiler) << "Starting profiling run now for " << PrettyDuration((end_us - start_us) * 1000);
 
 
     SampleCheckpoint check_point(profiler);
@@ -221,7 +221,7 @@
       // After the profile has been taken, write it out.
       ScopedObjectAccess soa(self);   // Acquire the mutator lock.
       uint32_t size = profiler->WriteProfile();
-      LOG(DEBUG) << "Profile size: " << size;
+      VLOG(profiler) << "Profile size: " << size;
     }
   }
 
@@ -233,7 +233,7 @@
 // Write out the profile file if we are generating a profile.
 uint32_t BackgroundMethodSamplingProfiler::WriteProfile() {
   std::string full_name = profile_file_name_;
-  LOG(DEBUG) << "Saving profile to " << full_name;
+  VLOG(profiler) << "Saving profile to " << full_name;
 
   int fd = open(full_name.c_str(), O_RDWR);
   if (fd < 0) {
@@ -469,7 +469,7 @@
   num_null_methods_ += previous_num_null_methods_;
   num_boot_methods_ += previous_num_boot_methods_;
 
-  LOG(DEBUG) << "Profile: " << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_;
+  VLOG(profiler) << "Profile: " << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_;
   os << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_ << "\n";
   uint32_t num_methods = 0;
   for (int i = 0 ; i < kHashSize; i++) {
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 6d9dfa6..a390ac6 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -535,20 +535,9 @@
     GetInstrumentation()->ForceInterpretOnly();
   }
 
-  bool implicit_checks_supported = false;
-  switch (kRuntimeISA) {
-  case kArm:
-  case kThumb2:
-    implicit_checks_supported = true;
-    break;
-  default:
-    break;
-  }
-
-  if (implicit_checks_supported &&
-    (options->explicit_checks_ != (ParsedOptions::kExplicitSuspendCheck |
+  if (options->explicit_checks_ != (ParsedOptions::kExplicitSuspendCheck |
         ParsedOptions::kExplicitNullCheck |
-        ParsedOptions::kExplicitStackOverflowCheck) || kEnableJavaStackTraceHandler)) {
+        ParsedOptions::kExplicitStackOverflowCheck) || kEnableJavaStackTraceHandler) {
     fault_manager.Init();
 
     // These need to be in a specific order.  The null point check handler must be
@@ -940,8 +929,8 @@
 }
 
 void Runtime::VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
-  VisitConcurrentRoots(callback, arg, flags);
   VisitNonConcurrentRoots(callback, arg);
+  VisitConcurrentRoots(callback, arg, flags);
 }
 
 mirror::ObjectArray<mirror::ArtMethod>* Runtime::CreateDefaultImt(ClassLinker* cl) {
@@ -1283,17 +1272,9 @@
   // Make the dex2oat instruction set match that of the launching runtime. If we have multiple
   // architecture support, dex2oat may be compiled as a different instruction-set than that
   // currently being executed.
-#if defined(__arm__)
-  argv->push_back("--instruction-set=arm");
-#elif defined(__aarch64__)
-  argv->push_back("--instruction-set=arm64");
-#elif defined(__i386__)
-  argv->push_back("--instruction-set=x86");
-#elif defined(__x86_64__)
-  argv->push_back("--instruction-set=x86_64");
-#elif defined(__mips__)
-  argv->push_back("--instruction-set=mips");
-#endif
+  std::string instruction_set("--instruction-set=");
+  instruction_set += GetInstructionSetString(kRuntimeISA);
+  argv->push_back(instruction_set);
 
   std::string features("--instruction-set-features=");
   features += GetDefaultInstructionSetFeatures();
@@ -1301,6 +1282,6 @@
 }
 
 void Runtime::UpdateProfilerState(int state) {
-  LOG(DEBUG) << "Profiler state updated to " << state;
+  VLOG(profiler) << "Profiler state updated to " << state;
 }
 }  // namespace art
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 5e64e59..6667419 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -270,7 +270,7 @@
 void StackVisitor::SanityCheckFrame() const {
   if (kIsDebugBuild) {
     mirror::ArtMethod* method = GetMethod();
-    CHECK(method->GetClass() == mirror::ArtMethod::GetJavaLangReflectArtMethod());
+    CHECK_EQ(method->GetClass(), mirror::ArtMethod::GetJavaLangReflectArtMethod());
     if (cur_quick_frame_ != nullptr) {
       method->AssertPcIsWithinQuickCode(cur_quick_frame_pc_);
       // Frame sanity.
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 3a62cd5..00a66d7 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -44,6 +44,7 @@
 #include "gc/accounting/card_table-inl.h"
 #include "gc/heap.h"
 #include "gc/space/space.h"
+#include "indirect_reference_table-inl.h"
 #include "jni_internal.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
@@ -1265,10 +1266,8 @@
       result = kInvalidIndirectRefObject;
     }
   } else if (kind == kGlobal) {
-    JavaVMExt* vm = Runtime::Current()->GetJavaVM();
-    IndirectReferenceTable& globals = vm->globals;
-    ReaderMutexLock mu(const_cast<Thread*>(this), vm->globals_lock);
-    result = const_cast<mirror::Object*>(globals.Get(ref));
+    JavaVMExt* const vm = Runtime::Current()->GetJavaVM();
+    result = vm->globals.SynchronizedGet(const_cast<Thread*>(this), &vm->globals_lock, ref);
   } else {
     DCHECK_EQ(kind, kWeakGlobal);
     result = Runtime::Current()->GetJavaVM()->DecodeWeakGlobal(const_cast<Thread*>(this), ref);
diff --git a/runtime/thread.h b/runtime/thread.h
index 8c17082..32311e1 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1082,6 +1082,7 @@
   friend class Runtime;  // For CreatePeer.
   friend class ScopedThreadStateChange;
   friend class SignalCatcher;  // For SetStateUnsafe.
+  friend class StubTest;  // For accessing entrypoints.
   friend class ThreadList;  // For ~Thread and Destroy.
 
   DISALLOW_COPY_AND_ASSIGN(Thread);
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 4eb580b..31d8d60 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -650,7 +650,7 @@
         // can happen if the debugger lets go while a SIGQUIT thread
         // dump event is pending (assuming SignalCatcher was resumed for
         // just long enough to try to grab the thread-suspend lock).
-        LOG(DEBUG) << *self << " still suspended after undo "
-                   << "(suspend count=" << self->GetSuspendCount() << ")";
+        LOG(WARNING) << *self << " still suspended after undo "
+                     << "(suspend count=" << self->GetSuspendCount() << ")";
       }
     }