Merge "Fix an Art debug build boot failure." into dalvik-dev
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 66ff461..fc2f02b 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -74,7 +74,6 @@
 	llvm/md_builder.cc \
 	llvm/runtime_support_builder.cc \
 	llvm/runtime_support_builder_arm.cc \
-	llvm/runtime_support_builder_thumb2.cc \
 	llvm/runtime_support_builder_x86.cc \
 	trampolines/trampoline_compiler.cc \
 	utils/arm/assembler_arm.cc \
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 97a682f..17b5bb5 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -412,6 +412,27 @@
 
 std::ostream& operator<<(std::ostream& os, const OatBitMapKind& kind);
 
+// LIR fixup kinds for Arm
+enum FixupKind {
+  kFixupNone,
+  kFixupLabel,       // For labels we just adjust the offset.
+  kFixupLoad,        // Mostly for immediates.
+  kFixupVLoad,       // FP load which *may* be pc-relative.
+  kFixupCBxZ,        // Cbz, Cbnz.
+  kFixupPushPop,     // Not really pc relative, but changes size based on args.
+  kFixupCondBranch,  // Conditional branch.
+  kFixupT1Branch,    // Thumb1 unconditional branch.
+  kFixupT2Branch,    // Thumb2 unconditional branch.
+  kFixupBlx1,        // Blx1 (start of Blx1/Blx2 pair).
+  kFixupBl1,         // Bl1 (start of Bl1/Bl2 pair).
+  kFixupAdr,         // Adr.
+  kFixupMovImmLST,   // kThumb2MovImm16LST.
+  kFixupMovImmHST,   // kThumb2MovImm16HST.
+  kFixupAlign4,      // Align to 4-byte boundary.
+};
+
+std::ostream& operator<<(std::ostream& os, const FixupKind& kind);
+
 }  // namespace art
 
 #endif  // ART_COMPILER_DEX_COMPILER_ENUMS_H_
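
The fixup kinds above give the Arm backend a per-instruction tag describing how (or whether) its pc-relative parts must be re-resolved as the assembler adjusts instruction sizes. A minimal sketch of that dispatch, assuming a hypothetical LirNode with offset fields (not the real ART LIR structures):

    // Hypothetical LIR node; only the fields this sketch needs.
    struct LirNode {
      FixupKind fixup;
      int offset;         // Current code offset of this instruction.
      int target_offset;  // Offset of the label or literal it refers to.
    };

    // Returns true when the instruction still encodes at its current size.
    bool FixupFits(const LirNode& lir) {
      switch (lir.fixup) {
        case kFixupNone:
        case kFixupLabel:
          return true;  // Nothing pc-relative to re-check.
        case kFixupCBxZ: {
          // cbz/cbnz can only reach 0..126 bytes forward; a miss forces the
          // caller to rewrite it as a compare plus a longer branch.
          int delta = lir.target_offset - (lir.offset + 4);
          return delta >= 0 && delta <= 126;
        }
        default:
          return true;  // Remaining kinds elided in this sketch.
      }
    }
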
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index fefcab9..2952570 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -117,6 +117,11 @@
 #endif
 ) {
   VLOG(compiler) << "Compiling " << PrettyMethod(method_idx, dex_file) << "...";
+  if (code_item->insns_size_in_code_units_ >= 0x10000) {
+    LOG(INFO) << "Method size exceeds compiler limits: " << code_item->insns_size_in_code_units_
+              << " in " << PrettyMethod(method_idx, dex_file);
+    return NULL;
+  }
 
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   CompilationUnit cu(&compiler.GetArenaPool());
@@ -151,7 +156,7 @@
    */
 
   if (compiler_backend == kPortable) {
-    // Fused long branches not currently usseful in bitcode.
+    // Fused long branches not currently useful in bitcode.
     cu.disable_opt |= (1 << kBranchFusing);
   }
 
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index 8472a3c..8597172 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -1032,6 +1032,14 @@
    */
   if (GetNumDalvikInsns() > Runtime::Current()->GetHugeMethodThreshold()) {
     skip_compilation = true;
+    // If we have a huge number of basic blocks, don't bother with further analysis.
+    if (static_cast<size_t>(num_blocks_) > (Runtime::Current()->GetHugeMethodThreshold() / 2)) {
+      return true;
+    }
+  } else if (GetNumDalvikInsns() > Runtime::Current()->GetLargeMethodThreshold() &&
+    /* If it's large and contains no branches, it's likely to be machine-generated initialization */
+      (GetBranchCount() == 0)) {
+    return true;
   } else if (compiler_filter == Runtime::kSpeed) {
     // If not huge, compile.
     return false;
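
Restated as a standalone predicate, the new early-exit logic above amounts to the following; thresholds and counts are passed in explicitly here since the Runtime accessors are not reproduced:

    #include <cstddef>

    // Returns true when method analysis can stop early: either the method is
    // huge and also has a very large number of basic blocks, or it is merely
    // large but contains no branches at all (likely machine-generated
    // initialization code).
    bool CutAnalysisShort(size_t num_insns, size_t num_blocks, size_t num_branches,
                          size_t huge_threshold, size_t large_threshold) {
      if (num_insns > huge_threshold) {
        return num_blocks > huge_threshold / 2;
      }
      return num_insns > large_threshold && num_branches == 0;
    }
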
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index c234298..fb306de 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -96,10 +96,9 @@
       try_block_addr_(NULL),
       entry_block_(NULL),
       exit_block_(NULL),
-      cur_block_(NULL),
       num_blocks_(0),
       current_code_item_(NULL),
-      block_map_(arena, 0, kGrowableArrayMisc),
+      dex_pc_to_block_map_(arena, 0, kGrowableArrayMisc),
       current_method_(kInvalidEntry),
       current_offset_(kInvalidEntry),
       def_count_(0),
@@ -109,7 +108,9 @@
       attributes_(METHOD_IS_LEAF),  // Start with leaf assumption, change on encountering invoke.
       checkstats_(NULL),
       special_case_(kNoHandler),
-      arena_(arena) {
+      arena_(arena),
+      backward_branches_(0),
+      forward_branches_(0) {
   try_block_addr_ = new (arena_) ArenaBitVector(arena_, 0, true /* expandable */);
 }
 
@@ -151,7 +152,7 @@
   orig_block->terminated_by_return = false;
 
   /* Add it to the quick lookup cache */
-  block_map_.Put(bottom_block->start_offset, bottom_block);
+  dex_pc_to_block_map_.Put(bottom_block->start_offset, bottom_block->id);
 
   /* Handle the taken path */
   bottom_block->taken = orig_block->taken;
@@ -196,6 +197,23 @@
     DCHECK_EQ(*immed_pred_block_p, orig_block);
     *immed_pred_block_p = bottom_block;
   }
+
+  // Associate dex instructions in the bottom block with the new container.
+  MIR* p = bottom_block->first_mir_insn;
+  while (p != NULL) {
+    int opcode = p->dalvikInsn.opcode;
+    /*
+     * Some messiness here to ensure that we only enter real opcodes and only the
+     * first half of a potentially throwing instruction that has been split into
+     * CHECK and work portions.  The 2nd half of a split operation will have a non-null
+     * throw_insn pointer that refers to the 1st half.
+     */
+    if ((opcode == kMirOpCheck) || (!IsPseudoMirOp(opcode) && (p->meta.throw_insn == NULL))) {
+      dex_pc_to_block_map_.Put(p->offset, bottom_block->id);
+    }
+    p = (p == bottom_block->last_mir_insn) ? NULL : p->next;
+  }
+
   return bottom_block;
 }
 
@@ -209,39 +227,37 @@
  */
 BasicBlock* MIRGraph::FindBlock(unsigned int code_offset, bool split, bool create,
                                 BasicBlock** immed_pred_block_p) {
-  BasicBlock* bb;
-  unsigned int i;
-
   if (code_offset >= cu_->code_item->insns_size_in_code_units_) {
     return NULL;
   }
-  bb = block_map_.Get(code_offset);
-  if ((bb != NULL) || !create) {
+
+  int block_id = dex_pc_to_block_map_.Get(code_offset);
+  BasicBlock* bb = (block_id == 0) ? NULL : block_list_.Get(block_id);
+
+  if ((bb != NULL) && (bb->start_offset == code_offset)) {
+    // Does this containing block start with the desired instruction?
     return bb;
   }
 
-  if (split) {
-    for (i = block_list_.Size(); i > 0; i--) {
-      bb = block_list_.Get(i - 1);
-      if (bb->block_type != kDalvikByteCode) continue;
-      /* Check if a branch jumps into the middle of an existing block */
-      if ((code_offset > bb->start_offset) && (bb->last_mir_insn != NULL) &&
-          (code_offset <= bb->last_mir_insn->offset)) {
-        BasicBlock *new_bb = SplitBlock(code_offset, bb, bb == *immed_pred_block_p ?
-                                       immed_pred_block_p : NULL);
-        return new_bb;
-      }
-    }
+  // No direct hit.
+  if (!create) {
+    return NULL;
   }
 
-  /* Create a new one */
+  if (bb != NULL) {
+    // The target exists somewhere in an existing block.
+    return SplitBlock(code_offset, bb, bb == *immed_pred_block_p ? immed_pred_block_p : NULL);
+  }
+
+  // Create a new block.
   bb = NewMemBB(kDalvikByteCode, num_blocks_++);
   block_list_.Insert(bb);
   bb->start_offset = code_offset;
-  block_map_.Put(bb->start_offset, bb);
+  dex_pc_to_block_map_.Put(bb->start_offset, bb->id);
   return bb;
 }
 
+
 /* Identify code range in try blocks and set up the empty catch blocks */
 void MIRGraph::ProcessTryCatchBlocks() {
   int tries_size = current_code_item_->tries_size_;
@@ -307,6 +323,7 @@
     default:
       LOG(FATAL) << "Unexpected opcode(" << insn->dalvikInsn.opcode << ") with kBranch set";
   }
+  CountBranch(target);
   BasicBlock *taken_block = FindBlock(target, /* split */ true, /* create */ true,
                                       /* immed_pred_block_p */ &cur_block);
   cur_block->taken = taken_block;
@@ -485,6 +502,9 @@
    * pseudo exception edge MIR.  Note also that this new block is
    * not automatically terminated after the work portion, and may
    * contain following instructions.
+   *
+   * Note also that the dex_pc_to_block_map_ entry for the potentially
+   * throwing instruction will refer to the original basic block.
    */
   BasicBlock *new_block = NewMemBB(kDalvikByteCode, num_blocks_++);
   block_list_.Insert(new_block);
@@ -518,8 +538,9 @@
       current_code_item_->insns_ + current_code_item_->insns_size_in_code_units_;
 
   // TODO: need to rework expansion of block list & try_block_addr when inlining activated.
+  // TUNING: use better estimate of basic blocks for following resize.
   block_list_.Resize(block_list_.Size() + current_code_item_->insns_size_in_code_units_);
-  block_map_.SetSize(block_map_.Size() + current_code_item_->insns_size_in_code_units_);
+  dex_pc_to_block_map_.SetSize(dex_pc_to_block_map_.Size() + current_code_item_->insns_size_in_code_units_);
 
   // TODO: replace with explicit resize routine.  Using automatic extension side effect for now.
   try_block_addr_->SetBit(current_code_item_->insns_size_in_code_units_);
@@ -560,10 +581,7 @@
   DCHECK_EQ(current_offset_, 0);
   cur_block->start_offset = current_offset_;
   block_list_.Insert(cur_block);
-  /* Add first block to the fast lookup cache */
-// FIXME: block map needs association with offset/method pair rather than just offset
-  block_map_.Put(cur_block->start_offset, cur_block);
-// FIXME: this needs to insert at the insert point rather than entry block.
+  // FIXME: this needs to insert at the insert point rather than entry block.
   entry_block_->fall_through = cur_block;
   cur_block->predecessors->Insert(entry_block_);
 
@@ -589,7 +607,6 @@
       opcode_count_[static_cast<int>(opcode)]++;
     }
 
-
     /* Possible simple method? */
     if (live_pattern) {
       live_pattern = false;
@@ -640,6 +657,9 @@
       AppendMIR(cur_block, insn);
     }
 
+    // Associate the starting dex_pc for this opcode with its containing basic block.
+    dex_pc_to_block_map_.Put(insn->offset, cur_block->id);
+
     code_ptr += width;
 
     if (flags & Instruction::kBranch) {
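
With the changes above, FindBlock() no longer scans block_list_ looking for a containing block: dex_pc_to_block_map_ yields a 16-bit block id (0 meaning no entry), an exact start_offset match is returned directly, and a hit inside an existing block leads to SplitBlock(). A self-contained sketch of that lookup shape, using illustrative types rather than the ART classes:

    #include <cstdint>
    #include <vector>

    struct Block {
      uint16_t id;
      uint32_t start_offset;
    };

    // Returns the block starting exactly at code_offset, or nullptr.  A non-null
    // map hit whose start_offset differs means code_offset falls inside that
    // block, which the real code would then split.
    Block* LookupExact(const std::vector<uint16_t>& pc_to_block,
                       const std::vector<Block*>& blocks, uint32_t code_offset) {
      uint16_t id = pc_to_block[code_offset];
      Block* bb = (id == 0) ? nullptr : blocks[id];
      return (bb != nullptr && bb->start_offset == code_offset) ? bb : nullptr;
    }
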
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 9d4ab98..5d01489 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -569,6 +569,26 @@
     return IsBackedge(branch_bb, branch_bb->taken) || IsBackedge(branch_bb, branch_bb->fall_through);
   }
 
+  void CountBranch(int target_offset) {
+    if (target_offset <= current_offset_) {
+      backward_branches_++;
+    } else {
+      forward_branches_++;
+    }
+  }
+
+  int GetBranchCount() {
+    return backward_branches_ + forward_branches_;
+  }
+
+  bool IsPseudoMirOp(Instruction::Code opcode) {
+    return static_cast<int>(opcode) >= static_cast<int>(kMirOpFirst);
+  }
+
+  bool IsPseudoMirOp(int opcode) {
+    return opcode >= static_cast<int>(kMirOpFirst);
+  }
+
   void BasicBlockCombine();
   void CodeLayout();
   void DumpCheckStats();
@@ -725,15 +745,14 @@
   ArenaBitVector* try_block_addr_;
   BasicBlock* entry_block_;
   BasicBlock* exit_block_;
-  BasicBlock* cur_block_;
   int num_blocks_;
   const DexFile::CodeItem* current_code_item_;
-  GrowableArray<BasicBlock*> block_map_;         // FindBlock lookup cache.
+  GrowableArray<uint16_t> dex_pc_to_block_map_;  // FindBlock lookup cache.
   std::vector<DexCompilationUnit*> m_units_;     // List of methods included in this graph
   typedef std::pair<int, int> MIRLocation;       // Insert point, (m_unit_ index, offset)
   std::vector<MIRLocation> method_stack_;        // Include stack
   int current_method_;
-  int current_offset_;
+  int current_offset_;                           // Dex offset in code units
   int def_count_;                                // Used to estimate size of ssa name storage.
   int* opcode_count_;                            // Dex opcode coverage stats.
   int num_ssa_regs_;                             // Number of names following SSA transformation.
@@ -743,6 +762,8 @@
   Checkstats* checkstats_;
   SpecialCaseHandler special_case_;
   ArenaAllocator* arena_;
+  int backward_branches_;
+  int forward_branches_;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index 2f54190..d184673 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -462,7 +462,7 @@
 
 // Instruction assembly field_loc kind.
 enum ArmEncodingKind {
-  kFmtUnused,
+  kFmtUnused,    // Unused field; also marks end of formats.
   kFmtBitBlt,    // Bit string using end/start.
   kFmtDfp,       // Double FP reg.
   kFmtSfp,       // Single FP reg.
@@ -477,6 +477,7 @@
   kFmtBrOffset,  // Signed extended [26,11,13,21-16,10-0]:0.
   kFmtFPImm,     // Encoded floating point immediate.
   kFmtOff24,     // 24-bit Thumb2 unconditional branch encoding.
+  kFmtSkip,      // Unused field, but continue to next.
 };
 
 // Struct used to define the snippet positions for each Thumb opcode.
@@ -492,6 +493,7 @@
   const char* name;
   const char* fmt;
   int size;   // Note: size is in bytes.
+  FixupKind fixup;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 2d69d93..dac3a21 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -37,9 +37,9 @@
  * fmt: for pretty-printing
  */
 #define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \
-                     k3, k3s, k3e, flags, name, fmt, size) \
+                     k3, k3s, k3e, flags, name, fmt, size, fixup) \
         {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}, \
-                    {k3, k3s, k3e}}, opcode, flags, name, fmt, size}
+                    {k3, k3s, k3e}}, opcode, flags, name, fmt, size, fixup}
 
 /* Instruction dump string format keys: !pf, where "!" is the start
  * of the key, "p" is which numeric operand to use and "f" is the
@@ -79,916 +79,938 @@
 const ArmEncodingMap ArmMir2Lir::EncodingMap[kArmLast] = {
     ENCODING_MAP(kArm16BitData,    0x0000,
                  kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP, "data", "0x!0h(!0d)", 2),
+                 kFmtUnused, -1, -1, IS_UNARY_OP, "data", "0x!0h(!0d)", 2, kFixupNone),
     ENCODING_MAP(kThumbAdcRR,        0x4140,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES | USES_CCODES,
-                 "adcs", "!0C, !1C", 2),
+                 "adcs", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbAddRRI3,      0x1c00,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "adds", "!0C, !1C, #!2d", 2),
+                 "adds", "!0C, !1C, #!2d", 2, kFixupNone),
     ENCODING_MAP(kThumbAddRI8,       0x3000,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES,
-                 "adds", "!0C, !0C, #!1d", 2),
+                 "adds", "!0C, !0C, #!1d", 2, kFixupNone),
     ENCODING_MAP(kThumbAddRRR,       0x1800,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "adds", "!0C, !1C, !2C", 2),
+                 "adds", "!0C, !1C, !2C", 2, kFixupNone),
     ENCODING_MAP(kThumbAddRRLH,     0x4440,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
-                 "add", "!0C, !1C", 2),
+                 "add", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbAddRRHL,     0x4480,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
-                 "add", "!0C, !1C", 2),
+                 "add", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbAddRRHH,     0x44c0,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
-                 "add", "!0C, !1C", 2),
+                 "add", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbAddPcRel,    0xa000,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | IS_BRANCH | NEEDS_FIXUP,
-                 "add", "!0C, pc, #!1E", 2),
+                 "add", "!0C, pc, #!1E", 2, kFixupLoad),
     ENCODING_MAP(kThumbAddSpRel,    0xa800,
-                 kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0,
+                 kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF_SP | REG_USE_SP,
-                 "add", "!0C, sp, #!2E", 2),
+                 "add", "!0C, sp, #!2E", 2, kFixupNone),
     ENCODING_MAP(kThumbAddSpI7,      0xb000,
                  kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP,
-                 "add", "sp, #!0d*4", 2),
+                 "add", "sp, #!0d*4", 2, kFixupNone),
     ENCODING_MAP(kThumbAndRR,        0x4000,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "ands", "!0C, !1C", 2),
+                 "ands", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbAsrRRI5,      0x1000,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "asrs", "!0C, !1C, #!2d", 2),
+                 "asrs", "!0C, !1C, #!2d", 2, kFixupNone),
     ENCODING_MAP(kThumbAsrRR,        0x4100,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "asrs", "!0C, !1C", 2),
+                 "asrs", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbBCond,        0xd000,
                  kFmtBitBlt, 7, 0, kFmtBitBlt, 11, 8, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES |
-                 NEEDS_FIXUP, "b!1c", "!0t", 2),
+                 NEEDS_FIXUP, "b!1c", "!0t", 2, kFixupCondBranch),
     ENCODING_MAP(kThumbBUncond,      0xe000,
                  kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP,
-                 "b", "!0t", 2),
+                 "b", "!0t", 2, kFixupT1Branch),
     ENCODING_MAP(kThumbBicRR,        0x4380,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "bics", "!0C, !1C", 2),
+                 "bics", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbBkpt,          0xbe00,
                  kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
-                 "bkpt", "!0d", 2),
+                 "bkpt", "!0d", 2, kFixupNone),
     ENCODING_MAP(kThumbBlx1,         0xf000,
                  kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR |
-                 NEEDS_FIXUP, "blx_1", "!0u", 2),
+                 NEEDS_FIXUP, "blx_1", "!0u", 2, kFixupBlx1),
     ENCODING_MAP(kThumbBlx2,         0xe800,
                  kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR |
-                 NEEDS_FIXUP, "blx_2", "!0v", 2),
+                 NEEDS_FIXUP, "blx_2", "!0v", 2, kFixupLabel),
     ENCODING_MAP(kThumbBl1,          0xf000,
                  kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR,
-                 "bl_1", "!0u", 2),
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
+                 "bl_1", "!0u", 2, kFixupBl1),
     ENCODING_MAP(kThumbBl2,          0xf800,
                  kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR,
-                 "bl_2", "!0v", 2),
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
+                 "bl_2", "!0v", 2, kFixupLabel),
     ENCODING_MAP(kThumbBlxR,         0x4780,
                  kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_USE0 | IS_BRANCH | REG_DEF_LR,
-                 "blx", "!0C", 2),
+                 "blx", "!0C", 2, kFixupNone),
     ENCODING_MAP(kThumbBx,            0x4700,
                  kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
-                 "bx", "!0C", 2),
+                 "bx", "!0C", 2, kFixupNone),
     ENCODING_MAP(kThumbCmnRR,        0x42c0,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmn", "!0C, !1C", 2),
+                 "cmn", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbCmpRI8,       0x2800,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | SETS_CCODES,
-                 "cmp", "!0C, #!1d", 2),
+                 "cmp", "!0C, #!1d", 2, kFixupNone),
     ENCODING_MAP(kThumbCmpRR,        0x4280,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 2),
+                 "cmp", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbCmpLH,        0x4540,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 2),
+                 "cmp", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbCmpHL,        0x4580,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 2),
+                 "cmp", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbCmpHH,        0x45c0,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 2),
+                 "cmp", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbEorRR,        0x4040,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "eors", "!0C, !1C", 2),
+                 "eors", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbLdmia,         0xc800,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
-                 "ldmia", "!0C!!, <!1R>", 2),
+                 "ldmia", "!0C!!, <!1R>", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrRRI5,      0x6800,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldr", "!0C, [!1C, #!2E]", 2),
+                 "ldr", "!0C, [!1C, #!2E]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrRRR,       0x5800,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldr", "!0C, [!1C, !2C]", 2),
+                 "ldr", "!0C, [!1C, !2C]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrPcRel,    0x4800,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC
-                 | IS_LOAD | NEEDS_FIXUP, "ldr", "!0C, [pc, #!1E]", 2),
+                 | IS_LOAD | NEEDS_FIXUP, "ldr", "!0C, [pc, #!1E]", 2, kFixupLoad),
     ENCODING_MAP(kThumbLdrSpRel,    0x9800,
-                 kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0,
+                 kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_SP
-                 | IS_LOAD, "ldr", "!0C, [sp, #!2E]", 2),
+                 | IS_LOAD, "ldr", "!0C, [sp, #!2E]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrbRRI5,     0x7800,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrb", "!0C, [!1C, #2d]", 2),
+                 "ldrb", "!0C, [!1C, #2d]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrbRRR,      0x5c00,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrb", "!0C, [!1C, !2C]", 2),
+                 "ldrb", "!0C, [!1C, !2C]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrhRRI5,     0x8800,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrh", "!0C, [!1C, #!2F]", 2),
+                 "ldrh", "!0C, [!1C, #!2F]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrhRRR,      0x5a00,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrh", "!0C, [!1C, !2C]", 2),
+                 "ldrh", "!0C, [!1C, !2C]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrsbRRR,     0x5600,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrsb", "!0C, [!1C, !2C]", 2),
+                 "ldrsb", "!0C, [!1C, !2C]", 2, kFixupNone),
     ENCODING_MAP(kThumbLdrshRRR,     0x5e00,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrsh", "!0C, [!1C, !2C]", 2),
+                 "ldrsh", "!0C, [!1C, !2C]", 2, kFixupNone),
     ENCODING_MAP(kThumbLslRRI5,      0x0000,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "lsls", "!0C, !1C, #!2d", 2),
+                 "lsls", "!0C, !1C, #!2d", 2, kFixupNone),
     ENCODING_MAP(kThumbLslRR,        0x4080,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "lsls", "!0C, !1C", 2),
+                 "lsls", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbLsrRRI5,      0x0800,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "lsrs", "!0C, !1C, #!2d", 2),
+                 "lsrs", "!0C, !1C, #!2d", 2, kFixupNone),
     ENCODING_MAP(kThumbLsrRR,        0x40c0,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "lsrs", "!0C, !1C", 2),
+                 "lsrs", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbMovImm,       0x2000,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0 | SETS_CCODES,
-                 "movs", "!0C, #!1d", 2),
+                 "movs", "!0C, #!1d", 2, kFixupNone),
     ENCODING_MAP(kThumbMovRR,        0x1c00,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "movs", "!0C, !1C", 2),
+                 "movs", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbMovRR_H2H,    0x46c0,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "mov", "!0C, !1C", 2),
+                 "mov", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbMovRR_H2L,    0x4640,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "mov", "!0C, !1C", 2),
+                 "mov", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbMovRR_L2H,    0x4680,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "mov", "!0C, !1C", 2),
+                 "mov", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbMul,           0x4340,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "muls", "!0C, !1C", 2),
+                 "muls", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbMvn,           0x43c0,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "mvns", "!0C, !1C", 2),
+                 "mvns", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbNeg,           0x4240,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "negs", "!0C, !1C", 2),
+                 "negs", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbOrr,           0x4300,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "orrs", "!0C, !1C", 2),
+                 "orrs", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbPop,           0xbc00,
                  kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0
-                 | IS_LOAD, "pop", "<!0R>", 2),
+                 | IS_LOAD, "pop", "<!0R>", 2, kFixupNone),
     ENCODING_MAP(kThumbPush,          0xb400,
                  kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
-                 | IS_STORE, "push", "<!0R>", 2),
+                 | IS_STORE, "push", "<!0R>", 2, kFixupNone),
     ENCODING_MAP(kThumbRorRR,        0x41c0,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
-                 "rors", "!0C, !1C", 2),
+                 "rors", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbSbc,           0x4180,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE01 | USES_CCODES | SETS_CCODES,
-                 "sbcs", "!0C, !1C", 2),
+                 "sbcs", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbStmia,         0xc000,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_USE_LIST1 | IS_STORE,
-                 "stmia", "!0C!!, <!1R>", 2),
+                 "stmia", "!0C!!, <!1R>", 2, kFixupNone),
     ENCODING_MAP(kThumbStrRRI5,      0x6000,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "str", "!0C, [!1C, #!2E]", 2),
+                 "str", "!0C, [!1C, #!2E]", 2, kFixupNone),
     ENCODING_MAP(kThumbStrRRR,       0x5000,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
-                 "str", "!0C, [!1C, !2C]", 2),
+                 "str", "!0C, [!1C, !2C]", 2, kFixupNone),
     ENCODING_MAP(kThumbStrSpRel,    0x9000,
-                 kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0,
+                 kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | REG_USE_SP
-                 | IS_STORE, "str", "!0C, [sp, #!2E]", 2),
+                 | IS_STORE, "str", "!0C, [sp, #!2E]", 2, kFixupNone),
     ENCODING_MAP(kThumbStrbRRI5,     0x7000,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "strb", "!0C, [!1C, #!2d]", 2),
+                 "strb", "!0C, [!1C, #!2d]", 2, kFixupNone),
     ENCODING_MAP(kThumbStrbRRR,      0x5400,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
-                 "strb", "!0C, [!1C, !2C]", 2),
+                 "strb", "!0C, [!1C, !2C]", 2, kFixupNone),
     ENCODING_MAP(kThumbStrhRRI5,     0x8000,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "strh", "!0C, [!1C, #!2F]", 2),
+                 "strh", "!0C, [!1C, #!2F]", 2, kFixupNone),
     ENCODING_MAP(kThumbStrhRRR,      0x5200,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
-                 "strh", "!0C, [!1C, !2C]", 2),
+                 "strh", "!0C, [!1C, !2C]", 2, kFixupNone),
     ENCODING_MAP(kThumbSubRRI3,      0x1e00,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "subs", "!0C, !1C, #!2d", 2),
+                 "subs", "!0C, !1C, #!2d", 2, kFixupNone),
     ENCODING_MAP(kThumbSubRI8,       0x3800,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES,
-                 "subs", "!0C, #!1d", 2),
+                 "subs", "!0C, #!1d", 2, kFixupNone),
     ENCODING_MAP(kThumbSubRRR,       0x1a00,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "subs", "!0C, !1C, !2C", 2),
+                 "subs", "!0C, !1C, !2C", 2, kFixupNone),
     ENCODING_MAP(kThumbSubSpI7,      0xb080,
                  kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP,
-                 "sub", "sp, #!0d*4", 2),
+                 "sub", "sp, #!0d*4", 2, kFixupNone),
     ENCODING_MAP(kThumbSwi,           0xdf00,
                  kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
-                 "swi", "!0d", 2),
+                 "swi", "!0d", 2, kFixupNone),
     ENCODING_MAP(kThumbTst,           0x4200,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE01 | SETS_CCODES,
-                 "tst", "!0C, !1C", 2),
+                 "tst", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumb2Vldrs,       0xed900a00,
                  kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD |
-                 REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0s, [!1C, #!2E]", 4),
+                 REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0s, [!1C, #!2E]", 4, kFixupVLoad),
     ENCODING_MAP(kThumb2Vldrd,       0xed900b00,
                  kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD |
-                 REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0S, [!1C, #!2E]", 4),
+                 REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0S, [!1C, #!2E]", 4, kFixupVLoad),
     ENCODING_MAP(kThumb2Vmuls,        0xee200a00,
                  kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vmuls", "!0s, !1s, !2s", 4),
+                 "vmuls", "!0s, !1s, !2s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vmuld,        0xee200b00,
                  kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vmuld", "!0S, !1S, !2S", 4),
+                 "vmuld", "!0S, !1S, !2S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vstrs,       0xed800a00,
                  kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "vstr", "!0s, [!1C, #!2E]", 4),
+                 "vstr", "!0s, [!1C, #!2E]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vstrd,       0xed800b00,
                  kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "vstr", "!0S, [!1C, #!2E]", 4),
+                 "vstr", "!0S, [!1C, #!2E]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vsubs,        0xee300a40,
                  kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vsub", "!0s, !1s, !2s", 4),
+                 "vsub", "!0s, !1s, !2s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vsubd,        0xee300b40,
                  kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vsub", "!0S, !1S, !2S", 4),
+                 "vsub", "!0S, !1S, !2S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vadds,        0xee300a00,
                  kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vadd", "!0s, !1s, !2s", 4),
+                 "vadd", "!0s, !1s, !2s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vaddd,        0xee300b00,
                  kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vadd", "!0S, !1S, !2S", 4),
+                 "vadd", "!0S, !1S, !2S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vdivs,        0xee800a00,
                  kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vdivs", "!0s, !1s, !2s", 4),
+                 "vdivs", "!0s, !1s, !2s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vdivd,        0xee800b00,
                  kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "vdivd", "!0S, !1S, !2S", 4),
+                 "vdivd", "!0S, !1S, !2S", 4, kFixupNone),
     ENCODING_MAP(kThumb2VcvtIF,       0xeeb80ac0,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.f32", "!0s, !1s", 4),
+                 "vcvt.f32", "!0s, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2VcvtID,       0xeeb80bc0,
                  kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.f64", "!0S, !1s", 4),
+                 "vcvt.f64", "!0S, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2VcvtFI,       0xeebd0ac0,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.s32.f32 ", "!0s, !1s", 4),
+                 "vcvt.s32.f32 ", "!0s, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2VcvtDI,       0xeebd0bc0,
                  kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.s32.f64 ", "!0s, !1S", 4),
+                 "vcvt.s32.f64 ", "!0s, !1S", 4, kFixupNone),
     ENCODING_MAP(kThumb2VcvtFd,       0xeeb70ac0,
                  kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.f64.f32 ", "!0S, !1s", 4),
+                 "vcvt.f64.f32 ", "!0S, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2VcvtDF,       0xeeb70bc0,
                  kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vcvt.f32.f64 ", "!0s, !1S", 4),
+                 "vcvt.f32.f64 ", "!0s, !1S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vsqrts,       0xeeb10ac0,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vsqrt.f32 ", "!0s, !1s", 4),
+                 "vsqrt.f32 ", "!0s, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vsqrtd,       0xeeb10bc0,
                  kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vsqrt.f64 ", "!0S, !1S", 4),
+                 "vsqrt.f64 ", "!0S, !1S", 4, kFixupNone),
     ENCODING_MAP(kThumb2MovImmShift, 0xf04f0000, /* no setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "mov", "!0C, #!1m", 4),
+                 "mov", "!0C, #!1m", 4, kFixupNone),
     ENCODING_MAP(kThumb2MovImm16,       0xf2400000,
                  kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "mov", "!0C, #!1M", 4),
+                 "mov", "!0C, #!1M", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrRRI12,       0xf8c00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "str", "!0C, [!1C, #!2d]", 4),
+                 "str", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrRRI12,       0xf8d00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldr", "!0C, [!1C, #!2d]", 4),
+                 "ldr", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrRRI8Predec,       0xf8400c00,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "str", "!0C, [!1C, #-!2d]", 4),
+                 "str", "!0C, [!1C, #-!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrRRI8Predec,       0xf8500c00,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldr", "!0C, [!1C, #-!2d]", 4),
+                 "ldr", "!0C, [!1C, #-!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Cbnz,       0xb900, /* Note: does not affect flags */
                  kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH |
-                 NEEDS_FIXUP, "cbnz", "!0C,!1t", 2),
+                 NEEDS_FIXUP, "cbnz", "!0C,!1t", 2, kFixupCBxZ),
     ENCODING_MAP(kThumb2Cbz,       0xb100, /* Note: does not affect flags */
                  kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH |
-                 NEEDS_FIXUP, "cbz", "!0C,!1t", 2),
+                 NEEDS_FIXUP, "cbz", "!0C,!1t", 2, kFixupCBxZ),
     ENCODING_MAP(kThumb2AddRRI12,       0xf2000000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */
-                 "add", "!0C,!1C,#!2d", 4),
+                 "add", "!0C,!1C,#!2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2MovRR,       0xea4f0000, /* no setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "mov", "!0C, !1C", 4),
+                 "mov", "!0C, !1C", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vmovs,       0xeeb00a40,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vmov.f32 ", " !0s, !1s", 4),
+                 "vmov.f32 ", " !0s, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vmovd,       0xeeb00b40,
                  kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vmov.f64 ", " !0S, !1S", 4),
+                 "vmov.f64 ", " !0S, !1S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Ldmia,         0xe8900000,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
-                 "ldmia", "!0C!!, <!1R>", 4),
+                 "ldmia", "!0C!!, <!1R>", 4, kFixupNone),
     ENCODING_MAP(kThumb2Stmia,         0xe8800000,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE0 | REG_USE_LIST1 | IS_STORE,
-                 "stmia", "!0C!!, <!1R>", 4),
+                 "stmia", "!0C!!, <!1R>", 4, kFixupNone),
     ENCODING_MAP(kThumb2AddRRR,  0xeb100000, /* setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1,
                  IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "adds", "!0C, !1C, !2C!3H", 4),
+                 "adds", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2SubRRR,       0xebb00000, /* setflags enconding */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1,
                  IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "subs", "!0C, !1C, !2C!3H", 4),
+                 "subs", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2SbcRRR,       0xeb700000, /* setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1,
                  IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES | SETS_CCODES,
-                 "sbcs", "!0C, !1C, !2C!3H", 4),
+                 "sbcs", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2CmpRR,       0xebb00f00,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
-                 "cmp", "!0C, !1C", 4),
+                 "cmp", "!0C, !1C", 4, kFixupNone),
     ENCODING_MAP(kThumb2SubRRI12,       0xf2a00000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */
-                 "sub", "!0C,!1C,#!2d", 4),
+                 "sub", "!0C,!1C,#!2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2MvnImm12,  0xf06f0000, /* no setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "mvn", "!0C, #!1n", 4),
+                 "mvn", "!0C, #!1n", 4, kFixupNone),
     ENCODING_MAP(kThumb2Sel,       0xfaa0f080,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE12 | USES_CCODES,
-                 "sel", "!0C, !1C, !2C", 4),
+                 "sel", "!0C, !1C, !2C", 4, kFixupNone),
     ENCODING_MAP(kThumb2Ubfx,       0xf3c00000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1,
                  kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
-                 "ubfx", "!0C, !1C, #!2d, #!3d", 4),
+                 "ubfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone),
     ENCODING_MAP(kThumb2Sbfx,       0xf3400000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1,
                  kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
-                 "sbfx", "!0C, !1C, #!2d, #!3d", 4),
+                 "sbfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrRRR,    0xf8500000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldr", "!0C, [!1C, !2C, LSL #!3d]", 4),
+                 "ldr", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrhRRR,    0xf8300000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrh", "!0C, [!1C, !2C, LSL #!3d]", 4),
+                 "ldrh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrshRRR,    0xf9300000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrsh", "!0C, [!1C, !2C, LSL #!3d]", 4),
+                 "ldrsh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrbRRR,    0xf8100000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrb", "!0C, [!1C, !2C, LSL #!3d]", 4),
+                 "ldrb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrsbRRR,    0xf9100000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
-                 "ldrsb", "!0C, [!1C, !2C, LSL #!3d]", 4),
+                 "ldrsb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrRRR,    0xf8400000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
-                 "str", "!0C, [!1C, !2C, LSL #!3d]", 4),
+                 "str", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrhRRR,    0xf8200000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
-                 "strh", "!0C, [!1C, !2C, LSL #!3d]", 4),
+                 "strh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrbRRR,    0xf8000000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
-                 "strb", "!0C, [!1C, !2C, LSL #!3d]", 4),
+                 "strb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrhRRI12,       0xf8b00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrh", "!0C, [!1C, #!2d]", 4),
+                 "ldrh", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrshRRI12,       0xf9b00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrsh", "!0C, [!1C, #!2d]", 4),
+                 "ldrsh", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrbRRI12,       0xf8900000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrb", "!0C, [!1C, #!2d]", 4),
+                 "ldrb", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrsbRRI12,       0xf9900000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrsb", "!0C, [!1C, #!2d]", 4),
+                 "ldrsb", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrhRRI12,       0xf8a00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "strh", "!0C, [!1C, #!2d]", 4),
+                 "strh", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrbRRI12,       0xf8800000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
-                 "strb", "!0C, [!1C, #!2d]", 4),
+                 "strb", "!0C, [!1C, #!2d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Pop,           0xe8bd0000,
                  kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0
-                 | IS_LOAD | NEEDS_FIXUP, "pop", "<!0R>", 4),
+                 | IS_LOAD | NEEDS_FIXUP, "pop", "<!0R>", 4, kFixupPushPop),
     ENCODING_MAP(kThumb2Push,          0xe92d0000,
                  kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
-                 | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4),
+                 | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4, kFixupPushPop),
     ENCODING_MAP(kThumb2CmpRI12, 0xf1b00f00,
                  kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_USE0 | SETS_CCODES,
-                 "cmp", "!0C, #!1m", 4),
+                 "cmp", "!0C, #!1m", 4, kFixupNone),
     ENCODING_MAP(kThumb2AdcRRR,  0xeb500000, /* setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1,
                  IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "adcs", "!0C, !1C, !2C!3H", 4),
+                 "adcs", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2AndRRR,  0xea000000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "and", "!0C, !1C, !2C!3H", 4),
+                 "and", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2BicRRR,  0xea200000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "bic", "!0C, !1C, !2C!3H", 4),
+                 "bic", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2CmnRR,  0xeb000000,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "cmn", "!0C, !1C, shift !2d", 4),
+                 "cmn", "!0C, !1C, shift !2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2EorRRR,  0xea800000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "eor", "!0C, !1C, !2C!3H", 4),
+                 "eor", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2MulRRR,  0xfb00f000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "mul", "!0C, !1C, !2C", 4),
+                 "mul", "!0C, !1C, !2C", 4, kFixupNone),
     ENCODING_MAP(kThumb2MnvRR,  0xea6f0000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "mvn", "!0C, !1C, shift !2d", 4),
+                 "mvn", "!0C, !1C, shift !2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2RsubRRI8,       0xf1d00000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "rsb", "!0C,!1C,#!2m", 4),
+                 "rsb", "!0C,!1C,#!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2NegRR,       0xf1d00000, /* instance of rsub */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "neg", "!0C,!1C", 4),
+                 "neg", "!0C,!1C", 4, kFixupNone),
     ENCODING_MAP(kThumb2OrrRRR,  0xea400000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
-                 "orr", "!0C, !1C, !2C!3H", 4),
+                 "orr", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2TstRR,       0xea100f00,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
-                 "tst", "!0C, !1C, shift !2d", 4),
+                 "tst", "!0C, !1C, shift !2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2LslRRR,  0xfa00f000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "lsl", "!0C, !1C, !2C", 4),
+                 "lsl", "!0C, !1C, !2C", 4, kFixupNone),
     ENCODING_MAP(kThumb2LsrRRR,  0xfa20f000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "lsr", "!0C, !1C, !2C", 4),
+                 "lsr", "!0C, !1C, !2C", 4, kFixupNone),
     ENCODING_MAP(kThumb2AsrRRR,  0xfa40f000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "asr", "!0C, !1C, !2C", 4),
+                 "asr", "!0C, !1C, !2C", 4, kFixupNone),
     ENCODING_MAP(kThumb2RorRRR,  0xfa60f000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "ror", "!0C, !1C, !2C", 4),
+                 "ror", "!0C, !1C, !2C", 4, kFixupNone),
     ENCODING_MAP(kThumb2LslRRI5,  0xea4f0000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "lsl", "!0C, !1C, #!2d", 4),
+                 "lsl", "!0C, !1C, #!2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2LsrRRI5,  0xea4f0010,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "lsr", "!0C, !1C, #!2d", 4),
+                 "lsr", "!0C, !1C, #!2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2AsrRRI5,  0xea4f0020,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "asr", "!0C, !1C, #!2d", 4),
+                 "asr", "!0C, !1C, #!2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2RorRRI5,  0xea4f0030,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "ror", "!0C, !1C, #!2d", 4),
+                 "ror", "!0C, !1C, #!2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2BicRRI8,  0xf0200000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "bic", "!0C, !1C, #!2m", 4),
+                 "bic", "!0C, !1C, #!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2AndRRI8,  0xf0000000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "and", "!0C, !1C, #!2m", 4),
+                 "and", "!0C, !1C, #!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2OrrRRI8,  0xf0400000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "orr", "!0C, !1C, #!2m", 4),
+                 "orr", "!0C, !1C, #!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2EorRRI8,  0xf0800000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
-                 "eor", "!0C, !1C, #!2m", 4),
+                 "eor", "!0C, !1C, #!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2AddRRI8,  0xf1100000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "adds", "!0C, !1C, #!2m", 4),
+                 "adds", "!0C, !1C, #!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2AdcRRI8,  0xf1500000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
-                 "adcs", "!0C, !1C, #!2m", 4),
+                 "adcs", "!0C, !1C, #!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2SubRRI8,  0xf1b00000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "subs", "!0C, !1C, #!2m", 4),
+                 "subs", "!0C, !1C, #!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2SbcRRI8,  0xf1700000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
-                 "sbcs", "!0C, !1C, #!2m", 4),
+                 "sbcs", "!0C, !1C, #!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2It,  0xbf00,
                  kFmtBitBlt, 7, 4, kFmtBitBlt, 3, 0, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | IS_IT | USES_CCODES,
-                 "it:!1b", "!0c", 2),
+                 "it:!1b", "!0c", 2, kFixupNone),
     ENCODING_MAP(kThumb2Fmstat,  0xeef1fa10,
                  kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, NO_OPERAND | SETS_CCODES,
-                 "fmstat", "", 4),
+                 "fmstat", "", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vcmpd,        0xeeb40b40,
                  kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01,
-                 "vcmp.f64", "!0S, !1S", 4),
+                 "vcmp.f64", "!0S, !1S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vcmps,        0xeeb40a40,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01,
-                 "vcmp.f32", "!0s, !1s", 4),
+                 "vcmp.f32", "!0s, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrPcRel12,       0xf8df0000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
-                 "ldr", "!0C, [r15pc, #!1d]", 4),
+                 "ldr", "!0C, [r15pc, #!1d]", 4, kFixupLoad),
     ENCODING_MAP(kThumb2BCond,        0xf0008000,
                  kFmtBrOffset, -1, -1, kFmtBitBlt, 25, 22, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | IS_BRANCH | USES_CCODES | NEEDS_FIXUP,
-                 "b!1c", "!0t", 4),
+                 "b!1c", "!0t", 4, kFixupCondBranch),
     ENCODING_MAP(kThumb2Vmovd_RR,       0xeeb00b40,
                  kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vmov.f64", "!0S, !1S", 4),
+                 "vmov.f64", "!0S, !1S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vmovs_RR,       0xeeb00a40,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vmov.f32", "!0s, !1s", 4),
+                 "vmov.f32", "!0s, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Fmrs,       0xee100a10,
                  kFmtBitBlt, 15, 12, kFmtSfp, 7, 16, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fmrs", "!0C, !1s", 4),
+                 "fmrs", "!0C, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Fmsr,       0xee000a10,
                  kFmtSfp, 7, 16, kFmtBitBlt, 15, 12, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "fmsr", "!0s, !1C", 4),
+                 "fmsr", "!0s, !1C", 4, kFixupNone),
     ENCODING_MAP(kThumb2Fmrrd,       0xec500b10,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtDfp, 5, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2,
-                 "fmrrd", "!0C, !1C, !2S", 4),
+                 "fmrrd", "!0C, !1C, !2S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Fmdrr,       0xec400b10,
                  kFmtDfp, 5, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
-                 "fmdrr", "!0S, !1C, !2C", 4),
+                 "fmdrr", "!0S, !1C, !2C", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vabsd,       0xeeb00bc0,
                  kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vabs.f64", "!0S, !1S", 4),
+                 "vabs.f64", "!0S, !1S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vabss,       0xeeb00ac0,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vabs.f32", "!0s, !1s", 4),
+                 "vabs.f32", "!0s, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vnegd,       0xeeb10b40,
                  kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vneg.f64", "!0S, !1S", 4),
+                 "vneg.f64", "!0S, !1S", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vnegs,       0xeeb10a40,
                  kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
-                 "vneg.f32", "!0s, !1s", 4),
+                 "vneg.f32", "!0s, !1s", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vmovs_IMM8,       0xeeb00a00,
                  kFmtSfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "vmov.f32", "!0s, #0x!1h", 4),
+                 "vmov.f32", "!0s, #0x!1h", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vmovd_IMM8,       0xeeb00b00,
                  kFmtDfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "vmov.f64", "!0S, #0x!1h", 4),
+                 "vmov.f64", "!0S, #0x!1h", 4, kFixupNone),
     ENCODING_MAP(kThumb2Mla,  0xfb000000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtBitBlt, 15, 12,
                  IS_QUAD_OP | REG_DEF0 | REG_USE1 | REG_USE2 | REG_USE3,
-                 "mla", "!0C, !1C, !2C, !3C", 4),
+                 "mla", "!0C, !1C, !2C, !3C", 4, kFixupNone),
     ENCODING_MAP(kThumb2Umull,  0xfba00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
                  kFmtBitBlt, 3, 0,
                  IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3,
-                 "umull", "!0C, !1C, !2C, !3C", 4),
+                 "umull", "!0C, !1C, !2C, !3C", 4, kFixupNone),
     ENCODING_MAP(kThumb2Ldrex,       0xe8500f00,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
-                 "ldrex", "!0C, [!1C, #!2E]", 4),
+                 "ldrex", "!0C, [!1C, #!2E]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Strex,       0xe8400000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16,
                  kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0_USE12 | IS_STORE,
-                 "strex", "!0C,!1C, [!2C, #!2E]", 4),
+                 "strex", "!0C,!1C, [!2C, #!2E]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Clrex,       0xf3bf8f2f,
                  kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, NO_OPERAND,
-                 "clrex", "", 4),
+                 "clrex", "", 4, kFixupNone),
     ENCODING_MAP(kThumb2Bfi,         0xf3600000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtShift5, -1, -1,
                  kFmtBitBlt, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
-                 "bfi", "!0C,!1C,#!2d,#!3d", 4),
+                 "bfi", "!0C,!1C,#!2d,#!3d", 4, kFixupNone),
     ENCODING_MAP(kThumb2Bfc,         0xf36f0000,
                  kFmtBitBlt, 11, 8, kFmtShift5, -1, -1, kFmtBitBlt, 4, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0,
-                 "bfc", "!0C,#!1d,#!2d", 4),
+                 "bfc", "!0C,#!1d,#!2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2Dmb,         0xf3bf8f50,
                  kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP,
-                 "dmb", "#!0B", 4),
+                 "dmb", "#!0B", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrPcReln12,       0xf85f0000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD,
-                 "ldr", "!0C, [r15pc, -#!1d]", 4),
+                 "ldr", "!0C, [r15pc, -#!1d]", 4, kFixupNone),
     ENCODING_MAP(kThumb2Stm,          0xe9000000,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 12, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_USE0 | REG_USE_LIST1 | IS_STORE,
-                 "stm", "!0C, <!1R>", 4),
+                 "stm", "!0C, <!1R>", 4, kFixupNone),
     ENCODING_MAP(kThumbUndefined,       0xde00,
                  kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, NO_OPERAND,
-                 "undefined", "", 2),
+                 "undefined", "", 2, kFixupNone),
     // NOTE: vpop, vpush hard-encoded for s16+ reg list
     ENCODING_MAP(kThumb2VPopCS,       0xecbd8a00,
                  kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_FPCS_LIST0
-                 | IS_LOAD, "vpop", "<!0P>", 4),
+                 | IS_LOAD, "vpop", "<!0P>", 4, kFixupNone),
     ENCODING_MAP(kThumb2VPushCS,      0xed2d8a00,
                  kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_FPCS_LIST0
-                 | IS_STORE, "vpush", "<!0P>", 4),
+                 | IS_STORE, "vpush", "<!0P>", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vldms,        0xec900a00,
                  kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_USE0 | REG_DEF_FPCS_LIST2
-                 | IS_LOAD, "vldms", "!0C, <!2Q>", 4),
+                 | IS_LOAD, "vldms", "!0C, <!2Q>", 4, kFixupNone),
     ENCODING_MAP(kThumb2Vstms,        0xec800a00,
                  kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_USE0 | REG_USE_FPCS_LIST2
-                 | IS_STORE, "vstms", "!0C, <!2Q>", 4),
+                 | IS_STORE, "vstms", "!0C, <!2Q>", 4, kFixupNone),
     ENCODING_MAP(kThumb2BUncond,      0xf0009000,
                  kFmtOff24, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH,
-                 "b", "!0t", 4),
+                 "b", "!0t", 4, kFixupT2Branch),
     ENCODING_MAP(kThumb2MovImm16H,       0xf2c00000,
                  kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0,
-                 "movt", "!0C, #!1M", 4),
+                 "movt", "!0C, #!1M", 4, kFixupNone),
     ENCODING_MAP(kThumb2AddPCR,      0x4487,
                  kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
-                 IS_UNARY_OP | REG_USE0 | IS_BRANCH,
-                 "add", "rPC, !0C", 2),
+                 IS_UNARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP,
+                 "add", "rPC, !0C", 2, kFixupLabel),
     ENCODING_MAP(kThumb2Adr,         0xf20f0000,
                  kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  /* Note: doesn't affect flags */
                  IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP,
-                 "adr", "!0C,#!1d", 4),
+                 "adr", "!0C,#!1d", 4, kFixupAdr),
     ENCODING_MAP(kThumb2MovImm16LST,     0xf2400000,
                  kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP,
-                 "mov", "!0C, #!1M", 4),
+                 "mov", "!0C, #!1M", 4, kFixupMovImmLST),
     ENCODING_MAP(kThumb2MovImm16HST,     0xf2c00000,
                  kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0 | NEEDS_FIXUP,
-                 "movt", "!0C, #!1M", 4),
+                 "movt", "!0C, #!1M", 4, kFixupMovImmHST),
     ENCODING_MAP(kThumb2LdmiaWB,         0xe8b00000,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
-                 "ldmia", "!0C!!, <!1R>", 4),
+                 "ldmia", "!0C!!, <!1R>", 4, kFixupNone),
     ENCODING_MAP(kThumb2SubsRRI12,       0xf1b00000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "subs", "!0C,!1C,#!2d", 4),
+                 "subs", "!0C,!1C,#!2d", 4, kFixupNone),
     ENCODING_MAP(kThumb2OrrRRRs,  0xea500000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "orrs", "!0C, !1C, !2C!3H", 4),
+                 "orrs", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2Push1,    0xf84d0d04,
                  kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE0
-                 | IS_STORE, "push1", "!0C", 4),
+                 | IS_STORE, "push1", "!0C", 4, kFixupNone),
     ENCODING_MAP(kThumb2Pop1,    0xf85d0b04,
                  kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF0
-                 | IS_LOAD, "pop1", "!0C", 4),
+                 | IS_LOAD, "pop1", "!0C", 4, kFixupNone),
     ENCODING_MAP(kThumb2RsubRRR,  0xebd00000, /* setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1,
                  IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
-                 "rsbs", "!0C, !1C, !2C!3H", 4),
+                 "rsbs", "!0C, !1C, !2C!3H", 4, kFixupNone),
     ENCODING_MAP(kThumb2Smull,  0xfb800000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
                  kFmtBitBlt, 3, 0,
                  IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3,
-                 "smull", "!0C, !1C, !2C, !3C", 4),
+                 "smull", "!0C, !1C, !2C, !3C", 4, kFixupNone),
     ENCODING_MAP(kThumb2LdrdPcRel8,  0xe9df0000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
-                 "ldrd", "!0C, !1C, [pc, #!2E]", 4),
+                 "ldrd", "!0C, !1C, [pc, #!2E]", 4, kFixupLoad),
     ENCODING_MAP(kThumb2LdrdI8, 0xe9d00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
                  kFmtBitBlt, 7, 0,
                  IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | IS_LOAD,
-                 "ldrd", "!0C, !1C, [!2C, #!3E]", 4),
+                 "ldrd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone),
     ENCODING_MAP(kThumb2StrdI8, 0xe9c00000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
                  kFmtBitBlt, 7, 0,
                  IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE,
-                 "strd", "!0C, !1C, [!2C, #!3E]", 4),
+                 "strd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone),
 };
 
+// new_lir replaces orig_lir in the pcrel_fixup list.
+void ArmMir2Lir::ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) {
+  new_lir->u.a.pcrel_next = orig_lir->u.a.pcrel_next;
+  if (UNLIKELY(prev_lir == NULL)) {
+    first_fixup_ = new_lir;
+  } else {
+    prev_lir->u.a.pcrel_next = new_lir;
+  }
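+  // Mark orig_lir as no longer needing pc-relative fixup; it stays in the instruction stream.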
+  orig_lir->flags.fixup = kFixupNone;
+}
+
+// new_lir is inserted before orig_lir in the pcrel_fixup list.
+void ArmMir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) {
+  new_lir->u.a.pcrel_next = orig_lir;
+  if (UNLIKELY(prev_lir == NULL)) {
+    first_fixup_ = new_lir;
+  } else {
+    DCHECK(prev_lir->u.a.pcrel_next == orig_lir);
+    prev_lir->u.a.pcrel_next = new_lir;
+  }
+}
+
 /*
  * The fake NOP of moving r0 to r0 actually will incur data stalls if r0 is
  * not ready. Since r5FP is not updated often, it is less likely to
@@ -997,290 +1019,16 @@
  */
 #define PADDING_MOV_R5_R5               0x1C2D
 
-/*
- * Assemble the LIR into binary instruction format.  Note that we may
- * discover that pc-relative displacements may not fit the selected
- * instruction.
- */
-AssemblerStatus ArmMir2Lir::AssembleInstructions(uintptr_t start_addr) {
-  LIR* lir;
-  AssemblerStatus res = kSuccess;  // Assume success
-
-  for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) {
-    if (lir->opcode < 0) {
-      /* 1 means padding is needed */
-      if ((lir->opcode == kPseudoPseudoAlign4) && (lir->operands[0] == 1)) {
-        code_buffer_.push_back(PADDING_MOV_R5_R5 & 0xFF);
-        code_buffer_.push_back((PADDING_MOV_R5_R5 >> 8) & 0xFF);
-      }
-      continue;
+void ArmMir2Lir::EncodeLIR(LIR* lir) {
+  int opcode = lir->opcode;
+  if (opcode < 0) {
+    if (UNLIKELY(opcode == kPseudoPseudoAlign4)) {
+      // Note: size for this opcode will be either 0 or 2 depending on final alignment.
+      lir->u.a.bytes[0] = (PADDING_MOV_R5_R5 & 0xff);
+      lir->u.a.bytes[1] = ((PADDING_MOV_R5_R5 >> 8) & 0xff);
+      lir->flags.size = (lir->offset & 0x2);
     }
-
-    if (lir->flags.is_nop) {
-      continue;
-    }
-
-    /*
-     * For PC-relative displacements we won't know if the
-     * selected instruction will work until late (i.e. - now).
-     * If something doesn't fit, we must replace the short-form
-     * operation with a longer-form one.  Note, though, that this
-     * can change code we've already processed, so we'll need to
-     * re-calculate offsets and restart.  To limit the number of
-     * restarts, the entire list will be scanned and patched.
-     * Of course, the patching itself may cause new overflows so this
-     * is an iterative process.
-     */
-    if (lir->flags.pcRelFixup) {
-      if (lir->opcode == kThumbLdrPcRel ||
-          lir->opcode == kThumb2LdrPcRel12 ||
-          lir->opcode == kThumbAddPcRel ||
-          lir->opcode == kThumb2LdrdPcRel8 ||
-          ((lir->opcode == kThumb2Vldrd) && (lir->operands[1] == r15pc)) ||
-          ((lir->opcode == kThumb2Vldrs) && (lir->operands[1] == r15pc))) {
-        /*
-         * PC-relative loads are mostly used to load immediates
-         * that are too large to materialize directly in one shot.
-         * However, if the load displacement exceeds the limit,
-         * we revert to a multiple-instruction materialization sequence.
-         */
-        LIR *lir_target = lir->target;
-        uintptr_t pc = (lir->offset + 4) & ~3;
-        uintptr_t target = lir_target->offset;
-        int delta = target - pc;
-        if (delta & 0x3) {
-          LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta;
-        }
-        // First, a sanity check for cases we shouldn't see now
-        if (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) ||
-            ((lir->opcode == kThumbLdrPcRel) && (delta > 1020))) {
-          // Shouldn't happen in current codegen.
-          LOG(FATAL) << "Unexpected pc-rel offset " << delta;
-        }
-        // Now, check for the difficult cases
-        if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) ||
-            ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) ||
-            ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) ||
-            ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) {
-          /*
-           * Note: because rARM_LR may be used to fix up out-of-range
-           * vldrs/vldrd we include REG_DEF_LR in the resource
-           * masks for these instructions.
-           */
-          int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) || (lir->opcode == kThumb2LdrPcRel12))
-              ?  lir->operands[0] : rARM_LR;
-
-          // Add new Adr to generate the address.
-          LIR* new_adr = RawLIR(lir->dalvik_offset, kThumb2Adr,
-                     base_reg, 0, 0, 0, 0, lir->target);
-          InsertLIRBefore(lir, new_adr);
-
-          // Convert to normal load.
-          if (lir->opcode == kThumb2LdrPcRel12) {
-            lir->opcode = kThumb2LdrRRI12;
-          } else if (lir->opcode == kThumb2LdrdPcRel8) {
-            lir->opcode = kThumb2LdrdI8;
-          }
-          // Change the load to be relative to the new Adr base.
-          if (lir->opcode == kThumb2LdrdI8) {
-            lir->operands[3] = 0;
-            lir->operands[2] = base_reg;
-          } else {
-            lir->operands[2] = 0;
-            lir->operands[1] = base_reg;
-          }
-          SetupResourceMasks(lir);
-          res = kRetryAll;
-        } else {
-          if ((lir->opcode == kThumb2Vldrs) ||
-              (lir->opcode == kThumb2Vldrd) ||
-              (lir->opcode == kThumb2LdrdPcRel8)) {
-            lir->operands[2] = delta >> 2;
-          } else {
-            lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ?  delta :
-                delta >> 2;
-          }
-        }
-      } else if (lir->opcode == kThumb2Cbnz || lir->opcode == kThumb2Cbz) {
-        LIR *target_lir = lir->target;
-        uintptr_t pc = lir->offset + 4;
-        uintptr_t target = target_lir->offset;
-        int delta = target - pc;
-        if (delta > 126 || delta < 0) {
-          /*
-           * Convert to cmp rx,#0 / b[eq/ne] tgt pair
-           * Make new branch instruction and insert after
-           */
-          LIR* new_inst =
-            RawLIR(lir->dalvik_offset, kThumbBCond, 0,
-                   (lir->opcode == kThumb2Cbz) ? kArmCondEq : kArmCondNe,
-                   0, 0, 0, lir->target);
-          InsertLIRAfter(lir, new_inst);
-          /* Convert the cb[n]z to a cmp rx, #0 ] */
-          lir->opcode = kThumbCmpRI8;
-          /* operand[0] is src1 in both cb[n]z & CmpRI8 */
-          lir->operands[1] = 0;
-          lir->target = 0;
-          SetupResourceMasks(lir);
-          /*
-           * Because we just added this new instruction after the current one,
-           * advance lir so that this new instruction won't be checked for displacement
-           * overflow until the next pass (when its base offset will be properly established).
-           */
-          lir = new_inst;
-          res = kRetryAll;
-        } else {
-          lir->operands[1] = delta >> 1;
-        }
-      } else if (lir->opcode == kThumb2Push || lir->opcode == kThumb2Pop) {
-        if (__builtin_popcount(lir->operands[0]) == 1) {
-          /*
-           * The standard push/pop multiple instruction
-           * requires at least two registers in the list.
-           * If we've got just one, switch to the single-reg
-           * encoding.
-           */
-          lir->opcode = (lir->opcode == kThumb2Push) ? kThumb2Push1 :
-              kThumb2Pop1;
-          int reg = 0;
-          while (lir->operands[0]) {
-            if (lir->operands[0] & 0x1) {
-              break;
-            } else {
-              reg++;
-              lir->operands[0] >>= 1;
-            }
-          }
-          lir->operands[0] = reg;
-          SetupResourceMasks(lir);
-          res = kRetryAll;
-        }
-      } else if (lir->opcode == kThumbBCond || lir->opcode == kThumb2BCond) {
-        LIR *target_lir = lir->target;
-        int delta = 0;
-        DCHECK(target_lir);
-        uintptr_t pc = lir->offset + 4;
-        uintptr_t target = target_lir->offset;
-        delta = target - pc;
-        if ((lir->opcode == kThumbBCond) && (delta > 254 || delta < -256)) {
-          lir->opcode = kThumb2BCond;
-          SetupResourceMasks(lir);
-          res = kRetryAll;
-        }
-        lir->operands[0] = delta >> 1;
-      } else if (lir->opcode == kThumb2BUncond) {
-        LIR *target_lir = lir->target;
-        uintptr_t pc = lir->offset + 4;
-        uintptr_t target = target_lir->offset;
-        int delta = target - pc;
-        lir->operands[0] = delta >> 1;
-        if (!(cu_->disable_opt & (1 << kSafeOptimizations)) &&
-          lir->operands[0] == 0) {  // Useless branch
-          NopLIR(lir);
-          res = kRetryAll;
-        }
-      } else if (lir->opcode == kThumbBUncond) {
-        LIR *target_lir = lir->target;
-        uintptr_t pc = lir->offset + 4;
-        uintptr_t target = target_lir->offset;
-        int delta = target - pc;
-        if (delta > 2046 || delta < -2048) {
-          // Convert to Thumb2BCond w/ kArmCondAl
-          lir->opcode = kThumb2BUncond;
-          lir->operands[0] = 0;
-          SetupResourceMasks(lir);
-          res = kRetryAll;
-        } else {
-          lir->operands[0] = delta >> 1;
-          if (!(cu_->disable_opt & (1 << kSafeOptimizations)) &&
-            lir->operands[0] == -1) {  // Useless branch
-            NopLIR(lir);
-            res = kRetryAll;
-          }
-        }
-      } else if (lir->opcode == kThumbBlx1) {
-        DCHECK(NEXT_LIR(lir)->opcode == kThumbBlx2);
-        /* cur_pc is Thumb */
-        uintptr_t cur_pc = (start_addr + lir->offset + 4) & ~3;
-        uintptr_t target = lir->operands[1];
-
-        /* Match bit[1] in target with base */
-        if (cur_pc & 0x2) {
-          target |= 0x2;
-        }
-        int delta = target - cur_pc;
-        DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
-
-        lir->operands[0] = (delta >> 12) & 0x7ff;
-        NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
-      } else if (lir->opcode == kThumbBl1) {
-        DCHECK(NEXT_LIR(lir)->opcode == kThumbBl2);
-        /* Both cur_pc and target are Thumb */
-        uintptr_t cur_pc = start_addr + lir->offset + 4;
-        uintptr_t target = lir->operands[1];
-
-        int delta = target - cur_pc;
-        DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
-
-        lir->operands[0] = (delta >> 12) & 0x7ff;
-        NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
-      } else if (lir->opcode == kThumb2Adr) {
-        SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[2]);
-        LIR* target = lir->target;
-        int target_disp = tab_rec ? tab_rec->offset
-                    : target->offset;
-        int disp = target_disp - ((lir->offset + 4) & ~3);
-        if (disp < 4096) {
-          lir->operands[1] = disp;
-        } else {
-          // convert to ldimm16l, ldimm16h, add tgt, pc, operands[0]
-          // TUNING: if this case fires often, it can be improved.  Not expected to be common.
-          LIR *new_mov16L =
-              RawLIR(lir->dalvik_offset, kThumb2MovImm16LST,
-                     lir->operands[0], 0, reinterpret_cast<uintptr_t>(lir),
-                     reinterpret_cast<uintptr_t>(tab_rec), 0, lir->target);
-          InsertLIRBefore(lir, new_mov16L);
-          LIR *new_mov16H =
-              RawLIR(lir->dalvik_offset, kThumb2MovImm16HST,
-                     lir->operands[0], 0, reinterpret_cast<uintptr_t>(lir),
-                     reinterpret_cast<uintptr_t>(tab_rec), 0, lir->target);
-          InsertLIRBefore(lir, new_mov16H);
-          if (ARM_LOWREG(lir->operands[0])) {
-            lir->opcode = kThumbAddRRLH;
-          } else {
-            lir->opcode = kThumbAddRRHH;
-          }
-          lir->operands[1] = rARM_PC;
-          SetupResourceMasks(lir);
-          res = kRetryAll;
-        }
-      } else if (lir->opcode == kThumb2MovImm16LST) {
-        // operands[1] should hold disp, [2] has add, [3] has tab_rec
-        LIR *addPCInst = reinterpret_cast<LIR*>(lir->operands[2]);
-        SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]);
-        // If tab_rec is null, this is a literal load. Use target
-        LIR* target = lir->target;
-        int target_disp = tab_rec ? tab_rec->offset : target->offset;
-        lir->operands[1] = (target_disp - (addPCInst->offset + 4)) & 0xffff;
-      } else if (lir->opcode == kThumb2MovImm16HST) {
-        // operands[1] should hold disp, [2] has add, [3] has tab_rec
-        LIR *addPCInst = reinterpret_cast<LIR*>(lir->operands[2]);
-        SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]);
-        // If tab_rec is null, this is a literal load. Use target
-        LIR* target = lir->target;
-        int target_disp = tab_rec ? tab_rec->offset : target->offset;
-        lir->operands[1] =
-            ((target_disp - (addPCInst->offset + 4)) >> 16) & 0xffff;
-      }
-    }
-    /*
-     * If one of the pc-relative instructions expanded we'll have
-     * to make another pass.  Don't bother to fully assemble the
-     * instruction.
-     */
-    if (res != kSuccess) {
-      continue;
-    }
+  } else if (LIKELY(!lir->flags.is_nop)) {
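+    // Real instruction: assemble the EncodingMap skeleton and per-operand fields into lir->u.a.bytes.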
     const ArmEncodingMap *encoder = &EncodingMap[lir->opcode];
     uint32_t bits = encoder->skeleton;
     int i;
@@ -1288,113 +1036,623 @@
       uint32_t operand;
       uint32_t value;
       operand = lir->operands[i];
-      switch (encoder->field_loc[i].kind) {
-        case kFmtUnused:
-          break;
-        case kFmtFPImm:
-          value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end;
-          value |= (operand & 0x0F) << encoder->field_loc[i].start;
-          bits |= value;
-          break;
-        case kFmtBrOffset:
-          value = ((operand  & 0x80000) >> 19) << 26;
-          value |= ((operand & 0x40000) >> 18) << 11;
-          value |= ((operand & 0x20000) >> 17) << 13;
-          value |= ((operand & 0x1f800) >> 11) << 16;
-          value |= (operand  & 0x007ff);
-          bits |= value;
-          break;
-        case kFmtShift5:
-          value = ((operand & 0x1c) >> 2) << 12;
-          value |= (operand & 0x03) << 6;
-          bits |= value;
-          break;
-        case kFmtShift:
-          value = ((operand & 0x70) >> 4) << 12;
-          value |= (operand & 0x0f) << 4;
-          bits |= value;
-          break;
-        case kFmtBWidth:
-          value = operand - 1;
-          bits |= value;
-          break;
-        case kFmtLsb:
-          value = ((operand & 0x1c) >> 2) << 12;
-          value |= (operand & 0x03) << 6;
-          bits |= value;
-          break;
-        case kFmtImm6:
-          value = ((operand & 0x20) >> 5) << 9;
-          value |= (operand & 0x1f) << 3;
-          bits |= value;
-          break;
-        case kFmtBitBlt:
-          value = (operand << encoder->field_loc[i].start) &
-              ((1 << (encoder->field_loc[i].end + 1)) - 1);
-          bits |= value;
-          break;
-        case kFmtDfp: {
-          DCHECK(ARM_DOUBLEREG(operand));
-          DCHECK_EQ((operand & 0x1), 0U);
-          int reg_name = (operand & ARM_FP_REG_MASK) >> 1;
-          /* Snag the 1-bit slice and position it */
-          value = ((reg_name & 0x10) >> 4) << encoder->field_loc[i].end;
-          /* Extract and position the 4-bit slice */
-          value |= (reg_name & 0x0f) << encoder->field_loc[i].start;
-          bits |= value;
-          break;
-        }
-        case kFmtSfp:
-          DCHECK(ARM_SINGLEREG(operand));
-          /* Snag the 1-bit slice and position it */
-          value = (operand & 0x1) << encoder->field_loc[i].end;
-          /* Extract and position the 4-bit slice */
-          value |= ((operand & 0x1e) >> 1) << encoder->field_loc[i].start;
-          bits |= value;
-          break;
-        case kFmtImm12:
-        case kFmtModImm:
-          value = ((operand & 0x800) >> 11) << 26;
-          value |= ((operand & 0x700) >> 8) << 12;
-          value |= operand & 0x0ff;
-          bits |= value;
-          break;
-        case kFmtImm16:
-          value = ((operand & 0x0800) >> 11) << 26;
-          value |= ((operand & 0xf000) >> 12) << 16;
-          value |= ((operand & 0x0700) >> 8) << 12;
-          value |= operand & 0x0ff;
-          bits |= value;
-          break;
-        case kFmtOff24: {
-          uint32_t signbit = (operand >> 31) & 0x1;
-          uint32_t i1 = (operand >> 22) & 0x1;
-          uint32_t i2 = (operand >> 21) & 0x1;
-          uint32_t imm10 = (operand >> 11) & 0x03ff;
-          uint32_t imm11 = operand & 0x07ff;
-          uint32_t j1 = (i1 ^ signbit) ? 0 : 1;
-          uint32_t j2 = (i2 ^ signbit) ? 0 : 1;
-          value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) |
-              imm11;
-          bits |= value;
+      ArmEncodingKind kind = encoder->field_loc[i].kind;
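+      // kFmtBitBlt is the most common field kind, so handle it without entering the switch.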
+      if (LIKELY(kind == kFmtBitBlt)) {
+        value = (operand << encoder->field_loc[i].start) &
+            ((1 << (encoder->field_loc[i].end + 1)) - 1);
+        bits |= value;
+      } else {
+        switch (encoder->field_loc[i].kind) {
+          case kFmtSkip:
+            break;  // Nothing to do, but continue to next.
+          case kFmtUnused:
+            i = 4;  // Done, break out of the enclosing loop.
+            break;
+          case kFmtFPImm:
+            value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end;
+            value |= (operand & 0x0F) << encoder->field_loc[i].start;
+            bits |= value;
+            break;
+          case kFmtBrOffset:
+            value = ((operand  & 0x80000) >> 19) << 26;
+            value |= ((operand & 0x40000) >> 18) << 11;
+            value |= ((operand & 0x20000) >> 17) << 13;
+            value |= ((operand & 0x1f800) >> 11) << 16;
+            value |= (operand  & 0x007ff);
+            bits |= value;
+            break;
+          case kFmtShift5:
+            value = ((operand & 0x1c) >> 2) << 12;
+            value |= (operand & 0x03) << 6;
+            bits |= value;
+            break;
+          case kFmtShift:
+            value = ((operand & 0x70) >> 4) << 12;
+            value |= (operand & 0x0f) << 4;
+            bits |= value;
+            break;
+          case kFmtBWidth:
+            value = operand - 1;
+            bits |= value;
+            break;
+          case kFmtLsb:
+            value = ((operand & 0x1c) >> 2) << 12;
+            value |= (operand & 0x03) << 6;
+            bits |= value;
+            break;
+          case kFmtImm6:
+            value = ((operand & 0x20) >> 5) << 9;
+            value |= (operand & 0x1f) << 3;
+            bits |= value;
+            break;
+          case kFmtDfp: {
+            DCHECK(ARM_DOUBLEREG(operand));
+            DCHECK_EQ((operand & 0x1), 0U);
+            int reg_name = (operand & ARM_FP_REG_MASK) >> 1;
+            /* Snag the 1-bit slice and position it */
+            value = ((reg_name & 0x10) >> 4) << encoder->field_loc[i].end;
+            /* Extract and position the 4-bit slice */
+            value |= (reg_name & 0x0f) << encoder->field_loc[i].start;
+            bits |= value;
+            break;
           }
-          break;
-        default:
-          LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind;
+          case kFmtSfp:
+            DCHECK(ARM_SINGLEREG(operand));
+            /* Snag the 1-bit slice and position it */
+            value = (operand & 0x1) << encoder->field_loc[i].end;
+            /* Extract and position the 4-bit slice */
+            value |= ((operand & 0x1e) >> 1) << encoder->field_loc[i].start;
+            bits |= value;
+            break;
+          case kFmtImm12:
+          case kFmtModImm:
+            value = ((operand & 0x800) >> 11) << 26;
+            value |= ((operand & 0x700) >> 8) << 12;
+            value |= operand & 0x0ff;
+            bits |= value;
+            break;
+          case kFmtImm16:
+            value = ((operand & 0x0800) >> 11) << 26;
+            value |= ((operand & 0xf000) >> 12) << 16;
+            value |= ((operand & 0x0700) >> 8) << 12;
+            value |= operand & 0x0ff;
+            bits |= value;
+            break;
+          case kFmtOff24: {
+            uint32_t signbit = (operand >> 31) & 0x1;
+            uint32_t i1 = (operand >> 22) & 0x1;
+            uint32_t i2 = (operand >> 21) & 0x1;
+            uint32_t imm10 = (operand >> 11) & 0x03ff;
+            uint32_t imm11 = operand & 0x07ff;
+            uint32_t j1 = (i1 ^ signbit) ? 0 : 1;
+            uint32_t j2 = (i2 ^ signbit) ? 0 : 1;
+            value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) |
+                imm11;
+            bits |= value;
+            }
+            break;
+          default:
+            LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind;
+        }
       }
     }
     if (encoder->size == 4) {
-      code_buffer_.push_back((bits >> 16) & 0xff);
-      code_buffer_.push_back((bits >> 24) & 0xff);
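+      // 32-bit Thumb2 encoding: store the high halfword first, each halfword little-endian.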
+      lir->u.a.bytes[0] = ((bits >> 16) & 0xff);
+      lir->u.a.bytes[1] = ((bits >> 24) & 0xff);
+      lir->u.a.bytes[2] = (bits & 0xff);
+      lir->u.a.bytes[3] = ((bits >> 8) & 0xff);
+    } else {
+      DCHECK_EQ(encoder->size, 2);
+      lir->u.a.bytes[0] = (bits & 0xff);
+      lir->u.a.bytes[1] = ((bits >> 8) & 0xff);
     }
-    code_buffer_.push_back(bits & 0xff);
-    code_buffer_.push_back((bits >> 8) & 0xff);
+    lir->flags.size = encoder->size;
   }
-  return res;
+}
+
+// Assemble the LIR into binary instruction format.
+void ArmMir2Lir::AssembleLIR() {
+  LIR* lir;
+  LIR* prev_lir;
+  int assembler_retries = 0;
+  int starting_offset = EncodeRange(first_lir_insn_, last_lir_insn_, 0);
+  data_offset_ = (starting_offset + 0x3) & ~0x3;
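+  // Running byte delta accumulated within a pass as instructions are resized or inserted.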
+  int offset_adjustment;
+  AssignDataOffsets();
+
+  /*
+   * Note: generation must be 1 on the first pass (to distinguish it from the initialized state of
+   * 0 for non-visited nodes).  Start at zero here; the bit is flipped to 1 on entry to the loop.
+   */
+  int generation = 0;
+  while (true) {
+    offset_adjustment = 0;
+    AssemblerStatus res = kSuccess;  // Assume success
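+    // Flip the generation bit; nodes visited this pass are stamped with the new value.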
+    generation ^= 1;
+    // Note: nodes requiring possible fixup are linked in ascending order.
+    lir = first_fixup_;
+    prev_lir = NULL;
+    while (lir != NULL) {
+      /*
+       * NOTE: the lir being considered here will be encoded following the switch (so long as
+       * we're not in a retry situation).  However, any new non-pc_rel instructions inserted
+       * due to retry must be explicitly encoded at the time of insertion.  Note that
+       * inserted instructions don't need use/def flags, but do need size and pc-rel status
+       * properly updated.
+       */
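+      // Bring this node's offset up to date with the adjustments made earlier in this pass.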
+      lir->offset += offset_adjustment;
+      // During a pass, this lets us tell whether a node has already been updated with offset_adjustment.
+      lir->flags.generation = generation;
+      switch (static_cast<FixupKind>(lir->flags.fixup)) {
+        case kFixupLabel:
+        case kFixupNone:
+          break;
+        case kFixupVLoad:
+          if (lir->operands[1] != r15pc) {
+            break;
+          }
+          // NOTE: intentional fallthrough.
+        case kFixupLoad: {
+          /*
+           * PC-relative loads are mostly used to load immediates
+           * that are too large to materialize directly in one shot.
+           * However, if the load displacement exceeds the limit,
+           * we revert to a multiple-instruction materialization sequence.
+           */
+          LIR *lir_target = lir->target;
+          uintptr_t pc = (lir->offset + 4) & ~3;
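+          // If the target hasn't been visited this pass, its stored offset doesn't yet include offset_adjustment.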
+          uintptr_t target = lir_target->offset +
+              ((lir_target->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int delta = target - pc;
+          if (res != kSuccess) {
+            /*
+             * In this case, we're just estimating and will do it again for real.  Ensure offset
+             * is legal.
+             */
+            delta &= ~0x3;
+          }
+          DCHECK_EQ((delta & 0x3), 0);
+          // First, a sanity check for cases we shouldn't see now
+          if (kIsDebugBuild && (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) ||
+              ((lir->opcode == kThumbLdrPcRel) && (delta > 1020)))) {
+            // Shouldn't happen in current codegen.
+            LOG(FATAL) << "Unexpected pc-rel offset " << delta;
+          }
+          // Now, check for the difficult cases
+          if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) ||
+              ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) ||
+              ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) ||
+              ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) {
+            /*
+             * Note: The reason vldrs/vldrd include rARM_LR in their use/def masks is that we
+             * sometimes have to use it to fix up out-of-range accesses.  This is where that
+             * happens.
+             */
+            int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) ||
+                            (lir->opcode == kThumb2LdrPcRel12)) ?  lir->operands[0] : rARM_LR;
+
+            // Add new Adr to generate the address.
+            LIR* new_adr = RawLIR(lir->dalvik_offset, kThumb2Adr,
+                       base_reg, 0, 0, 0, 0, lir->target);
+            new_adr->offset = lir->offset;
+            new_adr->flags.fixup = kFixupAdr;
+            new_adr->flags.size = EncodingMap[kThumb2Adr].size;
+            InsertLIRBefore(lir, new_adr);
+            lir->offset += new_adr->flags.size;
+            offset_adjustment += new_adr->flags.size;
+
+            // lir no longer pcrel, unlink and link in new_adr.
+            ReplaceFixup(prev_lir, lir, new_adr);
+
+            // Convert to normal load.
+            offset_adjustment -= lir->flags.size;
+            if (lir->opcode == kThumb2LdrPcRel12) {
+              lir->opcode = kThumb2LdrRRI12;
+            } else if (lir->opcode == kThumb2LdrdPcRel8) {
+              lir->opcode = kThumb2LdrdI8;
+            }
+            lir->flags.size = EncodingMap[lir->opcode].size;
+            offset_adjustment += lir->flags.size;
+            // Change the load to be relative to the new Adr base.
+            if (lir->opcode == kThumb2LdrdI8) {
+              lir->operands[3] = 0;
+              lir->operands[2] = base_reg;
+            } else {
+              lir->operands[2] = 0;
+              lir->operands[1] = base_reg;
+            }
+            // Must redo encoding here - won't ever revisit this node.
+            EncodeLIR(lir);
+            prev_lir = new_adr;  // Continue scan with new_adr.
+            lir = new_adr->u.a.pcrel_next;
+            res = kRetryAll;
+            continue;
+          } else {
+            if ((lir->opcode == kThumb2Vldrs) ||
+                (lir->opcode == kThumb2Vldrd) ||
+                (lir->opcode == kThumb2LdrdPcRel8)) {
+              lir->operands[2] = delta >> 2;
+            } else {
+              lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ?  delta :
+                  delta >> 2;
+            }
+          }
+          break;
+        }
+        case kFixupCBxZ: {
+          LIR *target_lir = lir->target;
+          uintptr_t pc = lir->offset + 4;
+          uintptr_t target = target_lir->offset +
+              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int delta = target - pc;
+          if (delta > 126 || delta < 0) {
+            /*
+             * Convert to cmp rx,#0 / b[eq/ne] tgt pair
+             * Make new branch instruction and insert after
+             */
+            LIR* new_inst =
+              RawLIR(lir->dalvik_offset, kThumbBCond, 0,
+                     (lir->opcode == kThumb2Cbz) ? kArmCondEq : kArmCondNe,
+                     0, 0, 0, lir->target);
+            InsertLIRAfter(lir, new_inst);
+
+            /* Convert the cb[n]z to a cmp rx, #0 */
+            // Subtract the old size.
+            offset_adjustment -= lir->flags.size;
+            lir->opcode = kThumbCmpRI8;
+            /* operands[0] is src1 in both cb[n]z & CmpRI8 */
+            lir->operands[1] = 0;
+            lir->target = 0;
+            EncodeLIR(lir);   // NOTE: sets flags.size.
+            // Add back the new size.
+            DCHECK_EQ(lir->flags.size, static_cast<uint32_t>(EncodingMap[lir->opcode].size));
+            offset_adjustment += lir->flags.size;
+            // Set up the new following inst.
+            new_inst->offset = lir->offset + lir->flags.size;
+            new_inst->flags.fixup = kFixupCondBranch;
+            new_inst->flags.size = EncodingMap[new_inst->opcode].size;
+            offset_adjustment += new_inst->flags.size;
+
+            // lir no longer pcrel, unlink and link in new_inst.
+            ReplaceFixup(prev_lir, lir, new_inst);
+            prev_lir = new_inst;  // Continue with the new instruction.
+            lir = new_inst->u.a.pcrel_next;
+            res = kRetryAll;
+            continue;
+          } else {
+            lir->operands[1] = delta >> 1;
+          }
+          break;
+        }
+        case kFixupPushPop: {
+          if (__builtin_popcount(lir->operands[0]) == 1) {
+            /*
+             * The standard push/pop multiple instruction
+             * requires at least two registers in the list.
+             * If we've got just one, switch to the single-reg
+             * encoding.
+             */
+            lir->opcode = (lir->opcode == kThumb2Push) ? kThumb2Push1 :
+                kThumb2Pop1;
+            int reg = 0;
+            while (lir->operands[0]) {
+              if (lir->operands[0] & 0x1) {
+                break;
+              } else {
+                reg++;
+                lir->operands[0] >>= 1;
+              }
+            }
+            lir->operands[0] = reg;
+            // This won't change again; don't bother unlinking, just reset the fixup kind.
+            lir->flags.fixup = kFixupNone;
+          }
+          break;
+        }
+        case kFixupCondBranch: {
+          LIR *target_lir = lir->target;
+          int delta = 0;
+          DCHECK(target_lir);
+          uintptr_t pc = lir->offset + 4;
+          uintptr_t target = target_lir->offset +
+              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          delta = target - pc;
+          if ((lir->opcode == kThumbBCond) && (delta > 254 || delta < -256)) {
+            offset_adjustment -= lir->flags.size;
+            lir->opcode = kThumb2BCond;
+            lir->flags.size = EncodingMap[lir->opcode].size;
+            // Fixup kind remains the same.
+            offset_adjustment += lir->flags.size;
+            res = kRetryAll;
+          }
+          lir->operands[0] = delta >> 1;
+          break;
+        }
+        case kFixupT2Branch: {
+          LIR *target_lir = lir->target;
+          uintptr_t pc = lir->offset + 4;
+          uintptr_t target = target_lir->offset +
+              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int delta = target - pc;
+          lir->operands[0] = delta >> 1;
+          if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == 0) {
+            // Useless branch
+            offset_adjustment -= lir->flags.size;
+            lir->flags.is_nop = true;
+            // Don't unlink - just set to do-nothing.
+            lir->flags.fixup = kFixupNone;
+            res = kRetryAll;
+          }
+          break;
+        }
+        case kFixupT1Branch: {
+          LIR *target_lir = lir->target;
+          uintptr_t pc = lir->offset + 4;
+          uintptr_t target = target_lir->offset +
+              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int delta = target - pc;
+          if (delta > 2046 || delta < -2048) {
+            // Convert to Thumb2BCond w/ kArmCondAl
+            offset_adjustment -= lir->flags.size;
+            lir->opcode = kThumb2BUncond;
+            lir->operands[0] = 0;
+            lir->flags.size = EncodingMap[lir->opcode].size;
+            lir->flags.fixup = kFixupT2Branch;
+            offset_adjustment += lir->flags.size;
+            res = kRetryAll;
+          } else {
+            lir->operands[0] = delta >> 1;
+            if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == -1) {
+              // Useless branch
+              offset_adjustment -= lir->flags.size;
+              lir->flags.is_nop = true;
+              // Don't unlink - just set to do-nothing.
+              lir->flags.fixup = kFixupNone;
+              res = kRetryAll;
+            }
+          }
+          break;
+        }
+        case kFixupBlx1: {
+          DCHECK(NEXT_LIR(lir)->opcode == kThumbBlx2);
+          /* cur_pc is Thumb */
+          uintptr_t cur_pc = (lir->offset + 4) & ~3;
+          uintptr_t target = lir->operands[1];
+
+          /* Match bit[1] in target with base */
+          if (cur_pc & 0x2) {
+            target |= 0x2;
+          }
+          int delta = target - cur_pc;
+          DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
+
+          lir->operands[0] = (delta >> 12) & 0x7ff;
+          NEXT_LIR(lir)->operands[0] = (delta >> 1) & 0x7ff;
+          break;
+        }
+        case kFixupBl1: {
+          DCHECK(NEXT_LIR(lir)->opcode == kThumbBl2);
+          /* Both cur_pc and target are Thumb */
+          uintptr_t cur_pc = lir->offset + 4;
+          uintptr_t target = lir->operands[1];
+
+          int delta = target - cur_pc;
+          DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
+
+          lir->operands[0] = (delta >> 12) & 0x7ff;
+          NEXT_LIR(lir)->operands[0] = (delta >> 1) & 0x7ff;
+          break;
+        }
+        case kFixupAdr: {
+          SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[2]);
+          LIR* target = lir->target;
+          int target_disp = (tab_rec != NULL) ?  tab_rec->offset + offset_adjustment
+              : target->offset + ((target->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int disp = target_disp - ((lir->offset + 4) & ~3);
+          if (disp < 4096) {
+            lir->operands[1] = disp;
+          } else {
+            // Doesn't fit: convert to a kThumb2MovImm16LST/HST pair plus add tgt, pc.
+            // TUNING: if this case fires often, it can be improved.  Not expected to be common.
+            LIR *new_mov16L =
+                RawLIR(lir->dalvik_offset, kThumb2MovImm16LST,
+                       lir->operands[0], 0, reinterpret_cast<uintptr_t>(lir),
+                       reinterpret_cast<uintptr_t>(tab_rec), 0, lir->target);
+            new_mov16L->flags.size = EncodingMap[new_mov16L->opcode].size;
+            new_mov16L->flags.fixup = kFixupMovImmLST;
+            new_mov16L->offset = lir->offset;
+            // Link the new instruction, retaining lir.
+            InsertLIRBefore(lir, new_mov16L);
+            lir->offset += new_mov16L->flags.size;
+            offset_adjustment += new_mov16L->flags.size;
+            InsertFixupBefore(prev_lir, lir, new_mov16L);
+            prev_lir = new_mov16L;   // Now we've got a new prev.
+
+            LIR *new_mov16H =
+                RawLIR(lir->dalvik_offset, kThumb2MovImm16HST,
+                       lir->operands[0], 0, reinterpret_cast<uintptr_t>(lir),
+                       reinterpret_cast<uintptr_t>(tab_rec), 0, lir->target);
+            new_mov16H->flags.size = EncodingMap[new_mov16H->opcode].size;
+            new_mov16H->flags.fixup = kFixupMovImmHST;
+            new_mov16H->offset = lir->offset;
+            // Link the new instruction, retaining lir.
+            InsertLIRBefore(lir, new_mov16H);
+            lir->offset += new_mov16H->flags.size;
+            offset_adjustment += new_mov16H->flags.size;
+            InsertFixupBefore(prev_lir, lir, new_mov16H);
+            prev_lir = new_mov16H;  // Now we've got a new prev.
+
+            offset_adjustment -= lir->flags.size;
+            if (ARM_LOWREG(lir->operands[0])) {
+              lir->opcode = kThumbAddRRLH;
+            } else {
+              lir->opcode = kThumbAddRRHH;
+            }
+            lir->operands[1] = rARM_PC;
+            lir->flags.size = EncodingMap[lir->opcode].size;
+            offset_adjustment += lir->flags.size;
+            // Must stay in fixup list and have offset updated; will be used by the LST/HST pair.
+            lir->flags.fixup = kFixupNone;
+            res = kRetryAll;
+          }
+          break;
+        }
+        case kFixupMovImmLST: {
+          // operands[1] should hold disp, [2] has add, [3] has tab_rec
+          LIR *addPCInst = reinterpret_cast<LIR*>(lir->operands[2]);
+          SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]);
+          // If tab_rec is null, this is a literal load. Use target
+          LIR* target = lir->target;
+          int target_disp = tab_rec ? tab_rec->offset : target->offset;
+          lir->operands[1] = (target_disp - (addPCInst->offset + 4)) & 0xffff;
+          break;
+        }
+        case kFixupMovImmHST: {
+          // operands[1] should hold disp, [2] has add, [3] has tab_rec
+          LIR *addPCInst = reinterpret_cast<LIR*>(lir->operands[2]);
+          SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]);
+          // If tab_rec is null, this is a literal load. Use target
+          LIR* target = lir->target;
+          int target_disp = tab_rec ? tab_rec->offset : target->offset;
+          lir->operands[1] =
+              ((target_disp - (addPCInst->offset + 4)) >> 16) & 0xffff;
+          break;
+        }
+        case kFixupAlign4: {
+          int required_size = lir->offset & 0x2;
+          if (lir->flags.size != required_size) {
+            offset_adjustment += required_size - lir->flags.size;
+            lir->flags.size = required_size;
+            res = kRetryAll;
+          }
+          break;
+        }
+        default:
+          LOG(FATAL) << "Unexpected case " << lir->flags.fixup;
+      }
+      /*
+       * If one of the pc-relative instructions expanded, we'll have
+       * to make another pass.  Don't bother to fully assemble the
+       * instruction.
+       */
+      if (res == kSuccess) {
+        EncodeLIR(lir);
+        if (assembler_retries == 0) {
+          // Go ahead and fix up the code buffer image.
+          for (int i = 0; i < lir->flags.size; i++) {
+            code_buffer_[lir->offset + i] = lir->u.a.bytes[i];
+          }
+        }
+      }
+      prev_lir = lir;
+      lir = lir->u.a.pcrel_next;
+    }
+
+    if (res == kSuccess) {
+      break;
+    } else {
+      assembler_retries++;
+      if (assembler_retries > MAX_ASSEMBLER_RETRIES) {
+        CodegenDump();
+        LOG(FATAL) << "Assembler error - too many retries";
+      }
+      starting_offset += offset_adjustment;
+      data_offset_ = (starting_offset + 0x3) & ~0x3;
+      AssignDataOffsets();
+    }
+  }
+
+  // Rebuild the CodeBuffer if we had to retry; otherwise it should be good as-is.
+  if (assembler_retries != 0) {
+    code_buffer_.clear();
+    for (LIR* lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) {
+      if (lir->flags.is_nop) {
+        continue;
+      } else {
+        for (int i = 0; i < lir->flags.size; i++) {
+          code_buffer_.push_back(lir->u.a.bytes[i]);
+        }
+      }
+    }
+  }
+
+  data_offset_ = (code_buffer_.size() + 0x3) & ~0x3;
+
+  // Install literals
+  InstallLiteralPools();
+
+  // Install switch tables
+  InstallSwitchTables();
+
+  // Install fill array data
+  InstallFillArrayData();
+
+  // Create the mapping table and native offset to reference map.
+  CreateMappingTables();
+
+  CreateNativeGcMap();
 }
 
 int ArmMir2Lir::GetInsnSize(LIR* lir) {
   return EncodingMap[lir->opcode].size;
 }
 
+// Encode instruction bit pattern and assign offsets.
+uint32_t ArmMir2Lir::EncodeRange(LIR* head_lir, LIR* tail_lir, uint32_t offset) {
+  LIR* end_lir = tail_lir->next;
+
+  /*
+   * A significant percentage of methods can be assembled in a single pass.  We'll
+   * go ahead and build the code image here, leaving holes for pc-relative fixup
+   * codes.  If the code size changes during that pass, we'll have to throw away
+   * this work - but if not, we're ready to go.
+   */
+  code_buffer_.reserve(estimated_native_code_size_ + 256);  // Add a little slop.
+  LIR* last_fixup = NULL;
+  for (LIR* lir = head_lir; lir != end_lir; lir = NEXT_LIR(lir)) {
+    lir->offset = offset;
+    if (!lir->flags.is_nop) {
+      if (lir->flags.fixup != kFixupNone) {
+        if (lir->opcode >= 0) {
+          lir->flags.size = EncodingMap[lir->opcode].size;
+          lir->flags.fixup = EncodingMap[lir->opcode].fixup;
+        } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) {
+          lir->flags.size = (offset & 0x2);
+          lir->flags.fixup = kFixupAlign4;
+        } else {
+          lir->flags.size = 0;
+          lir->flags.fixup = kFixupLabel;
+        }
+        // Link into the fixup chain.
+        lir->flags.use_def_invalid = true;
+        lir->u.a.pcrel_next = NULL;
+        if (first_fixup_ == NULL) {
+          first_fixup_ = lir;
+        } else {
+          last_fixup->u.a.pcrel_next = lir;
+        }
+        last_fixup = lir;
+      } else {
+        EncodeLIR(lir);
+      }
+      for (int i = 0; i < lir->flags.size; i++) {
+        code_buffer_.push_back(lir->u.a.bytes[i]);
+      }
+      offset += lir->flags.size;
+    }
+  }
+  return offset;
+}
+
+void ArmMir2Lir::AssignDataOffsets() {
+  /* Set up offsets for literals */
+  int offset = data_offset_;
+
+  offset = AssignLiteralOffset(offset);
+
+  offset = AssignSwitchTablesOffset(offset);
+
+  total_size_ = AssignFillArrayDataOffset(offset);
+}
+
 }  // namespace art
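Background for the new Arm assembler above: every pc-relative LIR is threaded onto a chain through u.a.pcrel_next, and the chain is re-walked until no instruction has to grow. The standalone sketch below is not ART code; the Node type, FitsShortForm() and the constants are invented solely to model the retry-driven widening that the kFixupCondBranch and kFixupT1Branch cases perform.

#include <cstdio>
#include <vector>

struct Node {
  int offset;        // Byte offset assigned in the current pass.
  int size;          // 2 for the short Thumb form, 4 once widened.
  int target_index;  // Node this branch refers to.
};

static bool FitsShortForm(int delta) {
  return delta >= -256 && delta <= 254;  // Same range the conditional-branch case checks.
}

int main() {
  std::vector<Node> nodes = {{0, 2, 2}, {0, 2, 2}, {0, 2, 0}};
  bool retry = true;
  int passes = 0;
  while (retry && passes < 10) {
    retry = false;
    ++passes;
    int offset = 0;
    for (Node& n : nodes) {  // Assign offsets for this pass.
      n.offset = offset;
      offset += n.size;
    }
    for (Node& n : nodes) {  // Widen anything that no longer reaches.
      int delta = nodes[n.target_index].offset - (n.offset + 4);
      if (n.size == 2 && !FitsShortForm(delta)) {
        n.size = 4;
        retry = true;  // Sizes changed, so offsets must be reassigned.
      }
    }
  }
  std::printf("converged after %d pass(es)\n", passes);
  return 0;
}

As in the real assembler, a pass that widens anything invalidates every offset computed so far, which is exactly why kRetryAll forces another full walk.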
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index bba2ec5..401da2a 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -440,88 +440,120 @@
 }
 
 /*
- * Handle simple case (thin lock) inline.  If it's complicated, bail
- * out to the heavyweight lock/unlock routines.  We'll use dedicated
- * registers here in order to be in the right position in case we
- * to bail to oat[Lock/Unlock]Object(self, object)
- *
- * r0 -> self pointer [arg0 for oat[Lock/Unlock]Object
- * r1 -> object [arg1 for oat[Lock/Unlock]Object
- * r2 -> intial contents of object->lock, later result of strex
- * r3 -> self->thread_id
- * r12 -> allow to be used by utilities as general temp
- *
- * The result of the strex is 0 if we acquire the lock.
- *
- * See comments in monitor.cc for the layout of the lock word.
- * Of particular interest to this code is the test for the
- * simple case - which we handle inline.  For monitor enter, the
- * simple case is thin lock, held by no-one.  For monitor exit,
- * the simple case is thin lock, held by the unlocking thread with
- * a recurse count of 0.
- *
- * A minor complication is that there is a field in the lock word
- * unrelated to locking: the hash state.  This field must be ignored, but
- * preserved.
- *
+ * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more
+ * details see monitor.cc.
  */
 void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
   FlushAllRegs();
-  DCHECK_EQ(LW_SHAPE_THIN, 0);
   LoadValueDirectFixed(rl_src, r0);  // Get obj
   LockCallTemps();  // Prepare for explicit register usage
-  GenNullCheck(rl_src.s_reg_low, r0, opt_flags);
-  LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
-  NewLIR3(kThumb2Ldrex, r1, r0,
-          mirror::Object::MonitorOffset().Int32Value() >> 2);  // Get object->lock
-  // Align owner
-  OpRegImm(kOpLsl, r2, LW_LOCK_OWNER_SHIFT);
-  // Is lock unheld on lock or held by us (==thread_id) on unlock?
-  NewLIR4(kThumb2Bfi, r2, r1, 0, LW_LOCK_OWNER_SHIFT - 1);
-  NewLIR3(kThumb2Bfc, r1, LW_HASH_STATE_SHIFT, LW_LOCK_OWNER_SHIFT - 1);
-  OpRegImm(kOpCmp, r1, 0);
-  OpIT(kCondEq, "");
-  NewLIR4(kThumb2Strex, r1, r2, r0,
-          mirror::Object::MonitorOffset().Int32Value() >> 2);
-  OpRegImm(kOpCmp, r1, 0);
-  OpIT(kCondNe, "T");
-  // Go expensive route - artLockObjectFromCode(self, obj);
-  LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR);
-  ClobberCalleeSave();
-  LIR* call_inst = OpReg(kOpBlx, rARM_LR);
-  MarkSafepointPC(call_inst);
-  GenMemBarrier(kLoadLoad);
+  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
+  if (kArchVariantHasGoodBranchPredictor) {
+    LIR* null_check_branch;
+    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
+      null_check_branch = nullptr;  // No null check.
+    } else {
+      // If the null-check fails, it's handled by the slow-path to reduce exception-related meta-data.
+      null_check_branch = OpCmpImmBranch(kCondEq, r0, 0, NULL);
+    }
+    LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
+    NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, r1, 0, NULL);
+    NewLIR4(kThumb2Strex, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    LIR* lock_success_branch = OpCmpImmBranch(kCondEq, r1, 0, NULL);
+
+
+    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
+    not_unlocked_branch->target = slow_path_target;
+    if (null_check_branch != nullptr) {
+      null_check_branch->target = slow_path_target;
+    }
+    // TODO: move to a slow path.
+    // Go expensive route - artLockObjectFromCode(obj);
+    LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR);
+    ClobberCalleeSave();
+    LIR* call_inst = OpReg(kOpBlx, rARM_LR);
+    MarkSafepointPC(call_inst);
+
+    LIR* success_target = NewLIR0(kPseudoTargetLabel);
+    lock_success_branch->target = success_target;
+    GenMemBarrier(kLoadLoad);
+  } else {
+    // Explicit null-check as slow-path is entered using an IT.
+    GenNullCheck(rl_src.s_reg_low, r0, opt_flags);
+    LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
+    NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    OpRegImm(kOpCmp, r1, 0);
+    OpIT(kCondEq, "");
+    NewLIR4(kThumb2Strex/*eq*/, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    OpRegImm(kOpCmp, r1, 0);
+    OpIT(kCondNe, "T");
+    // Go expensive route - artLockObjectFromCode(self, obj);
+    LoadWordDisp/*ne*/(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR);
+    ClobberCalleeSave();
+    LIR* call_inst = OpReg(kOpBlx/*ne*/, rARM_LR);
+    MarkSafepointPC(call_inst);
+    GenMemBarrier(kLoadLoad);
+  }
 }
 
 /*
- * For monitor unlock, we don't have to use ldrex/strex.  Once
- * we've determined that the lock is thin and that we own it with
- * a zero recursion count, it's safe to punch it back to the
- * initial, unlock thin state with a store word.
+ * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
+ * details see monitor.cc. Note the code below doesn't use ldrex/strex as the code holds the lock
+ * and can only give away ownership if it's suspended.
  */
 void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
-  DCHECK_EQ(LW_SHAPE_THIN, 0);
   FlushAllRegs();
   LoadValueDirectFixed(rl_src, r0);  // Get obj
   LockCallTemps();  // Prepare for explicit register usage
-  GenNullCheck(rl_src.s_reg_low, r0, opt_flags);
-  LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1);  // Get lock
+  LIR* null_check_branch;
   LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
-  // Is lock unheld on lock or held by us (==thread_id) on unlock?
-  OpRegRegImm(kOpAnd, r3, r1,
-              (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
-  // Align owner
-  OpRegImm(kOpLsl, r2, LW_LOCK_OWNER_SHIFT);
-  NewLIR3(kThumb2Bfc, r1, LW_HASH_STATE_SHIFT, LW_LOCK_OWNER_SHIFT - 1);
-  OpRegReg(kOpSub, r1, r2);
-  OpIT(kCondEq, "EE");
-  StoreWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r3);
-  // Go expensive route - UnlockObjectFromCode(obj);
-  LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR);
-  ClobberCalleeSave();
-  LIR* call_inst = OpReg(kOpBlx, rARM_LR);
-  MarkSafepointPC(call_inst);
-  GenMemBarrier(kStoreLoad);
+  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
+  if (kArchVariantHasGoodBranchPredictor) {
+    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
+      null_check_branch = nullptr;  // No null check.
+    } else {
+      // If the null-check fails, it's handled by the slow-path to reduce exception-related meta-data.
+      null_check_branch = OpCmpImmBranch(kCondEq, r0, 0, NULL);
+    }
+    LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1);
+    LoadConstantNoClobber(r3, 0);
+    LIR* slow_unlock_branch = OpCmpBranch(kCondNe, r1, r2, NULL);
+    StoreWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r3);
+    LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
+
+    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
+    slow_unlock_branch->target = slow_path_target;
+    if (null_check_branch != nullptr) {
+      null_check_branch->target = slow_path_target;
+    }
+    // TODO: move to a slow path.
+    // Go expensive route - artUnlockObjectFromCode(obj);
+    LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR);
+    ClobberCalleeSave();
+    LIR* call_inst = OpReg(kOpBlx, rARM_LR);
+    MarkSafepointPC(call_inst);
+
+    LIR* success_target = NewLIR0(kPseudoTargetLabel);
+    unlock_success_branch->target = success_target;
+    GenMemBarrier(kStoreLoad);
+  } else {
+    // Explicit null-check as slow-path is entered using an IT.
+    GenNullCheck(rl_src.s_reg_low, r0, opt_flags);
+    LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1);  // Get lock
+    LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2);
+    LoadConstantNoClobber(r3, 0);
+    // Is lock unheld on lock or held by us (==thread_id) on unlock?
+    OpRegReg(kOpCmp, r1, r2);
+    OpIT(kCondEq, "EE");
+    StoreWordDisp/*eq*/(r0, mirror::Object::MonitorOffset().Int32Value(), r3);
+    // Go expensive route - UnlockObjectFromCode(obj);
+    LoadWordDisp/*ne*/(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR);
+    ClobberCalleeSave();
+    LIR* call_inst = OpReg(kOpBlx/*ne*/, rARM_LR);
+    MarkSafepointPC(call_inst);
+    GenMemBarrier(kStoreLoad);
+  }
 }
 
 void ArmMir2Lir::GenMoveException(RegLocation rl_dest) {
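A rough C++ analogue of the thin-lock fast path GenMonitorEnter now emits with ldrex/strex: try to install our thin-lock id into an unlocked lock word, and fall back to the runtime helper on any failure. The Obj type and the artLockObjectSlowPath name below are stand-ins for illustration only, not the actual runtime interfaces.

#include <atomic>
#include <cstdint>

extern "C" void artLockObjectSlowPath(void* obj);  // Stand-in for the pLockObject entrypoint.

struct Obj {
  std::atomic<uint32_t> lock_word;  // 0 means thin-unlocked in this sketch.
};

inline void MonitorEnterFast(Obj* obj, uint32_t thin_lock_id) {
  uint32_t expected = 0;
  // The ldrex/strex pair in the generated code corresponds to this CAS; the
  // acquire ordering plays the role of the kLoadLoad barrier on success.
  if (!obj->lock_word.compare_exchange_strong(expected, thin_lock_id,
                                              std::memory_order_acquire)) {
    artLockObjectSlowPath(obj);  // Already locked or inflated: take the slow route.
  }
}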
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 1954fba..b75661c 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -70,9 +70,14 @@
     void CompilerInitializeRegAlloc();
 
     // Required for target - miscellaneous.
+    void AssembleLIR();
+    uint32_t EncodeRange(LIR* head_lir, LIR* tail_lir, uint32_t starting_offset);
+    int AssignInsnOffsets();
+    void AssignOffsets();
     AssemblerStatus AssembleInstructions(uintptr_t start_addr);
+    void EncodeLIR(LIR* lir);
     void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix);
-    void SetupTargetResourceMasks(LIR* lir);
+    void SetupTargetResourceMasks(LIR* lir, uint64_t flags);
     const char* GetTargetInstFmt(int opcode);
     const char* GetTargetInstName(int opcode);
     std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
@@ -187,6 +192,9 @@
     MIR* SpecialIdentity(MIR* mir);
     LIR* LoadFPConstantValue(int r_dest, int value);
     bool BadOverlap(RegLocation rl_src, RegLocation rl_dest);
+    void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
+    void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
+    void AssignDataOffsets();
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 07782d9..6d11b03 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -24,8 +24,7 @@
 
 namespace art {
 
-LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, int src1,
-         int src2, LIR* target) {
+LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target) {
   OpRegReg(kOpCmp, src1, src2);
   return OpCondBranch(cond, target);
 }
@@ -319,7 +318,18 @@
   LIR* branch;
   int mod_imm;
   ArmConditionCode arm_cond = ArmConditionEncoding(cond);
-  if ((ARM_LOWREG(reg)) && (check_value == 0) &&
+  /*
+   * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit
+   * compare-and-branch if zero is ideal if it will reach.  However, because null checks
+   * branch forward to a launch pad, they will frequently not reach - and thus have to
+   * be converted to a long form during assembly (which will trigger another assembly
+   * pass).  Here we estimate the branch distance for checks, and if large, directly
+   * generate the long form in an attempt to avoid an extra assembly pass.
+   * TODO: consider interspersing launchpads in code following unconditional branches.
+   */
+  bool skip = ((target != NULL) && (target->opcode == kPseudoThrowTarget));
+  skip &= ((cu_->code_item->insns_size_in_code_units_ - current_dalvik_offset_) > 64);
+  if (!skip && (ARM_LOWREG(reg)) && (check_value == 0) &&
      ((arm_cond == kArmCondEq) || (arm_cond == kArmCondNe))) {
     branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
                      reg, 0);
@@ -624,7 +634,7 @@
       break;
   }
   LIR* dmb = NewLIR1(kThumb2Dmb, dmb_flavor);
-  dmb->def_mask = ENCODE_ALL;
+  dmb->u.m.def_mask = ENCODE_ALL;
 #endif
 }
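To make the reachability reasoning in the OpCmpImmBranch comment above concrete: Thumb CBZ/CBNZ encode a 6-bit, halfword-scaled immediate, so they branch only forward and only about 126 bytes past the updated pc. The helper below is illustrative only (not ART code), and the 64-code-unit cutoff used above is a tuning estimate rather than an ISA limit.

#include <cstdint>

constexpr int32_t kCbzMaxForwardDelta = 126;  // Bytes, relative to pc (instruction address + 4).

inline bool CbzCanReach(uint32_t branch_addr, uint32_t target_addr) {
  int64_t delta = static_cast<int64_t>(target_addr) -
                  (static_cast<int64_t>(branch_addr) + 4);
  return delta >= 0 && delta <= kCbzMaxForwardDelta;
}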
 
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 203a8cc..a4ea10b 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -118,78 +118,83 @@
   return ENCODE_ARM_REG_PC;
 }
 
-void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir) {
+// Thumb2-specific setup.  TODO: inline?
+void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
   DCHECK_EQ(cu_->instruction_set, kThumb2);
+  DCHECK(!lir->flags.use_def_invalid);
 
-  // Thumb2 specific setup
-  uint64_t flags = ArmMir2Lir::EncodingMap[lir->opcode].flags;
   int opcode = lir->opcode;
 
-  if (flags & REG_DEF_SP) {
-    lir->def_mask |= ENCODE_ARM_REG_SP;
-  }
-
-  if (flags & REG_USE_SP) {
-    lir->use_mask |= ENCODE_ARM_REG_SP;
-  }
-
-  if (flags & REG_DEF_LIST0) {
-    lir->def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
-  }
-
-  if (flags & REG_DEF_LIST1) {
-    lir->def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
-  }
-
-  if (flags & REG_DEF_FPCS_LIST0) {
-    lir->def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
-  }
-
-  if (flags & REG_DEF_FPCS_LIST2) {
-    for (int i = 0; i < lir->operands[2]; i++) {
-      SetupRegMask(&lir->def_mask, lir->operands[1] + i);
+  // These flags are somewhat uncommon - bypass if we can.
+  if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 | REG_DEF_LIST1 |
+                REG_DEF_FPCS_LIST0 | REG_DEF_FPCS_LIST2 | REG_USE_PC | IS_IT | REG_USE_LIST0 |
+                REG_USE_LIST1 | REG_USE_FPCS_LIST0 | REG_USE_FPCS_LIST2 | REG_DEF_LR)) != 0) {
+    if (flags & REG_DEF_SP) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_SP;
     }
-  }
 
-  if (flags & REG_USE_PC) {
-    lir->use_mask |= ENCODE_ARM_REG_PC;
-  }
-
-  /* Conservatively treat the IT block */
-  if (flags & IS_IT) {
-    lir->def_mask = ENCODE_ALL;
-  }
-
-  if (flags & REG_USE_LIST0) {
-    lir->use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
-  }
-
-  if (flags & REG_USE_LIST1) {
-    lir->use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
-  }
-
-  if (flags & REG_USE_FPCS_LIST0) {
-    lir->use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
-  }
-
-  if (flags & REG_USE_FPCS_LIST2) {
-    for (int i = 0; i < lir->operands[2]; i++) {
-      SetupRegMask(&lir->use_mask, lir->operands[1] + i);
+    if (flags & REG_USE_SP) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_SP;
     }
-  }
-  /* Fixup for kThumbPush/lr and kThumbPop/pc */
-  if (opcode == kThumbPush || opcode == kThumbPop) {
-    uint64_t r8Mask = GetRegMaskCommon(r8);
-    if ((opcode == kThumbPush) && (lir->use_mask & r8Mask)) {
-      lir->use_mask &= ~r8Mask;
-      lir->use_mask |= ENCODE_ARM_REG_LR;
-    } else if ((opcode == kThumbPop) && (lir->def_mask & r8Mask)) {
-      lir->def_mask &= ~r8Mask;
-      lir->def_mask |= ENCODE_ARM_REG_PC;
+
+    if (flags & REG_DEF_LIST0) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
     }
-  }
-  if (flags & REG_DEF_LR) {
-    lir->def_mask |= ENCODE_ARM_REG_LR;
+
+    if (flags & REG_DEF_LIST1) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
+    }
+
+    if (flags & REG_DEF_FPCS_LIST0) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
+    }
+
+    if (flags & REG_DEF_FPCS_LIST2) {
+      for (int i = 0; i < lir->operands[2]; i++) {
+        SetupRegMask(&lir->u.m.def_mask, lir->operands[1] + i);
+      }
+    }
+
+    if (flags & REG_USE_PC) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_PC;
+    }
+
+    /* Conservatively treat the IT block */
+    if (flags & IS_IT) {
+      lir->u.m.def_mask = ENCODE_ALL;
+    }
+
+    if (flags & REG_USE_LIST0) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
+    }
+
+    if (flags & REG_USE_LIST1) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
+    }
+
+    if (flags & REG_USE_FPCS_LIST0) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
+    }
+
+    if (flags & REG_USE_FPCS_LIST2) {
+      for (int i = 0; i < lir->operands[2]; i++) {
+        SetupRegMask(&lir->u.m.use_mask, lir->operands[1] + i);
+      }
+    }
+    /* Fixup for kThumbPush/lr and kThumbPop/pc */
+    if (opcode == kThumbPush || opcode == kThumbPop) {
+      uint64_t r8Mask = GetRegMaskCommon(r8);
+      if ((opcode == kThumbPush) && (lir->u.m.use_mask & r8Mask)) {
+        lir->u.m.use_mask &= ~r8Mask;
+        lir->u.m.use_mask |= ENCODE_ARM_REG_LR;
+      } else if ((opcode == kThumbPop) && (lir->u.m.def_mask & r8Mask)) {
+        lir->u.m.def_mask &= ~r8Mask;
+        lir->u.m.def_mask |= ENCODE_ARM_REG_PC;
+      }
+    }
+    if (flags & REG_DEF_LR) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_LR;
+    }
   }
 }
 
@@ -466,8 +471,8 @@
 
     /* Memory bits */
     if (arm_lir && (mask & ENCODE_DALVIK_REG)) {
-      sprintf(buf + strlen(buf), "dr%d%s", arm_lir->alias_info & 0xffff,
-              (arm_lir->alias_info & 0x80000000) ? "(+1)" : "");
+      sprintf(buf + strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info),
+              DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : "");
     }
     if (mask & ENCODE_LITERAL) {
       strcat(buf, "lit ");
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index c63de69..a7b8dfe 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -90,7 +90,6 @@
   LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kThumb2Vldrs,
                           r_dest, r15pc, 0, 0, 0, data_target);
   SetMemRefType(load_pc_rel, true, kLiteral);
-  load_pc_rel->alias_info = reinterpret_cast<uintptr_t>(data_target);
   AppendLIR(load_pc_rel);
   return load_pc_rel;
 }
@@ -626,7 +625,6 @@
                    r_dest_lo, r_dest_hi, r15pc, 0, 0, data_target);
     }
     SetMemRefType(res, true, kLiteral);
-    res->alias_info = reinterpret_cast<uintptr_t>(data_target);
     AppendLIR(res);
   }
   return res;
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 6e49f0b..617f357 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -45,9 +45,10 @@
 }
 
 void Mir2Lir::MarkSafepointPC(LIR* inst) {
-  inst->def_mask = ENCODE_ALL;
+  DCHECK(!inst->flags.use_def_invalid);
+  inst->u.m.def_mask = ENCODE_ALL;
   LIR* safepoint_pc = NewLIR0(kPseudoSafepointPC);
-  DCHECK_EQ(safepoint_pc->def_mask, ENCODE_ALL);
+  DCHECK_EQ(safepoint_pc->u.m.def_mask, ENCODE_ALL);
 }
 
 bool Mir2Lir::FastInstance(uint32_t field_idx, bool is_put, int* field_offset, bool* is_volatile) {
@@ -87,10 +88,11 @@
   uint64_t *mask_ptr;
   uint64_t mask = ENCODE_MEM;
   DCHECK(GetTargetInstFlags(lir->opcode) & (IS_LOAD | IS_STORE));
+  DCHECK(!lir->flags.use_def_invalid);
   if (is_load) {
-    mask_ptr = &lir->use_mask;
+    mask_ptr = &lir->u.m.use_mask;
   } else {
-    mask_ptr = &lir->def_mask;
+    mask_ptr = &lir->u.m.def_mask;
   }
   /* Clear out the memref flags */
   *mask_ptr &= ~mask;
@@ -127,7 +129,7 @@
    * Store the Dalvik register id in alias_info. Mark the MSB if it is a 64-bit
    * access.
    */
-  lir->alias_info = ENCODE_ALIAS_INFO(reg_id, is64bit);
+  lir->flags.alias_info = ENCODE_ALIAS_INFO(reg_id, is64bit);
 }
 
 /*
@@ -213,11 +215,11 @@
       break;
   }
 
-  if (lir->use_mask && (!lir->flags.is_nop || dump_nop)) {
-    DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->use_mask, "use"));
+  if (lir->u.m.use_mask && (!lir->flags.is_nop || dump_nop)) {
+    DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->u.m.use_mask, "use"));
   }
-  if (lir->def_mask && (!lir->flags.is_nop || dump_nop)) {
-    DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->def_mask, "def"));
+  if (lir->u.m.def_mask && (!lir->flags.is_nop || dump_nop)) {
+    DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->u.m.def_mask, "def"));
   }
 }
 
@@ -348,6 +350,7 @@
     new_value->operands[0] = value;
     new_value->next = *constant_list_p;
     *constant_list_p = new_value;
+    estimated_native_code_size_ += sizeof(value);
     return new_value;
   }
   return NULL;
@@ -431,6 +434,7 @@
     int bx_offset = INVALID_OFFSET;
     switch (cu_->instruction_set) {
       case kThumb2:
+        DCHECK(tab_rec->anchor->flags.fixup != kFixupNone);
         bx_offset = tab_rec->anchor->offset + 4;
         break;
       case kX86:
@@ -714,111 +718,29 @@
   return offset;
 }
 
-// LIR offset assignment.
-int Mir2Lir::AssignInsnOffsets() {
-  LIR* lir;
-  int offset = 0;
-
-  for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) {
-    lir->offset = offset;
-    if (LIKELY(lir->opcode >= 0)) {
-      if (!lir->flags.is_nop) {
-        offset += lir->flags.size;
-      }
-    } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) {
-      if (offset & 0x2) {
-        offset += 2;
-        lir->operands[0] = 1;
-      } else {
-        lir->operands[0] = 0;
-      }
-    }
-    /* Pseudo opcodes don't consume space */
-  }
-  return offset;
-}
-
-/*
- * Walk the compilation unit and assign offsets to instructions
- * and literals and compute the total size of the compiled unit.
- */
-void Mir2Lir::AssignOffsets() {
-  int offset = AssignInsnOffsets();
-
-  /* Const values have to be word aligned */
-  offset = (offset + 3) & ~3;
-
-  /* Set up offsets for literals */
-  data_offset_ = offset;
-
-  offset = AssignLiteralOffset(offset);
-
-  offset = AssignSwitchTablesOffset(offset);
-
-  offset = AssignFillArrayDataOffset(offset);
-
-  total_size_ = offset;
-}
-
-/*
- * Go over each instruction in the list and calculate the offset from the top
- * before sending them off to the assembler. If out-of-range branch distance is
- * seen rearrange the instructions a bit to correct it.
- */
-void Mir2Lir::AssembleLIR() {
-  AssignOffsets();
-  int assembler_retries = 0;
-  /*
-   * Assemble here.  Note that we generate code with optimistic assumptions
-   * and if found now to work, we'll have to redo the sequence and retry.
-   */
-
-  while (true) {
-    AssemblerStatus res = AssembleInstructions(0);
-    if (res == kSuccess) {
-      break;
-    } else {
-      assembler_retries++;
-      if (assembler_retries > MAX_ASSEMBLER_RETRIES) {
-        CodegenDump();
-        LOG(FATAL) << "Assembler error - too many retries";
-      }
-      // Redo offsets and try again
-      AssignOffsets();
-      code_buffer_.clear();
-    }
-  }
-
-  // Install literals
-  InstallLiteralPools();
-
-  // Install switch tables
-  InstallSwitchTables();
-
-  // Install fill array data
-  InstallFillArrayData();
-
-  // Create the mapping table and native offset to reference map.
-  CreateMappingTables();
-
-  CreateNativeGcMap();
-}
-
 /*
  * Insert a kPseudoCaseLabel at the beginning of the Dalvik
- * offset vaddr.  This label will be used to fix up the case
+ * offset vaddr if pretty-printing, otherwise use the standard block
+ * label.  The selected label will be used to fix up the case
  * branch table during the assembly phase.  All resource flags
  * are set to prevent code motion.  KeyVal is just there for debugging.
  */
 LIR* Mir2Lir::InsertCaseLabel(int vaddr, int keyVal) {
   LIR* boundary_lir = &block_label_list_[mir_graph_->FindBlock(vaddr)->id];
-  LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR));
-  new_label->dalvik_offset = vaddr;
-  new_label->opcode = kPseudoCaseLabel;
-  new_label->operands[0] = keyVal;
-  new_label->def_mask = ENCODE_ALL;
-  InsertLIRAfter(boundary_lir, new_label);
-  return new_label;
+  LIR* res = boundary_lir;
+  if (cu_->verbose) {
+    // Only pay the expense if we're pretty-printing.
+    LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR));
+    new_label->dalvik_offset = vaddr;
+    new_label->opcode = kPseudoCaseLabel;
+    new_label->operands[0] = keyVal;
+    new_label->flags.fixup = kFixupLabel;
+    DCHECK(!new_label->flags.use_def_invalid);
+    new_label->u.m.def_mask = ENCODE_ALL;
+    InsertLIRAfter(boundary_lir, new_label);
+    res = new_label;
+  }
+  return res;
 }
 
 void Mir2Lir::MarkPackedCaseLabels(Mir2Lir::SwitchTable *tab_rec) {
@@ -951,6 +873,7 @@
       literal_list_(NULL),
       method_literal_list_(NULL),
       code_literal_list_(NULL),
+      first_fixup_(NULL),
       cu_(cu),
       mir_graph_(mir_graph),
       switch_tables_(arena, 4, kGrowableArraySwitchTables),
@@ -964,6 +887,7 @@
       total_size_(0),
       block_label_list_(NULL),
       current_dalvik_offset_(0),
+      estimated_native_code_size_(0),
       reg_pool_(NULL),
       live_sreg_(0),
       num_core_spills_(0),
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 4dd55d7..9e71749 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -30,13 +30,14 @@
  */
 
 /*
- * Generate an kPseudoBarrier marker to indicate the boundary of special
+ * Generate a kPseudoBarrier marker to indicate the boundary of special
  * blocks.
  */
 void Mir2Lir::GenBarrier() {
   LIR* barrier = NewLIR0(kPseudoBarrier);
   /* Mark all resources as being clobbered */
-  barrier->def_mask = -1;
+  DCHECK(!barrier->flags.use_def_invalid);
+  barrier->u.m.def_mask = ENCODE_ALL;
 }
 
 // FIXME: need to do some work to split out targets with
@@ -1761,4 +1762,16 @@
   suspend_launchpads_.Insert(launch_pad);
 }
 
+/* Call out to helper assembly routine that will null check obj and then lock it. */
+void Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
+  FlushAllRegs();
+  CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(pLockObject), rl_src, true);
+}
+
+/* Call out to helper assembly routine that will null check obj and then unlock it. */
+void Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
+  FlushAllRegs();
+  CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(pUnlockObject), rl_src, true);
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index ed83863..8270e01 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -810,7 +810,7 @@
       OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), start_offset);
       LIR* ld = OpVldm(TargetReg(kArg3), regs_left);
       // TUNING: loosen barrier
-      ld->def_mask = ENCODE_ALL;
+      ld->u.m.def_mask = ENCODE_ALL;
       SetMemRefType(ld, true /* is_load */, kDalvikReg);
       call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                                direct_code, direct_method, type);
@@ -819,7 +819,7 @@
                                direct_code, direct_method, type);
       LIR* st = OpVstm(TargetReg(kArg3), regs_left);
       SetMemRefType(st, false /* is_load */, kDalvikReg);
-      st->def_mask = ENCODE_ALL;
+      st->u.m.def_mask = ENCODE_ALL;
       call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                                direct_code, direct_method, type);
     }
diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc
index cb7694d..f915779 100644
--- a/compiler/dex/quick/local_optimizations.cc
+++ b/compiler/dex/quick/local_optimizations.cc
@@ -21,8 +21,8 @@
 #define DEBUG_OPT(X)
 
 /* Check RAW, WAR, and RAW dependency on the register operands */
-#define CHECK_REG_DEP(use, def, check) ((def & check->use_mask) || \
-                                        ((use | def) & check->def_mask))
+#define CHECK_REG_DEP(use, def, check) ((def & check->u.m.use_mask) || \
+                                        ((use | def) & check->u.m.def_mask))
 
 /* Scheduler heuristics */
 #define MAX_HOIST_DISTANCE 20
@@ -30,10 +30,10 @@
 #define LD_LATENCY 2
 
 static bool IsDalvikRegisterClobbered(LIR* lir1, LIR* lir2) {
-  int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->alias_info);
-  int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->alias_info);
-  int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->alias_info);
-  int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->alias_info);
+  int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->flags.alias_info);
+  int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->flags.alias_info);
+  int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->flags.alias_info);
+  int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->flags.alias_info);
 
   return (reg1Lo == reg2Lo) || (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo);
 }
@@ -106,7 +106,7 @@
     bool is_this_lir_load = target_flags & IS_LOAD;
     LIR* check_lir;
     /* Use the mem mask to determine the rough memory location */
-    uint64_t this_mem_mask = (this_lir->use_mask | this_lir->def_mask) & ENCODE_MEM;
+    uint64_t this_mem_mask = (this_lir->u.m.use_mask | this_lir->u.m.def_mask) & ENCODE_MEM;
 
     /*
      * Currently only eliminate redundant ld/st for constant and Dalvik
@@ -116,10 +116,10 @@
       continue;
     }
 
-    uint64_t stop_def_reg_mask = this_lir->def_mask & ~ENCODE_MEM;
+    uint64_t stop_def_reg_mask = this_lir->u.m.def_mask & ~ENCODE_MEM;
     uint64_t stop_use_reg_mask;
     if (cu_->instruction_set == kX86) {
-      stop_use_reg_mask = (IS_BRANCH | this_lir->use_mask) & ~ENCODE_MEM;
+      stop_use_reg_mask = (IS_BRANCH | this_lir->u.m.use_mask) & ~ENCODE_MEM;
     } else {
       /*
        * Add pc to the resource mask to prevent this instruction
@@ -127,7 +127,7 @@
        * region bits since stop_mask is used to check data/control
        * dependencies.
        */
-        stop_use_reg_mask = (GetPCUseDefEncoding() | this_lir->use_mask) & ~ENCODE_MEM;
+        stop_use_reg_mask = (GetPCUseDefEncoding() | this_lir->u.m.use_mask) & ~ENCODE_MEM;
     }
 
     for (check_lir = NEXT_LIR(this_lir); check_lir != tail_lir; check_lir = NEXT_LIR(check_lir)) {
@@ -139,7 +139,7 @@
         continue;
       }
 
-      uint64_t check_mem_mask = (check_lir->use_mask | check_lir->def_mask) & ENCODE_MEM;
+      uint64_t check_mem_mask = (check_lir->u.m.use_mask | check_lir->u.m.def_mask) & ENCODE_MEM;
       uint64_t alias_condition = this_mem_mask & check_mem_mask;
       bool stop_here = false;
 
@@ -159,7 +159,7 @@
            */
           DCHECK(!(check_flags & IS_STORE));
           /* Same value && same register type */
-          if (check_lir->alias_info == this_lir->alias_info &&
+          if (check_lir->flags.alias_info == this_lir->flags.alias_info &&
               SameRegType(check_lir->operands[0], native_reg_id)) {
             /*
              * Different destination register - insert
@@ -172,7 +172,7 @@
           }
         } else if (alias_condition == ENCODE_DALVIK_REG) {
           /* Must alias */
-          if (check_lir->alias_info == this_lir->alias_info) {
+          if (check_lir->flags.alias_info == this_lir->flags.alias_info) {
             /* Only optimize compatible registers */
             bool reg_compatible = SameRegType(check_lir->operands[0], native_reg_id);
             if ((is_this_lir_load && is_check_lir_load) ||
@@ -297,7 +297,7 @@
       continue;
     }
 
-    uint64_t stop_use_all_mask = this_lir->use_mask;
+    uint64_t stop_use_all_mask = this_lir->u.m.use_mask;
 
     if (cu_->instruction_set != kX86) {
       /*
@@ -313,7 +313,7 @@
 
     /* Similar as above, but just check for pure register dependency */
     uint64_t stop_use_reg_mask = stop_use_all_mask & ~ENCODE_MEM;
-    uint64_t stop_def_reg_mask = this_lir->def_mask & ~ENCODE_MEM;
+    uint64_t stop_def_reg_mask = this_lir->u.m.def_mask & ~ENCODE_MEM;
 
     int next_slot = 0;
     bool stop_here = false;
@@ -328,7 +328,7 @@
         continue;
       }
 
-      uint64_t check_mem_mask = check_lir->def_mask & ENCODE_MEM;
+      uint64_t check_mem_mask = check_lir->u.m.def_mask & ENCODE_MEM;
       uint64_t alias_condition = stop_use_all_mask & check_mem_mask;
       stop_here = false;
 
@@ -337,7 +337,7 @@
         /* We can fully disambiguate Dalvik references */
         if (alias_condition == ENCODE_DALVIK_REG) {
           /* Must alias or partually overlap */
-          if ((check_lir->alias_info == this_lir->alias_info) ||
+          if ((check_lir->flags.alias_info == this_lir->flags.alias_info) ||
             IsDalvikRegisterClobbered(this_lir, check_lir)) {
             stop_here = true;
           }
@@ -406,7 +406,7 @@
         LIR* prev_lir = prev_inst_list[slot+1];
 
         /* Check the highest instruction */
-        if (prev_lir->def_mask == ENCODE_ALL) {
+        if (prev_lir->u.m.def_mask == ENCODE_ALL) {
           /*
            * If the first instruction is a load, don't hoist anything
            * above it since it is unlikely to be beneficial.
@@ -436,7 +436,7 @@
          */
         bool prev_is_load = is_pseudo_opcode(prev_lir->opcode) ? false :
             (GetTargetInstFlags(prev_lir->opcode) & IS_LOAD);
-        if (((cur_lir->use_mask & prev_lir->def_mask) && prev_is_load) || (slot < LD_LATENCY)) {
+        if (((cur_lir->u.m.use_mask & prev_lir->u.m.def_mask) && prev_is_load) || (slot < LD_LATENCY)) {
           break;
         }
       }
diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc
index dbd668b..3a6207c 100644
--- a/compiler/dex/quick/mips/assemble_mips.cc
+++ b/compiler/dex/quick/mips/assemble_mips.cc
@@ -526,7 +526,7 @@
       continue;
     }
 
-    if (lir->flags.pcRelFixup) {
+    if (lir->flags.fixup != kFixupNone) {
       if (lir->opcode == kMipsDelta) {
         /*
          * The "Delta" pseudo-ops load the difference between
@@ -710,4 +710,97 @@
   return EncodingMap[lir->opcode].size;
 }
 
+// LIR offset assignment.
+// TODO: consolidate w/ Arm assembly mechanism.
+int MipsMir2Lir::AssignInsnOffsets() {
+  LIR* lir;
+  int offset = 0;
+
+  for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) {
+    lir->offset = offset;
+    if (LIKELY(lir->opcode >= 0)) {
+      if (!lir->flags.is_nop) {
+        offset += lir->flags.size;
+      }
+    } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) {
+      if (offset & 0x2) {
+        offset += 2;
+        lir->operands[0] = 1;
+      } else {
+        lir->operands[0] = 0;
+      }
+    }
+    /* Pseudo opcodes don't consume space */
+  }
+  return offset;
+}
+
+/*
+ * Walk the compilation unit and assign offsets to instructions
+ * and literals and compute the total size of the compiled unit.
+ * TODO: consolidate w/ Arm assembly mechanism.
+ */
+void MipsMir2Lir::AssignOffsets() {
+  int offset = AssignInsnOffsets();
+
+  /* Const values have to be word aligned */
+  offset = (offset + 3) & ~3;
+
+  /* Set up offsets for literals */
+  data_offset_ = offset;
+
+  offset = AssignLiteralOffset(offset);
+
+  offset = AssignSwitchTablesOffset(offset);
+
+  offset = AssignFillArrayDataOffset(offset);
+
+  total_size_ = offset;
+}
+
+/*
+ * Go over each instruction in the list and calculate the offset from the top
+ * before sending them off to the assembler. If out-of-range branch distance is
+ * seen, rearrange the instructions a bit to correct it.
+ * TODO: consolidate w/ Arm assembly mechanism.
+ */
+void MipsMir2Lir::AssembleLIR() {
+  AssignOffsets();
+  int assembler_retries = 0;
+  /*
+   * Assemble here.  Note that we generate code with optimistic assumptions
+   * and if found not to work, we'll have to redo the sequence and retry.
+   */
+
+  while (true) {
+    AssemblerStatus res = AssembleInstructions(0);
+    if (res == kSuccess) {
+      break;
+    } else {
+      assembler_retries++;
+      if (assembler_retries > MAX_ASSEMBLER_RETRIES) {
+        CodegenDump();
+        LOG(FATAL) << "Assembler error - too many retries";
+      }
+      // Redo offsets and try again
+      AssignOffsets();
+      code_buffer_.clear();
+    }
+  }
+
+  // Install literals
+  InstallLiteralPools();
+
+  // Install switch tables
+  InstallSwitchTables();
+
+  // Install fill array data
+  InstallFillArrayData();
+
+  // Create the mapping table and native offset to reference map.
+  CreateMappingTables();
+
+  CreateNativeGcMap();
+}
+
 }  // namespace art
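The per-target AssignOffsets() copies above and below all round the literal-pool start up to a word boundary with (offset + 3) & ~3. A tiny self-checking restatement of that idiom, with an invented name:

#include <cstdint>

constexpr uint32_t AlignUp4(uint32_t offset) { return (offset + 3u) & ~3u; }

static_assert(AlignUp4(0) == 0, "already aligned");
static_assert(AlignUp4(1) == 4, "rounds up");
static_assert(AlignUp4(6) == 8, "rounds up");
static_assert(AlignUp4(8) == 8, "already aligned");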
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index d53c012..9a5ca2c 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -261,36 +261,6 @@
   MarkSafepointPC(call_inst);
 }
 
-/*
- * TODO: implement fast path to short-circuit thin-lock case
- */
-void MipsMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
-  FlushAllRegs();
-  LoadValueDirectFixed(rl_src, rMIPS_ARG0);  // Get obj
-  LockCallTemps();  // Prepare for explicit register usage
-  GenNullCheck(rl_src.s_reg_low, rMIPS_ARG0, opt_flags);
-  // Go expensive route - artLockObjectFromCode(self, obj);
-  int r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pLockObject));
-  ClobberCalleeSave();
-  LIR* call_inst = OpReg(kOpBlx, r_tgt);
-  MarkSafepointPC(call_inst);
-}
-
-/*
- * TODO: implement fast path to short-circuit thin-lock case
- */
-void MipsMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
-  FlushAllRegs();
-  LoadValueDirectFixed(rl_src, rMIPS_ARG0);  // Get obj
-  LockCallTemps();  // Prepare for explicit register usage
-  GenNullCheck(rl_src.s_reg_low, rMIPS_ARG0, opt_flags);
-  // Go expensive route - UnlockObjectFromCode(obj);
-  int r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pUnlockObject));
-  ClobberCalleeSave();
-  LIR* call_inst = OpReg(kOpBlx, r_tgt);
-  MarkSafepointPC(call_inst);
-}
-
 void MipsMir2Lir::GenMoveException(RegLocation rl_dest) {
   int ex_offset = Thread::ExceptionOffset().Int32Value();
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
@@ -318,6 +288,7 @@
   FreeTemp(reg_card_base);
   FreeTemp(reg_card_no);
 }
+
 void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
   int spill_count = num_core_spills_ + num_fp_spills_;
   /*
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index 8d0b347..892af09 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -71,9 +71,12 @@
     void CompilerInitializeRegAlloc();
 
     // Required for target - miscellaneous.
+    void AssembleLIR();
+    int AssignInsnOffsets();
+    void AssignOffsets();
     AssemblerStatus AssembleInstructions(uintptr_t start_addr);
     void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix);
-    void SetupTargetResourceMasks(LIR* lir);
+    void SetupTargetResourceMasks(LIR* lir, uint64_t flags);
     const char* GetTargetInstFmt(int opcode);
     const char* GetTargetInstName(int opcode);
     std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
@@ -123,8 +126,6 @@
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
     void GenSelect(BasicBlock* bb, MIR* mir);
     void GenMemBarrier(MemBarrierKind barrier_kind);
-    void GenMonitorEnter(int opt_flags, RegLocation rl_src);
-    void GenMonitorExit(int opt_flags, RegLocation rl_src);
     void GenMoveException(RegLocation rl_dest);
     void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
                                                int first_bit, int second_bit);
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 8e768dc..f9d432e 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -120,22 +120,21 @@
 }
 
 
-void MipsMir2Lir::SetupTargetResourceMasks(LIR* lir) {
+void MipsMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
   DCHECK_EQ(cu_->instruction_set, kMips);
+  DCHECK(!lir->flags.use_def_invalid);
 
   // Mips-specific resource map setup here.
-  uint64_t flags = MipsMir2Lir::EncodingMap[lir->opcode].flags;
-
   if (flags & REG_DEF_SP) {
-    lir->def_mask |= ENCODE_MIPS_REG_SP;
+    lir->u.m.def_mask |= ENCODE_MIPS_REG_SP;
   }
 
   if (flags & REG_USE_SP) {
-    lir->use_mask |= ENCODE_MIPS_REG_SP;
+    lir->u.m.use_mask |= ENCODE_MIPS_REG_SP;
   }
 
   if (flags & REG_DEF_LR) {
-    lir->def_mask |= ENCODE_MIPS_REG_LR;
+    lir->u.m.def_mask |= ENCODE_MIPS_REG_LR;
   }
 }
 
@@ -269,8 +268,8 @@
     }
     /* Memory bits */
     if (mips_lir && (mask & ENCODE_DALVIK_REG)) {
-      sprintf(buf + strlen(buf), "dr%d%s", mips_lir->alias_info & 0xffff,
-              (mips_lir->alias_info & 0x80000000) ? "(+1)" : "");
+      sprintf(buf + strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(mips_lir->flags.alias_info),
+              DECODE_ALIAS_INFO_WIDE(mips_lir->flags.alias_info) ? "(+1)" : "");
     }
     if (mask & ENCODE_LITERAL) {
       strcat(buf, "lit ");
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index 0ca8d8d..314c57e 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -58,7 +58,8 @@
   if ((opcode == kPseudoTargetLabel) || (opcode == kPseudoSafepointPC) ||
       (opcode == kPseudoExportedPC)) {
     // Always make labels scheduling barriers
-    insn->use_mask = insn->def_mask = ENCODE_ALL;
+    DCHECK(!insn->flags.use_def_invalid);
+    insn->u.m.use_mask = insn->u.m.def_mask = ENCODE_ALL;
   }
   return insn;
 }
@@ -141,20 +142,21 @@
 inline void Mir2Lir::SetupResourceMasks(LIR* lir) {
   int opcode = lir->opcode;
 
-  if (opcode <= 0) {
-    lir->use_mask = lir->def_mask = 0;
+  if ((opcode < 0) && (opcode != kPseudoBarrier)) {
+    lir->flags.fixup = kFixupLabel;
     return;
   }
 
   uint64_t flags = GetTargetInstFlags(opcode);
 
   if (flags & NEEDS_FIXUP) {
-    lir->flags.pcRelFixup = true;
+    // Note: target-specific setup may specialize the fixup kind.
+    lir->flags.fixup = kFixupLabel;
   }
 
   /* Get the starting size of the instruction's template */
   lir->flags.size = GetInsnSize(lir);
-
+  estimated_native_code_size_ += lir->flags.size;
   /* Set up the mask for resources that are updated */
   if (flags & (IS_LOAD | IS_STORE)) {
     /* Default to heap - will catch specialized classes later */
@@ -166,39 +168,44 @@
    * turn will trash everything.
    */
   if (flags & IS_BRANCH) {
-    lir->def_mask = lir->use_mask = ENCODE_ALL;
+    lir->u.m.def_mask = lir->u.m.use_mask = ENCODE_ALL;
     return;
   }
 
   if (flags & REG_DEF0) {
-    SetupRegMask(&lir->def_mask, lir->operands[0]);
+    SetupRegMask(&lir->u.m.def_mask, lir->operands[0]);
   }
 
   if (flags & REG_DEF1) {
-    SetupRegMask(&lir->def_mask, lir->operands[1]);
+    SetupRegMask(&lir->u.m.def_mask, lir->operands[1]);
   }
 
+  if (flags & REG_USE0) {
+    SetupRegMask(&lir->u.m.use_mask, lir->operands[0]);
+  }
+
+  if (flags & REG_USE1) {
+    SetupRegMask(&lir->u.m.use_mask, lir->operands[1]);
+  }
+
+  if (flags & REG_USE2) {
+    SetupRegMask(&lir->u.m.use_mask, lir->operands[2]);
+  }
+
+  if (flags & REG_USE3) {
+    SetupRegMask(&lir->u.m.use_mask, lir->operands[3]);
+  }
 
   if (flags & SETS_CCODES) {
-    lir->def_mask |= ENCODE_CCODE;
-  }
-
-  if (flags & (REG_USE0 | REG_USE1 | REG_USE2 | REG_USE3)) {
-    int i;
-
-    for (i = 0; i < 4; i++) {
-      if (flags & (1 << (kRegUse0 + i))) {
-        SetupRegMask(&lir->use_mask, lir->operands[i]);
-      }
-    }
+    lir->u.m.def_mask |= ENCODE_CCODE;
   }
 
   if (flags & USES_CCODES) {
-    lir->use_mask |= ENCODE_CCODE;
+    lir->u.m.use_mask |= ENCODE_CCODE;
   }
 
   // Handle target-specific actions
-  SetupTargetResourceMasks(lir);
+  SetupTargetResourceMasks(lir, flags);
 }
 
 inline art::Mir2Lir::RegisterInfo* Mir2Lir::GetRegInfo(int reg) {
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 6f39869..66ece2c 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -697,6 +697,7 @@
 
   // Insert the block label.
   block_label_list_[block_id].opcode = kPseudoNormalBlockLabel;
+  block_label_list_[block_id].flags.fixup = kFixupLabel;
   AppendLIR(&block_label_list_[block_id]);
 
   LIR* head_lir = NULL;
@@ -746,7 +747,8 @@
     if (head_lir == NULL) {
       head_lir = &block_label_list_[bb->id];
       // Set the first label as a scheduling barrier.
-      head_lir->def_mask = ENCODE_ALL;
+      DCHECK(!head_lir->flags.use_def_invalid);
+      head_lir->u.m.def_mask = ENCODE_ALL;
     }
 
     if (opcode == kMirOpCheck) {
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 7d6f968..61f4484 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -95,6 +95,7 @@
 struct CallInfo;
 struct CompilationUnit;
 struct MIR;
+struct LIR;
 struct RegLocation;
 struct RegisterInfo;
 class MIRGraph;
@@ -107,24 +108,36 @@
 
 typedef std::vector<uint8_t> CodeBuffer;
 
+struct UseDefMasks {
+  uint64_t use_mask;        // Resource mask for use.
+  uint64_t def_mask;        // Resource mask for def.
+};
+
+struct AssemblyInfo {
+  LIR* pcrel_next;           // Chain of LIR nodes needing pc relative fixups.
+  uint8_t bytes[16];         // Encoded instruction bytes.
+};
 
 struct LIR {
   int offset;               // Offset of this instruction.
-  int dalvik_offset;        // Offset of Dalvik opcode.
+  uint16_t dalvik_offset;   // Offset of Dalvik opcode in code units (16-bit words).
+  int16_t opcode;
   LIR* next;
   LIR* prev;
   LIR* target;
-  int opcode;
-  int operands[5];          // [0..4] = [dest, src1, src2, extra, extra2].
   struct {
-    bool is_nop:1;          // LIR is optimized away.
-    bool pcRelFixup:1;      // May need pc-relative fixup.
-    unsigned int size:5;    // Note: size is in bytes.
-    unsigned int unused:25;
+    unsigned int alias_info:17;  // For Dalvik register disambiguation.
+    bool is_nop:1;               // LIR is optimized away.
+    unsigned int size:4;         // Note: size of encoded instruction is in bytes.
+    bool use_def_invalid:1;      // If true, masks should not be used.
+    unsigned int generation:1;   // Used to track visitation state during fixup pass.
+    unsigned int fixup:8;        // Fixup kind.
   } flags;
-  int alias_info;           // For Dalvik register & litpool disambiguation.
-  uint64_t use_mask;        // Resource mask for use.
-  uint64_t def_mask;        // Resource mask for def.
+  union {
+    UseDefMasks m;          // Use & Def masks used during optimization.
+    AssemblyInfo a;         // Instruction encoding used during assembly phase.
+  } u;
+  int operands[5];          // [0..4] = [dest, src1, src2, extra, extra2].
 };
 
 // Target-specific initialization.
@@ -141,7 +154,7 @@
 
 // Defines for alias_info (tracks Dalvik register references).
 #define DECODE_ALIAS_INFO_REG(X)        (X & 0xffff)
-#define DECODE_ALIAS_INFO_WIDE_FLAG     (0x80000000)
+#define DECODE_ALIAS_INFO_WIDE_FLAG     (0x10000)
 #define DECODE_ALIAS_INFO_WIDE(X)       ((X & DECODE_ALIAS_INFO_WIDE_FLAG) ? 1 : 0)
 #define ENCODE_ALIAS_INFO(REG, ISWIDE)  (REG | (ISWIDE ? DECODE_ALIAS_INFO_WIDE_FLAG : 0))
 
@@ -255,7 +268,6 @@
     void MarkSafepointPC(LIR* inst);
     bool FastInstance(uint32_t field_idx, bool is_put, int* field_offset, bool* is_volatile);
     void SetupResourceMasks(LIR* lir);
-    void AssembleLIR();
     void SetMemRefType(LIR* lir, bool is_load, int mem_type);
     void AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load, bool is64bit);
     void SetupRegMask(uint64_t* mask, int reg);
@@ -295,8 +307,6 @@
     int AssignLiteralOffset(int offset);
     int AssignSwitchTablesOffset(int offset);
     int AssignFillArrayDataOffset(int offset);
-    int AssignInsnOffsets();
-    void AssignOffsets();
     LIR* InsertCaseLabel(int vaddr, int keyVal);
     void MarkPackedCaseLabels(Mir2Lir::SwitchTable *tab_rec);
     void MarkSparseCaseLabels(Mir2Lir::SwitchTable *tab_rec);
@@ -571,9 +581,9 @@
     virtual void CompilerInitializeRegAlloc() = 0;
 
     // Required for target - miscellaneous.
-    virtual AssemblerStatus AssembleInstructions(uintptr_t start_addr) = 0;
+    virtual void AssembleLIR() = 0;
     virtual void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix) = 0;
-    virtual void SetupTargetResourceMasks(LIR* lir) = 0;
+    virtual void SetupTargetResourceMasks(LIR* lir, uint64_t flags) = 0;
     virtual const char* GetTargetInstFmt(int opcode) = 0;
     virtual const char* GetTargetInstName(int opcode) = 0;
     virtual std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) = 0;
@@ -629,8 +639,6 @@
     virtual void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) = 0;
     virtual void GenSelect(BasicBlock* bb, MIR* mir) = 0;
     virtual void GenMemBarrier(MemBarrierKind barrier_kind) = 0;
-    virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src) = 0;
-    virtual void GenMonitorExit(int opt_flags, RegLocation rl_src) = 0;
     virtual void GenMoveException(RegLocation rl_dest) = 0;
     virtual void GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
                                                RegLocation rl_result, int lit, int first_bit,
@@ -689,6 +697,10 @@
     virtual bool InexpensiveConstantLong(int64_t value) = 0;
     virtual bool InexpensiveConstantDouble(int64_t value) = 0;
 
+    // May be optimized by targets.
+    virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src);
+    virtual void GenMonitorExit(int opt_flags, RegLocation rl_src);
+
     // Temp workaround
     void Workaround7250540(RegLocation rl_dest, int value);
 
@@ -719,6 +731,7 @@
     LIR* literal_list_;                        // Constants.
     LIR* method_literal_list_;                 // Method literals requiring patching.
     LIR* code_literal_list_;                   // Code literals requiring patching.
+    LIR* first_fixup_;                         // Doubly-linked list of LIR nodes requiring fixups.
 
   protected:
     CompilationUnit* const cu_;
@@ -741,6 +754,7 @@
      * immediately preceed the instruction.
      */
     std::vector<uint32_t> dex2pc_mapping_table_;
+    int current_code_offset_;             // Working byte offset of machine instructions.
     int data_offset_;                     // starting offset of literal pool.
     int total_size_;                      // header + code size.
     LIR* block_label_list_;
@@ -755,6 +769,7 @@
      * The low-level LIR creation utilites will pull it from here.  Rework this.
      */
     int current_dalvik_offset_;
+    int estimated_native_code_size_;     // Just an estimate; used to reserve code_buffer_ size.
     RegisterPool* reg_pool_;
     /*
      * Sanity checking for the register temp tracking.  The same ssa
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 3e76883..b1634da 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -1174,7 +1174,7 @@
       continue;
     }
 
-    if (lir->flags.pcRelFixup) {
+    if (lir->flags.fixup != kFixupNone) {
       switch (lir->opcode) {
         case kX86Jcc8: {
           LIR *target_lir = lir->target;
@@ -1385,4 +1385,97 @@
   return res;
 }
 
+// LIR offset assignment.
+// TODO: consolidate w/ Arm assembly mechanism.
+int X86Mir2Lir::AssignInsnOffsets() {
+  LIR* lir;
+  int offset = 0;
+
+  for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) {
+    lir->offset = offset;
+    if (LIKELY(lir->opcode >= 0)) {
+      if (!lir->flags.is_nop) {
+        offset += lir->flags.size;
+      }
+    } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) {
+      if (offset & 0x2) {
+        offset += 2;
+        lir->operands[0] = 1;
+      } else {
+        lir->operands[0] = 0;
+      }
+    }
+    /* Pseudo opcodes don't consume space */
+  }
+  return offset;
+}
+
+/*
+ * Walk the compilation unit and assign offsets to instructions
+ * and literals and compute the total size of the compiled unit.
+ * TODO: consolidate w/ Arm assembly mechanism.
+ */
+void X86Mir2Lir::AssignOffsets() {
+  int offset = AssignInsnOffsets();
+
+  /* Const values have to be word aligned */
+  offset = (offset + 3) & ~3;
+
+  /* Set up offsets for literals */
+  data_offset_ = offset;
+
+  offset = AssignLiteralOffset(offset);
+
+  offset = AssignSwitchTablesOffset(offset);
+
+  offset = AssignFillArrayDataOffset(offset);
+
+  total_size_ = offset;
+}
+
+/*
+ * Go over each instruction in the list and calculate the offset from the top
+ * before sending them off to the assembler. If an out-of-range branch distance
+ * is seen, rearrange the instructions a bit to correct it.
+ * TODO: consolidate w/ Arm assembly mechanism.
+ */
+void X86Mir2Lir::AssembleLIR() {
+  AssignOffsets();
+  int assembler_retries = 0;
+  /*
+   * Assemble here.  Note that we generate code with optimistic assumptions
+   * and if found not to work, we'll have to redo the sequence and retry.
+   */
+
+  while (true) {
+    AssemblerStatus res = AssembleInstructions(0);
+    if (res == kSuccess) {
+      break;
+    } else {
+      assembler_retries++;
+      if (assembler_retries > MAX_ASSEMBLER_RETRIES) {
+        CodegenDump();
+        LOG(FATAL) << "Assembler error - too many retries";
+      }
+      // Redo offsets and try again
+      AssignOffsets();
+      code_buffer_.clear();
+    }
+  }
+
+  // Install literals
+  InstallLiteralPools();
+
+  // Install switch tables
+  InstallSwitchTables();
+
+  // Install fill array data
+  InstallFillArrayData();
+
+  // Create the mapping table and native offset to reference map.
+  CreateMappingTables();
+
+  CreateNativeGcMap();
+}
+
 }  // namespace art
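
For concreteness, the word-alignment step in AssignOffsets above rounds the code size up to the next multiple of four before the literal pool begins; a tiny self-checking example with an arbitrary code size:

#include <cassert>

int main() {
  int offset = 37;             // pretend AssignInsnOffsets() produced 37 bytes of code
  offset = (offset + 3) & ~3;  // same rounding as AssignOffsets()
  assert(offset == 40);        // data_offset_ (literal pool) starts word-aligned
  return 0;
}
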
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 2be2aa9..7fad6f0 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -150,43 +150,6 @@
                           rX86_ARG1, true);
 }
 
-void X86Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
-  FlushAllRegs();
-  LoadValueDirectFixed(rl_src, rCX);  // Get obj
-  LockCallTemps();  // Prepare for explicit register usage
-  GenNullCheck(rl_src.s_reg_low, rCX, opt_flags);
-  // If lock is unheld, try to grab it quickly with compare and exchange
-  // TODO: copy and clear hash state?
-  NewLIR2(kX86Mov32RT, rDX, Thread::ThinLockIdOffset().Int32Value());
-  NewLIR2(kX86Sal32RI, rDX, LW_LOCK_OWNER_SHIFT);
-  NewLIR2(kX86Xor32RR, rAX, rAX);
-  NewLIR3(kX86LockCmpxchgMR, rCX, mirror::Object::MonitorOffset().Int32Value(), rDX);
-  LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
-  // If lock is held, go the expensive route - artLockObjectFromCode(self, obj);
-  CallRuntimeHelperReg(QUICK_ENTRYPOINT_OFFSET(pLockObject), rCX, true);
-  branch->target = NewLIR0(kPseudoTargetLabel);
-}
-
-void X86Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
-  FlushAllRegs();
-  LoadValueDirectFixed(rl_src, rAX);  // Get obj
-  LockCallTemps();  // Prepare for explicit register usage
-  GenNullCheck(rl_src.s_reg_low, rAX, opt_flags);
-  // If lock is held by the current thread, clear it to quickly release it
-  // TODO: clear hash state?
-  NewLIR2(kX86Mov32RT, rDX, Thread::ThinLockIdOffset().Int32Value());
-  NewLIR2(kX86Sal32RI, rDX, LW_LOCK_OWNER_SHIFT);
-  NewLIR3(kX86Mov32RM, rCX, rAX, mirror::Object::MonitorOffset().Int32Value());
-  OpRegReg(kOpSub, rCX, rDX);
-  LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
-  NewLIR3(kX86Mov32MR, rAX, mirror::Object::MonitorOffset().Int32Value(), rCX);
-  LIR* branch2 = NewLIR1(kX86Jmp8, 0);
-  branch->target = NewLIR0(kPseudoTargetLabel);
-  // Otherwise, go the expensive route - UnlockObjectFromCode(obj);
-  CallRuntimeHelperReg(QUICK_ENTRYPOINT_OFFSET(pUnlockObject), rAX, true);
-  branch2->target = NewLIR0(kPseudoTargetLabel);
-}
-
 void X86Mir2Lir::GenMoveException(RegLocation rl_dest) {
   int ex_offset = Thread::ExceptionOffset().Int32Value();
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 0f28110..d5c21e5 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -71,9 +71,12 @@
     void CompilerInitializeRegAlloc();
 
     // Required for target - miscellaneous.
+    void AssembleLIR();
+    int AssignInsnOffsets();
+    void AssignOffsets();
     AssemblerStatus AssembleInstructions(uintptr_t start_addr);
     void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix);
-    void SetupTargetResourceMasks(LIR* lir);
+    void SetupTargetResourceMasks(LIR* lir, uint64_t flags);
     const char* GetTargetInstFmt(int opcode);
     const char* GetTargetInstName(int opcode);
     std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr);
@@ -123,8 +126,6 @@
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
     void GenSelect(BasicBlock* bb, MIR* mir);
     void GenMemBarrier(MemBarrierKind barrier_kind);
-    void GenMonitorEnter(int opt_flags, RegLocation rl_src);
-    void GenMonitorExit(int opt_flags, RegLocation rl_src);
     void GenMoveException(RegLocation rl_dest);
     void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result,
                                                int lit, int first_bit, int second_bit);
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 94dd759..f080830 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -132,37 +132,36 @@
   return 0ULL;
 }
 
-void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir) {
+void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
   DCHECK_EQ(cu_->instruction_set, kX86);
+  DCHECK(!lir->flags.use_def_invalid);
 
   // X86-specific resource map setup here.
-  uint64_t flags = X86Mir2Lir::EncodingMap[lir->opcode].flags;
-
   if (flags & REG_USE_SP) {
-    lir->use_mask |= ENCODE_X86_REG_SP;
+    lir->u.m.use_mask |= ENCODE_X86_REG_SP;
   }
 
   if (flags & REG_DEF_SP) {
-    lir->def_mask |= ENCODE_X86_REG_SP;
+    lir->u.m.def_mask |= ENCODE_X86_REG_SP;
   }
 
   if (flags & REG_DEFA) {
-    SetupRegMask(&lir->def_mask, rAX);
+    SetupRegMask(&lir->u.m.def_mask, rAX);
   }
 
   if (flags & REG_DEFD) {
-    SetupRegMask(&lir->def_mask, rDX);
+    SetupRegMask(&lir->u.m.def_mask, rDX);
   }
   if (flags & REG_USEA) {
-    SetupRegMask(&lir->use_mask, rAX);
+    SetupRegMask(&lir->u.m.use_mask, rAX);
   }
 
   if (flags & REG_USEC) {
-    SetupRegMask(&lir->use_mask, rCX);
+    SetupRegMask(&lir->u.m.use_mask, rCX);
   }
 
   if (flags & REG_USED) {
-    SetupRegMask(&lir->use_mask, rDX);
+    SetupRegMask(&lir->u.m.use_mask, rDX);
   }
 }
 
@@ -275,8 +274,8 @@
     }
     /* Memory bits */
     if (x86LIR && (mask & ENCODE_DALVIK_REG)) {
-      sprintf(buf + strlen(buf), "dr%d%s", x86LIR->alias_info & 0xffff,
-              (x86LIR->alias_info & 0x80000000) ? "(+1)" : "");
+      sprintf(buf + strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
+              (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
     }
     if (mask & ENCODE_LITERAL) {
       strcat(buf, "lit ");
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 6464a4c..d4be7c0 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -23,6 +23,8 @@
 #include "compiler/oat_writer.h"
 #include "gc/space/image_space.h"
 #include "image.h"
+#include "lock_word.h"
+#include "mirror/object-inl.h"
 #include "signal_catcher.h"
 #include "UniquePtr.h"
 #include "utils.h"
@@ -158,7 +160,7 @@
       // non image classes should be in a space after the image.
       EXPECT_GT(reinterpret_cast<byte*>(klass), image_end) << descriptor;
     }
-    EXPECT_TRUE(Monitor::IsValidLockWord(*klass->GetRawLockWordAddress()));
+    EXPECT_TRUE(Monitor::IsValidLockWord(klass->GetLockWord()));
   }
 }
 
diff --git a/compiler/llvm/llvm_compilation_unit.cc b/compiler/llvm/llvm_compilation_unit.cc
index aa439cc..feb495e 100644
--- a/compiler/llvm/llvm_compilation_unit.cc
+++ b/compiler/llvm/llvm_compilation_unit.cc
@@ -82,7 +82,6 @@
 #include "ir_builder.h"
 #include "os.h"
 #include "runtime_support_builder_arm.h"
-#include "runtime_support_builder_thumb2.h"
 #include "runtime_support_builder_x86.h"
 #include "utils_llvm.h"
 
@@ -118,12 +117,10 @@
   default:
     runtime_support_.reset(new RuntimeSupportBuilder(*context_, *module_, *irb_));
     break;
+  case kThumb2:
   case kArm:
     runtime_support_.reset(new RuntimeSupportBuilderARM(*context_, *module_, *irb_));
     break;
-  case kThumb2:
-    runtime_support_.reset(new RuntimeSupportBuilderThumb2(*context_, *module_, *irb_));
-    break;
   case kX86:
     runtime_support_.reset(new RuntimeSupportBuilderX86(*context_, *module_, *irb_));
     break;
diff --git a/compiler/llvm/runtime_support_builder.cc b/compiler/llvm/runtime_support_builder.cc
index 24e283d..c825fbf 100644
--- a/compiler/llvm/runtime_support_builder.cc
+++ b/compiler/llvm/runtime_support_builder.cc
@@ -164,89 +164,13 @@
 /* Monitor */
 
 void RuntimeSupportBuilder::EmitLockObject(::llvm::Value* object) {
-  Value* monitor =
-      irb_.LoadFromObjectOffset(object,
-                                mirror::Object::MonitorOffset().Int32Value(),
-                                irb_.getJIntTy(),
-                                kTBAARuntimeInfo);
-
-  Value* real_monitor =
-      irb_.CreateAnd(monitor, ~(LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
-
-  // Is thin lock, unheld and not recursively acquired.
-  Value* unheld = irb_.CreateICmpEQ(real_monitor, irb_.getInt32(0));
-
-  Function* parent_func = irb_.GetInsertBlock()->getParent();
-  BasicBlock* bb_fast = BasicBlock::Create(context_, "lock_fast", parent_func);
-  BasicBlock* bb_slow = BasicBlock::Create(context_, "lock_slow", parent_func);
-  BasicBlock* bb_cont = BasicBlock::Create(context_, "lock_cont", parent_func);
-  irb_.CreateCondBr(unheld, bb_fast, bb_slow, kLikely);
-
-  irb_.SetInsertPoint(bb_fast);
-
-  // Calculate new monitor: new = old | (lock_id << LW_LOCK_OWNER_SHIFT)
-  Value* lock_id =
-      EmitLoadFromThreadOffset(Thread::ThinLockIdOffset().Int32Value(),
-                               irb_.getInt32Ty(), kTBAARuntimeInfo);
-
-  Value* owner = irb_.CreateShl(lock_id, LW_LOCK_OWNER_SHIFT);
-  Value* new_monitor = irb_.CreateOr(monitor, owner);
-
-  // Atomically update monitor.
-  Value* old_monitor =
-      irb_.CompareExchangeObjectOffset(object,
-                                       mirror::Object::MonitorOffset().Int32Value(),
-                                       monitor, new_monitor, kTBAARuntimeInfo);
-
-  Value* retry_slow_path = irb_.CreateICmpEQ(old_monitor, monitor);
-  irb_.CreateCondBr(retry_slow_path, bb_cont, bb_slow, kLikely);
-
-  irb_.SetInsertPoint(bb_slow);
   Function* slow_func = GetRuntimeSupportFunction(runtime_support::LockObject);
   irb_.CreateCall2(slow_func, object, EmitGetCurrentThread());
-  irb_.CreateBr(bb_cont);
-
-  irb_.SetInsertPoint(bb_cont);
 }
 
 void RuntimeSupportBuilder::EmitUnlockObject(::llvm::Value* object) {
-  Value* lock_id =
-      EmitLoadFromThreadOffset(Thread::ThinLockIdOffset().Int32Value(),
-                               irb_.getJIntTy(),
-                               kTBAARuntimeInfo);
-  Value* monitor =
-      irb_.LoadFromObjectOffset(object,
-                                mirror::Object::MonitorOffset().Int32Value(),
-                                irb_.getJIntTy(),
-                                kTBAARuntimeInfo);
-
-  Value* my_monitor = irb_.CreateShl(lock_id, LW_LOCK_OWNER_SHIFT);
-  Value* hash_state = irb_.CreateAnd(monitor, (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
-  Value* real_monitor = irb_.CreateAnd(monitor, ~(LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
-
-  // Is thin lock, held by us and not recursively acquired
-  Value* is_fast_path = irb_.CreateICmpEQ(real_monitor, my_monitor);
-
-  Function* parent_func = irb_.GetInsertBlock()->getParent();
-  BasicBlock* bb_fast = BasicBlock::Create(context_, "unlock_fast", parent_func);
-  BasicBlock* bb_slow = BasicBlock::Create(context_, "unlock_slow", parent_func);
-  BasicBlock* bb_cont = BasicBlock::Create(context_, "unlock_cont", parent_func);
-  irb_.CreateCondBr(is_fast_path, bb_fast, bb_slow, kLikely);
-
-  irb_.SetInsertPoint(bb_fast);
-  // Set all bits to zero (except hash state)
-  irb_.StoreToObjectOffset(object,
-                           mirror::Object::MonitorOffset().Int32Value(),
-                           hash_state,
-                           kTBAARuntimeInfo);
-  irb_.CreateBr(bb_cont);
-
-  irb_.SetInsertPoint(bb_slow);
   Function* slow_func = GetRuntimeSupportFunction(runtime_support::UnlockObject);
   irb_.CreateCall2(slow_func, object, EmitGetCurrentThread());
-  irb_.CreateBr(bb_cont);
-
-  irb_.SetInsertPoint(bb_cont);
 }
 
 
diff --git a/compiler/llvm/runtime_support_builder.h b/compiler/llvm/runtime_support_builder.h
index e92ac0a..898611a 100644
--- a/compiler/llvm/runtime_support_builder.h
+++ b/compiler/llvm/runtime_support_builder.h
@@ -64,8 +64,8 @@
   virtual void EmitTestSuspend();
 
   /* Monitor */
-  virtual void EmitLockObject(::llvm::Value* object);
-  virtual void EmitUnlockObject(::llvm::Value* object);
+  void EmitLockObject(::llvm::Value* object);
+  void EmitUnlockObject(::llvm::Value* object);
 
   /* MarkGCCard */
   virtual void EmitMarkGCCard(::llvm::Value* value, ::llvm::Value* target_addr);
diff --git a/compiler/llvm/runtime_support_builder_arm.cc b/compiler/llvm/runtime_support_builder_arm.cc
index 569d825..cad4624 100644
--- a/compiler/llvm/runtime_support_builder_arm.cc
+++ b/compiler/llvm/runtime_support_builder_arm.cc
@@ -116,24 +116,5 @@
   return old_thread_register;
 }
 
-
-/* Monitor */
-
-void RuntimeSupportBuilderARM::EmitLockObject(Value* object) {
-  RuntimeSupportBuilder::EmitLockObject(object);
-  FunctionType* func_ty = FunctionType::get(/*Result=*/Type::getVoidTy(context_),
-                                            /*isVarArg=*/false);
-  InlineAsm* func = InlineAsm::get(func_ty, "dmb sy", "", true);
-  irb_.CreateCall(func);
-}
-
-void RuntimeSupportBuilderARM::EmitUnlockObject(Value* object) {
-  RuntimeSupportBuilder::EmitUnlockObject(object);
-  FunctionType* func_ty = FunctionType::get(/*Result=*/Type::getVoidTy(context_),
-                                            /*isVarArg=*/false);
-  InlineAsm* func = InlineAsm::get(func_ty, "dmb sy", "", true);
-  irb_.CreateCall(func);
-}
-
 }  // namespace llvm
 }  // namespace art
diff --git a/compiler/llvm/runtime_support_builder_arm.h b/compiler/llvm/runtime_support_builder_arm.h
index 5a353d7..0d01509 100644
--- a/compiler/llvm/runtime_support_builder_arm.h
+++ b/compiler/llvm/runtime_support_builder_arm.h
@@ -34,10 +34,6 @@
   virtual void EmitStoreToThreadOffset(int64_t offset, ::llvm::Value* value,
                                        TBAASpecialType s_ty);
   virtual ::llvm::Value* EmitSetCurrentThread(::llvm::Value* thread);
-
-  /* Monitor */
-  virtual void EmitLockObject(::llvm::Value* object);
-  virtual void EmitUnlockObject(::llvm::Value* object);
 };
 
 }  // namespace llvm
diff --git a/compiler/llvm/runtime_support_builder_thumb2.cc b/compiler/llvm/runtime_support_builder_thumb2.cc
deleted file mode 100644
index eff29c8..0000000
--- a/compiler/llvm/runtime_support_builder_thumb2.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "runtime_support_builder_thumb2.h"
-
-#include "ir_builder.h"
-#include "mirror/object.h"
-#include "monitor.h"
-#include "thread.h"
-#include "utils_llvm.h"
-
-#include <llvm/IR/DerivedTypes.h>
-#include <llvm/IR/Function.h>
-#include <llvm/IR/InlineAsm.h>
-#include <llvm/IR/Module.h>
-#include <llvm/IR/Type.h>
-
-#include <inttypes.h>
-#include <vector>
-
-using ::llvm::BasicBlock;
-using ::llvm::Function;
-using ::llvm::FunctionType;
-using ::llvm::InlineAsm;
-using ::llvm::Type;
-using ::llvm::Value;
-
-namespace art {
-namespace llvm {
-
-
-void RuntimeSupportBuilderThumb2::EmitLockObject(Value* object) {
-  FunctionType* func_ty = FunctionType::get(/*Result=*/irb_.getInt32Ty(),
-                                            /*Params=*/irb_.getJObjectTy(),
-                                            /*isVarArg=*/false);
-  // $0: result
-  // $1: object
-  // $2: temp
-  // $3: temp
-  std::string asms;
-  StringAppendF(&asms, "add $3, $1, #%" PRId32 "\n", mirror::Object::MonitorOffset().Int32Value());
-  StringAppendF(&asms, "ldr $2, [r9, #%" PRId32 "]\n", Thread::ThinLockIdOffset().Int32Value());
-  StringAppendF(&asms, "ldrex $0, [$3]\n");
-  StringAppendF(&asms, "lsl $2, $2, %d\n", LW_LOCK_OWNER_SHIFT);
-  StringAppendF(&asms, "bfi $2, $0, #0, #%d\n", LW_LOCK_OWNER_SHIFT - 1);
-  StringAppendF(&asms, "bfc $0, #%d, #%d\n", LW_HASH_STATE_SHIFT, LW_LOCK_OWNER_SHIFT - 1);
-  StringAppendF(&asms, "cmp $0, #0\n");
-  StringAppendF(&asms, "it eq\n");
-  StringAppendF(&asms, "strexeq $0, $2, [$3]\n");
-
-  InlineAsm* func = InlineAsm::get(func_ty, asms, "=&l,l,~l,~l", true);
-
-  Value* retry_slow_path = irb_.CreateCall(func, object);
-  retry_slow_path = irb_.CreateICmpNE(retry_slow_path, irb_.getJInt(0));
-
-  Function* parent_func = irb_.GetInsertBlock()->getParent();
-  BasicBlock* basic_block_lock = BasicBlock::Create(context_, "lock", parent_func);
-  BasicBlock* basic_block_cont = BasicBlock::Create(context_, "lock_cont", parent_func);
-  irb_.CreateCondBr(retry_slow_path, basic_block_lock, basic_block_cont, kUnlikely);
-
-  irb_.SetInsertPoint(basic_block_lock);
-  Function* slow_func = GetRuntimeSupportFunction(runtime_support::LockObject);
-  irb_.CreateCall2(slow_func, object, EmitGetCurrentThread());
-  irb_.CreateBr(basic_block_cont);
-
-  irb_.SetInsertPoint(basic_block_cont);
-  {  // Memory barrier
-    FunctionType* asm_ty = FunctionType::get(/*Result=*/Type::getVoidTy(context_),
-                                              /*isVarArg=*/false);
-    InlineAsm* func = InlineAsm::get(asm_ty, "dmb sy", "", true);
-    irb_.CreateCall(func);
-  }
-}
-
-
-}  // namespace llvm
-}  // namespace art
diff --git a/compiler/llvm/runtime_support_builder_thumb2.h b/compiler/llvm/runtime_support_builder_thumb2.h
deleted file mode 100644
index c47a274..0000000
--- a/compiler/llvm/runtime_support_builder_thumb2.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_LLVM_RUNTIME_SUPPORT_BUILDER_THUMB2_H_
-#define ART_COMPILER_LLVM_RUNTIME_SUPPORT_BUILDER_THUMB2_H_
-
-#include "runtime_support_builder_arm.h"
-
-namespace art {
-namespace llvm {
-
-class RuntimeSupportBuilderThumb2 : public RuntimeSupportBuilderARM {
- public:
-  RuntimeSupportBuilderThumb2(::llvm::LLVMContext& context, ::llvm::Module& module, IRBuilder& irb)
-    : RuntimeSupportBuilderARM(context, module, irb) {}
-
-  /* Monitor */
-  virtual void EmitLockObject(::llvm::Value* object);
-};
-
-}  // namespace llvm
-}  // namespace art
-
-#endif  // ART_COMPILER_LLVM_RUNTIME_SUPPORT_BUILDER_THUMB2_H_
diff --git a/dalvikvm/Android.mk b/dalvikvm/Android.mk
index 52584cf..a046391 100644
--- a/dalvikvm/Android.mk
+++ b/dalvikvm/Android.mk
@@ -38,7 +38,7 @@
 LOCAL_SRC_FILES := dalvikvm.cc
 LOCAL_CFLAGS := $(dalvikvm_cflags)
 LOCAL_SHARED_LIBRARIES := libnativehelper
-LOCAL_LDFLAGS := -ldl
+LOCAL_LDFLAGS := -ldl -lpthread
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 include $(BUILD_HOST_EXECUTABLE)
 ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index cc6b5d7..aacbbf5 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -777,7 +777,17 @@
     }
     os << "STATS:\n" << std::flush;
     UniquePtr<File> file(OS::OpenFileForReading(image_filename_.c_str()));
-    stats_.file_bytes = file->GetLength();
+    if (file.get() == NULL) {
+      std::string cache_location(GetDalvikCacheFilenameOrDie(image_filename_));
+      file.reset(OS::OpenFileForReading(cache_location.c_str()));
+      if (file.get() == NULL) {
+          LOG(WARNING) << "Failed to find image in " << image_filename_
+                       << " and " << cache_location;
+      }
+    }
+    if (file.get() != NULL) {
+        stats_.file_bytes = file->GetLength();
+    }
     size_t header_bytes = sizeof(ImageHeader);
     stats_.header_bytes = header_bytes;
     size_t alignment_bytes = RoundUp(header_bytes, kObjectAlignment) - header_bytes;
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 5edf759..d8abbf1 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -243,6 +243,7 @@
 	jdwp/jdwp.h \
 	jdwp/jdwp_constants.h \
 	locks.h \
+	lock_word.h \
 	mirror/class.h \
 	thread.h \
 	thread_state.h \
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index ed3d476..69fb9c3 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -27,5 +27,7 @@
 #define THREAD_FLAGS_OFFSET 0
 // Offset of field Thread::exception_ verified in InitCpu
 #define THREAD_EXCEPTION_OFFSET 12
+// Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
+#define THREAD_ID_OFFSET 60
 
 #endif  // ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_H_
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 5b2dd6c..cb61698 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -318,22 +318,67 @@
 END art_quick_handle_fill_data
 
     /*
-     * Entry from managed code that calls artLockObjectFromCode, may block for GC.
+     * Entry from managed code that calls artLockObjectFromCode, may block for GC. r0 holds the
+     * possibly null object to lock.
      */
     .extern artLockObjectFromCode
 ENTRY art_quick_lock_object
+    cbz    r0, slow_lock
+retry_lock:
+    ldrex  r1, [r0, #LOCK_WORD_OFFSET]
+    ldrt   r2, [r9, #THREAD_ID_OFFSET]
+    cmp    r1, #0
+    bmi    slow_lock                  @ lock word contains a monitor
+    bne    already_thin
+    @ unlocked case - r2 holds thread id with count of 0
+    strex  r3, r2, [r0, #LOCK_WORD_OFFSET]
+    cbnz   r3, strex_fail             @ store failed, retry
+    bx lr
+strex_fail:
+    b retry_lock                      @ unlikely forward branch, need to reload and recheck r1/r2
+already_thin:
+    eor    r2, r1, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
+    uxth   r2, r2                     @ zero top 16 bits
+    cbnz   r2, slow_lock              @ thread ids differ: contention, take the slow path
+                                      @ ids match: fall through to recursive lock
+    adds   r2, r1, #65536             @ increment count in lock word placing in r2 for storing
+    bmi    slow_lock                  @ if we overflow the count go slow
+    str    r2, [r0, #LOCK_WORD_OFFSET] @ no need for strex as we hold the lock
+    bx lr
+slow_lock:
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case we block
     mov    r1, r9                     @ pass Thread::Current
     mov    r2, sp                     @ pass SP
     bl     artLockObjectFromCode      @ (Object* obj, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_ZERO
+    DELIVER_PENDING_EXCEPTION
 END art_quick_lock_object
 
     /*
      * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
+     * r0 holds the possibly null object to unlock.
      */
     .extern artUnlockObjectFromCode
 ENTRY art_quick_unlock_object
+    cbz    r0, slow_unlock
+    ldr    r1, [r0, #LOCK_WORD_OFFSET]
+    ldr    r2, [r9, #THREAD_ID_OFFSET]
+    cmp    r1, #0
+    bmi    slow_unlock                @ lock word contains a monitor
+    eor    r3, r1, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
+    uxth   r3, r3                     @ zero top 16 bits
+    cbnz   r3, slow_unlock            @ do lock word and self thread id's match?
+    cmp    r1, #65536
+    bpl    recursive_thin_unlock
+    @ transition to unlocked, r3 holds 0
+    str    r3, [r0, #LOCK_WORD_OFFSET]
+    bx     lr
+recursive_thin_unlock:
+    sub    r1, r1, #65536
+    str    r1, [r0, #LOCK_WORD_OFFSET]
+    bx     lr
+slow_unlock:
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case exception allocation triggers GC
     mov    r1, r9                     @ pass Thread::Current
     mov    r2, sp                     @ pass SP
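
Both stubs above implement the same thin-lock protocol over the 32-bit lock word: zero means unlocked, a clear sign bit means thin-locked by the thread id in the low 16 bits with a recursion count above it, and a set sign bit means the word refers to an inflated monitor. A portable C++ sketch of the acquire fast path the assembly takes (a simplified model, not the runtime's actual code; std::atomic stands in for the ldrex/strex and lock cmpxchg sequences):

#include <atomic>
#include <cstdint>

enum { kThinLockCountOne = 1 << 16, kThinOwnerMask = 0xffff };  // mirrors the "#65536" increment

// Returns true if the lock was taken inline; false means "take the slow path"
// (null object, inflated monitor, contention, or recursion-count overflow).
bool ThinLockFastPath(std::atomic<int32_t>* lock_word, uint32_t self_thread_id) {
  if (lock_word == nullptr) {
    return false;                                  // models "cbz r0, slow_lock"
  }
  while (true) {
    int32_t word = lock_word->load(std::memory_order_relaxed);
    if (word < 0) {
      return false;                                // sign bit set: inflated monitor
    }
    if (word == 0) {
      // Unlocked: try to install our thread id with a count of zero.
      if (lock_word->compare_exchange_weak(word, static_cast<int32_t>(self_thread_id))) {
        return true;
      }
      continue;                                    // store failed: retry_lock
    }
    if ((static_cast<uint32_t>(word) & kThinOwnerMask) != self_thread_id) {
      return false;                                // owned by another thread: contention
    }
    int32_t new_word = word + kThinLockCountOne;   // recursive acquire
    if (new_word < 0) {
      return false;                                // count would overflow: go slow
    }
    lock_word->store(new_word, std::memory_order_relaxed);  // we already hold the lock
    return true;
  }
}
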
diff --git a/runtime/arch/arm/thread_arm.cc b/runtime/arch/arm/thread_arm.cc
index ea908be..75eef60 100644
--- a/runtime/arch/arm/thread_arm.cc
+++ b/runtime/arch/arm/thread_arm.cc
@@ -24,6 +24,7 @@
 void Thread::InitCpu() {
   CHECK_EQ(THREAD_FLAGS_OFFSET, OFFSETOF_MEMBER(Thread, state_and_flags_));
   CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
+  CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
 }
 
 }  // namespace art
diff --git a/runtime/arch/x86/asm_support_x86.h b/runtime/arch/x86/asm_support_x86.h
index 1092910..d4e0927 100644
--- a/runtime/arch/x86/asm_support_x86.h
+++ b/runtime/arch/x86/asm_support_x86.h
@@ -23,5 +23,7 @@
 #define THREAD_SELF_OFFSET 40
 // Offset of field Thread::exception_ verified in InitCpu
 #define THREAD_EXCEPTION_OFFSET 12
+// Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
+#define THREAD_ID_OFFSET 60
 
 #endif  // ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_H_
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 06b2203..6be73d1 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -401,14 +401,85 @@
 TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_EAX_NOT_ZERO
 TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_EAX_NOT_ZERO
 
-ONE_ARG_DOWNCALL art_quick_lock_object, artLockObjectFromCode, ret
-ONE_ARG_DOWNCALL art_quick_unlock_object, artUnlockObjectFromCode, RETURN_IF_EAX_ZERO
-
 TWO_ARG_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
 
+DEFINE_FUNCTION art_quick_lock_object
+    testl %eax, %eax                      // null check object/eax
+    jz   slow_lock
+retry_lock:
+    movl LOCK_WORD_OFFSET(%eax), %ecx     // ecx := lock word
+    movl %fs:THREAD_ID_OFFSET, %edx       // edx := thread id
+    test %ecx, %ecx
+    jb   slow_lock                        // lock word contains a monitor
+    jnz  already_thin                     // lock word contains a thin lock
+    // unlocked case - %edx holds thread id with count of 0
+    movl %eax, %ecx                       // remember object in case of retry
+    xor  %eax, %eax                       // eax == 0 for comparison with lock word in cmpxchg
+    lock cmpxchg  %edx, LOCK_WORD_OFFSET(%ecx)
+    jnz  cmpxchg_fail                     // cmpxchg failed retry
+    ret
+cmpxchg_fail:
+    movl  %ecx, %eax                       // restore eax
+    jmp  retry_lock
+already_thin:
+    cmpw %ax, %dx                         // do we hold the lock already?
+    jne  slow_lock
+    addl LITERAL(65536), %eax             // increment recursion count
+    jb   slow_lock                        // count overflowed so go slow
+    movl %eax, LOCK_WORD_OFFSET(%ecx)     // update lockword, cmpxchg not necessary as we hold lock
+    ret
+slow_lock:
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
+    mov %esp, %edx                // remember SP
+    // Outgoing argument set up
+    PUSH eax                      // push padding
+    PUSH edx                      // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    .cfi_adjust_cfa_offset 4
+    PUSH eax                      // pass object
+    call artLockObjectFromCode    // artLockObjectFromCode(object, Thread*, SP)
+    addl MACRO_LITERAL(16), %esp  // pop arguments
+    .cfi_adjust_cfa_offset -16
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RETURN_IF_EAX_ZERO
+END_FUNCTION art_quick_lock_object
+
+DEFINE_FUNCTION art_quick_unlock_object
+    testl %eax, %eax                      // null check object/eax
+    jz   slow_unlock
+    movl LOCK_WORD_OFFSET(%eax), %ecx     // ecx := lock word
+    movl %fs:THREAD_ID_OFFSET, %edx       // edx := thread id
+    test %ecx, %ecx
+    jb   slow_unlock                      // lock word contains a monitor
+    cmpw %cx, %dx                         // does the thread id match?
+    jne  slow_unlock
+    cmpl LITERAL(65536), %ecx
+    jae  recursive_thin_unlock
+    movl LITERAL(0), LOCK_WORD_OFFSET(%eax)
+    ret
+recursive_thin_unlock:
+    subl LITERAL(65536), %ecx
+    mov  %ecx, LOCK_WORD_OFFSET(%eax)
+    ret
+slow_unlock:
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
+    mov %esp, %edx                // remember SP
+    // Outgoing argument set up
+    PUSH eax                      // push padding
+    PUSH edx                      // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    .cfi_adjust_cfa_offset 4
+    PUSH eax                      // pass object
+    call artUnlockObjectFromCode    // artUnlockObjectFromCode(object, Thread*, SP)
+    addl MACRO_LITERAL(16), %esp  // pop arguments
+    .cfi_adjust_cfa_offset -16
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RETURN_IF_EAX_ZERO
+END_FUNCTION art_quick_unlock_object
+
 DEFINE_FUNCTION art_quick_is_assignable
     PUSH eax                     // alignment padding
-    PUSH ecx                    // pass arg2
+    PUSH ecx                     // pass arg2
     PUSH eax                     // pass arg1
     call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b, Thread*, SP)
     addl LITERAL(12), %esp        // pop arguments
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index dd3e7dd..7e0aee0 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -134,6 +134,7 @@
 
   // Sanity check other offsets.
   CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
+  CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
 }
 
 }  // namespace art
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index aca93a5..d2eaf8e 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -21,6 +21,9 @@
 // check.
 #define SUSPEND_CHECK_INTERVAL (1000)
 
+// Offsets within java.lang.Object.
+#define LOCK_WORD_OFFSET 4
+
 // Offsets within java.lang.String.
 #define STRING_VALUE_OFFSET 8
 #define STRING_COUNT_OFFSET 12
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index b048bbb..249f031 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -54,17 +54,17 @@
   std::set<BaseMutex*>* all_mutexes;
   AllMutexData() : all_mutexes(NULL) {}
 };
-static struct AllMutexData all_mutex_data[kAllMutexDataSize];
+static struct AllMutexData gAllMutexData[kAllMutexDataSize];
 
 class ScopedAllMutexesLock {
  public:
   explicit ScopedAllMutexesLock(const BaseMutex* mutex) : mutex_(mutex) {
-    while (!all_mutex_data->all_mutexes_guard.compare_and_swap(0, reinterpret_cast<int32_t>(mutex))) {
+    while (!gAllMutexData->all_mutexes_guard.compare_and_swap(0, reinterpret_cast<int32_t>(mutex))) {
       NanoSleep(100);
     }
   }
   ~ScopedAllMutexesLock() {
-    while (!all_mutex_data->all_mutexes_guard.compare_and_swap(reinterpret_cast<int32_t>(mutex_), 0)) {
+    while (!gAllMutexData->all_mutexes_guard.compare_and_swap(reinterpret_cast<int32_t>(mutex_), 0)) {
       NanoSleep(100);
     }
   }
@@ -75,7 +75,7 @@
 BaseMutex::BaseMutex(const char* name, LockLevel level) : level_(level), name_(name) {
   if (kLogLockContentions) {
     ScopedAllMutexesLock mu(this);
-    std::set<BaseMutex*>** all_mutexes_ptr = &all_mutex_data->all_mutexes;
+    std::set<BaseMutex*>** all_mutexes_ptr = &gAllMutexData->all_mutexes;
     if (*all_mutexes_ptr == NULL) {
       // We leak the global set of all mutexes to avoid ordering issues in global variable
       // construction/destruction.
@@ -88,7 +88,7 @@
 BaseMutex::~BaseMutex() {
   if (kLogLockContentions) {
     ScopedAllMutexesLock mu(this);
-    all_mutex_data->all_mutexes->erase(this);
+    gAllMutexData->all_mutexes->erase(this);
   }
 }
 
@@ -96,13 +96,13 @@
   if (kLogLockContentions) {
     os << "Mutex logging:\n";
     ScopedAllMutexesLock mu(reinterpret_cast<const BaseMutex*>(-1));
-    std::set<BaseMutex*>* all_mutexes = all_mutex_data->all_mutexes;
+    std::set<BaseMutex*>* all_mutexes = gAllMutexData->all_mutexes;
     if (all_mutexes == NULL) {
       // No mutexes have been created yet during at startup.
       return;
     }
     typedef std::set<BaseMutex*>::const_iterator It;
-    os << "(Contented)\n";
+    os << "(Contended)\n";
     for (It it = all_mutexes->begin(); it != all_mutexes->end(); ++it) {
       BaseMutex* mutex = *it;
       if (mutex->HasEverContended()) {
@@ -127,7 +127,8 @@
     return;
   }
   if (kDebugLocking) {
-    CHECK(self->GetHeldMutex(level_) == this) << "Waiting on unacquired mutex: " << name_;
+    CHECK(self->GetHeldMutex(level_) == this || level_ == kMonitorLock)
+        << "Waiting on unacquired mutex: " << name_;
     bool bad_mutexes_held = false;
     for (int i = kLockLevelCount - 1; i >= 0; --i) {
       if (i != level_) {
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index ee37388..feb8a6c 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -58,7 +58,7 @@
 // futex.
 const bool kLogLockContentions = false;
 #endif
-const size_t kContentionLogSize = 64;
+const size_t kContentionLogSize = 4;
 const size_t kContentionLogDataSize = kLogLockContentions ? 1 : 0;
 const size_t kAllMutexDataSize = kLogLockContentions ? 1 : 0;
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 17f8625..8e42928 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -596,6 +596,7 @@
     // change process groups, so we don't get reaped by ProcessManager
     setpgid(0, 0);
 
+    // gLogVerbosity.class_linker = true;
     VLOG(class_linker) << dex2oat
                        << " --runtime-arg -Xms64m"
                        << " --runtime-arg -Xmx64m"
@@ -832,10 +833,14 @@
   RegisterOatFileLocked(*oat_file);
   const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_location);
   if (oat_dex_file == NULL) {
-    LOG(ERROR) << "Failed to find dex file in generated oat file: " << oat_location;
+    LOG(ERROR) << "Failed to find dex file " << dex_location << " in generated oat file: " << oat_location;
     return NULL;
   }
-  return oat_dex_file->OpenDexFile();
+  const DexFile* result = oat_dex_file->OpenDexFile();
+  CHECK_EQ(dex_location_checksum, result->GetLocationChecksum()) << std::hex
+          << "dex_location_checksum=" << dex_location_checksum
+          << " DexFile::GetLocationChecksum()=" << result->GetLocationChecksum();
+  return result;
 }
 
 bool ClassLinker::VerifyOatFileChecksums(const OatFile* oat_file,
@@ -870,14 +875,14 @@
     std::string image_file(image_header.GetImageRoot(
         ImageHeader::kOatLocation)->AsString()->ToModifiedUtf8());
     LOG(WARNING) << "oat file " << oat_file->GetLocation()
-                 << " mismatch ( " << std::hex << oat_file->GetOatHeader().GetImageFileLocationOatChecksum()
+                 << " mismatch (" << std::hex << oat_file->GetOatHeader().GetImageFileLocationOatChecksum()
                  << ", " << oat_file->GetOatHeader().GetImageFileLocationOatDataBegin()
                  << ") with " << image_file
                  << " (" << image_oat_checksum << ", " << std::hex << image_oat_data_begin << ")";
   }
   if (!dex_check) {
     LOG(WARNING) << "oat file " << oat_file->GetLocation()
-                 << " mismatch ( " << std::hex << oat_dex_file->GetDexFileLocationChecksum()
+                 << " mismatch (" << std::hex << oat_dex_file->GetDexFileLocationChecksum()
                  << ") with " << dex_location
                  << " (" << std::hex << dex_location_checksum << ")";
   }
@@ -889,6 +894,7 @@
                                                             uint32_t dex_location_checksum) {
   bool verified = VerifyOatFileChecksums(oat_file, dex_location, dex_location_checksum);
   if (!verified) {
+    delete oat_file;
     return NULL;
   }
   RegisterOatFileLocked(*oat_file);
@@ -906,8 +912,8 @@
   // Look for an existing file next to dex. for example, for
   // /foo/bar/baz.jar, look for /foo/bar/baz.odex.
   std::string odex_filename(OatFile::DexFilenameToOdexFilename(dex_location));
-  const OatFile* oat_file = FindOatFileFromOatLocationLocked(odex_filename);
-  if (oat_file != NULL) {
+  UniquePtr<const OatFile> oat_file(FindOatFileFromOatLocationLocked(odex_filename));
+  if (oat_file.get() != NULL) {
     uint32_t dex_location_checksum;
     if (!DexFile::GetChecksum(dex_location, dex_location_checksum)) {
       // If no classes.dex found in dex_location, it has been stripped, assume oat is up-to-date.
@@ -917,7 +923,7 @@
       RegisterOatFileLocked(*oat_file);
       return oat_dex_file->OpenDexFile();
     }
-    const DexFile* dex_file = VerifyAndOpenDexFileFromOatFile(oat_file,
+    const DexFile* dex_file = VerifyAndOpenDexFileFromOatFile(oat_file.release(),
                                                               dex_location,
                                                               dex_location_checksum);
     if (dex_file != NULL) {
@@ -927,21 +933,21 @@
   // Look for an existing file in the dalvik-cache, validating the result if found
   // not found in /foo/bar/baz.odex? try /data/dalvik-cache/foo@bar@baz.jar@classes.dex
   std::string cache_location(GetDalvikCacheFilenameOrDie(dex_location));
-  oat_file = FindOatFileFromOatLocationLocked(cache_location);
-  if (oat_file != NULL) {
+  oat_file.reset(FindOatFileFromOatLocationLocked(cache_location));
+  if (oat_file.get() != NULL) {
     uint32_t dex_location_checksum;
     if (!DexFile::GetChecksum(dex_location, dex_location_checksum)) {
       LOG(WARNING) << "Failed to compute checksum: " << dex_location;
       return NULL;
     }
-    const DexFile* dex_file = VerifyAndOpenDexFileFromOatFile(oat_file,
+    const DexFile* dex_file = VerifyAndOpenDexFileFromOatFile(oat_file.release(),
                                                               dex_location,
                                                               dex_location_checksum);
     if (dex_file != NULL) {
       return dex_file;
     }
-    if (TEMP_FAILURE_RETRY(unlink(oat_file->GetLocation().c_str())) != 0) {
-      PLOG(FATAL) << "Failed to remove obsolete oat file " << oat_file->GetLocation();
+    if (TEMP_FAILURE_RETRY(unlink(cache_location.c_str())) != 0) {
+      PLOG(FATAL) << "Failed to remove obsolete oat file from " << cache_location;
     }
   }
   LOG(INFO) << "Failed to open oat file from " << odex_filename << " or " << cache_location << ".";
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index ae57aa3..57bd57e 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -674,15 +674,15 @@
   Locks::mutator_lock_->ExclusiveUnlock(self);
   Locks::mutator_lock_->SharedLock(self);
 
-  if (monitor_info.owner != NULL) {
-    expandBufAddObjectId(reply, gRegistry->Add(monitor_info.owner->GetPeer()));
+  if (monitor_info.owner_ != NULL) {
+    expandBufAddObjectId(reply, gRegistry->Add(monitor_info.owner_->GetPeer()));
   } else {
     expandBufAddObjectId(reply, gRegistry->Add(NULL));
   }
-  expandBufAdd4BE(reply, monitor_info.entry_count);
-  expandBufAdd4BE(reply, monitor_info.waiters.size());
-  for (size_t i = 0; i < monitor_info.waiters.size(); ++i) {
-    expandBufAddObjectId(reply, gRegistry->Add(monitor_info.waiters[i]->GetPeer()));
+  expandBufAdd4BE(reply, monitor_info.entry_count_);
+  expandBufAdd4BE(reply, monitor_info.waiters_.size());
+  for (size_t i = 0; i < monitor_info.waiters_.size(); ++i) {
+    expandBufAddObjectId(reply, gRegistry->Add(monitor_info.waiters_[i]->GetPeer()));
   }
   return JDWP::ERR_NONE;
 }
@@ -1935,7 +1935,8 @@
   }
   // Suspend thread to build stack trace.
   bool timed_out;
-  Thread* thread = Thread::SuspendForDebugger(peer.get(), request_suspension, &timed_out);
+  Thread* thread = ThreadList::SuspendThreadByPeer(peer.get(), request_suspension, true,
+                                                   &timed_out);
   if (thread != NULL) {
     return JDWP::ERR_NONE;
   } else if (timed_out) {
@@ -2412,7 +2413,8 @@
         soa.Self()->TransitionFromRunnableToSuspended(kWaitingForDebuggerSuspension);
         jobject thread_peer = gRegistry->GetJObject(thread_id);
         bool timed_out;
-        Thread* suspended_thread = Thread::SuspendForDebugger(thread_peer, true, &timed_out);
+        Thread* suspended_thread = ThreadList::SuspendThreadByPeer(thread_peer, true, true,
+                                                                   &timed_out);
         CHECK_EQ(soa.Self()->TransitionFromSuspendedToRunnable(), kWaitingForDebuggerSuspension);
         if (suspended_thread == NULL) {
           // Thread terminated from under us while suspending.
@@ -3012,7 +3014,7 @@
 
   if (type == CHUNK_TYPE("THDE")) {
     uint8_t buf[4];
-    JDWP::Set4BE(&buf[0], t->GetThinLockId());
+    JDWP::Set4BE(&buf[0], t->GetThreadId());
     Dbg::DdmSendChunk(CHUNK_TYPE("THDE"), 4, buf);
   } else {
     CHECK(type == CHUNK_TYPE("THCR") || type == CHUNK_TYPE("THNM")) << type;
@@ -3022,7 +3024,7 @@
     const jchar* chars = (name.get() != NULL) ? name->GetCharArray()->GetData() : NULL;
 
     std::vector<uint8_t> bytes;
-    JDWP::Append4BE(bytes, t->GetThinLockId());
+    JDWP::Append4BE(bytes, t->GetThreadId());
     JDWP::AppendUtf16BE(bytes, chars, char_count);
     CHECK_EQ(bytes.size(), char_count*2 + sizeof(uint32_t)*2);
     Dbg::DdmSendChunk(type, bytes);
@@ -3545,7 +3547,7 @@
   AllocRecord* record = &recent_allocation_records_[gAllocRecordHead];
   record->type = type;
   record->byte_count = byte_count;
-  record->thin_lock_id = self->GetThinLockId();
+  record->thin_lock_id = self->GetThreadId();
 
   // Fill in the stack trace.
   AllocRecordStackVisitor visitor(self, record);
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index e87dc96..e9e6c5a 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -378,7 +378,6 @@
   for (;;) {
     if (thread->ReadFlag(kCheckpointRequest)) {
       thread->RunCheckpointFunction();
-      thread->AtomicClearFlag(kCheckpointRequest);
     } else if (thread->ReadFlag(kSuspendRequest)) {
       thread->FullSuspendCheck();
     } else {
diff --git a/runtime/entrypoints/quick/quick_lock_entrypoints.cc b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
index 36ca604..2102ab1 100644
--- a/runtime/entrypoints/quick/quick_lock_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
@@ -15,28 +15,40 @@
  */
 
 #include "callee_save_frame.h"
+#include "common_throws.h"
 #include "mirror/object-inl.h"
 
 namespace art {
 
-extern "C" int artUnlockObjectFromCode(mirror::Object* obj, Thread* self,
-                                       mirror::ArtMethod** sp)
-    UNLOCK_FUNCTION(monitor_lock_) {
+extern "C" int artLockObjectFromCode(mirror::Object* obj, Thread* self, mirror::ArtMethod** sp)
+    EXCLUSIVE_LOCK_FUNCTION(monitor_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  DCHECK(obj != NULL);  // Assumed to have been checked before entry
-  // MonitorExit may throw exception
-  return obj->MonitorExit(self) ? 0 /* Success */ : -1 /* Failure */;
+  if (UNLIKELY(obj == NULL)) {
+    ThrowLocation throw_location(self->GetCurrentLocationForThrow());
+    ThrowNullPointerException(&throw_location,
+                              "Null reference used for synchronization (monitor-enter)");
+    return -1;  // Failure.
+  } else {
+    obj->MonitorEnter(self);  // May block
+    DCHECK(self->HoldsLock(obj));
+    DCHECK(!self->IsExceptionPending());
+    return 0;  // Success.
+    // Only possible exception is NPE and is handled before entry
+  }
 }
 
-extern "C" void artLockObjectFromCode(mirror::Object* obj, Thread* thread,
-                                      mirror::ArtMethod** sp)
-    EXCLUSIVE_LOCK_FUNCTION(monitor_lock_) {
-  FinishCalleeSaveFrameSetup(thread, sp, Runtime::kRefsOnly);
-  DCHECK(obj != NULL);        // Assumed to have been checked before entry
-  obj->MonitorEnter(thread);  // May block
-  DCHECK(thread->HoldsLock(obj));
-  // Only possible exception is NPE and is handled before entry
-  DCHECK(!thread->IsExceptionPending());
+extern "C" int artUnlockObjectFromCode(mirror::Object* obj, Thread* self, mirror::ArtMethod** sp)
+    UNLOCK_FUNCTION(monitor_lock_) {
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  if (UNLIKELY(obj == NULL)) {
+    ThrowLocation throw_location(self->GetCurrentLocationForThrow());
+    ThrowNullPointerException(&throw_location,
+                              "Null reference used for synchronization (monitor-exit)");
+    return -1;  // Failure.
+  } else {
+    // MonitorExit may throw exception.
+    return obj->MonitorExit(self) ? 0 /* Success */ : -1 /* Failure */;
+  }
 }
 
 }  // namespace art
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 8be9b21..b1b664d 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -434,7 +434,7 @@
         class_loader_(class_loader),
         jni_on_load_lock_("JNI_OnLoad lock"),
         jni_on_load_cond_("JNI_OnLoad condition variable", jni_on_load_lock_),
-        jni_on_load_thread_id_(Thread::Current()->GetThinLockId()),
+        jni_on_load_thread_id_(Thread::Current()->GetThreadId()),
         jni_on_load_result_(kPending) {
   }
 
@@ -459,7 +459,7 @@
     {
       MutexLock mu(self, jni_on_load_lock_);
 
-      if (jni_on_load_thread_id_ == self->GetThinLockId()) {
+      if (jni_on_load_thread_id_ == self->GetThreadId()) {
         // Check this so we don't end up waiting for ourselves.  We need to return "true" so the
         // caller can continue.
         LOG(INFO) << *self << " recursive attempt to load library " << "\"" << path_ << "\"";
diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h
new file mode 100644
index 0000000..30bf9bb
--- /dev/null
+++ b/runtime/lock_word-inl.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_LOCK_WORD_INL_H_
+#define ART_RUNTIME_LOCK_WORD_INL_H_
+
+#include "lock_word.h"
+
+namespace art {
+
+inline uint32_t LockWord::ThinLockOwner() const {
+  DCHECK_EQ(GetState(), kThinLocked);
+  return (value_ >> kThinLockOwnerShift) & kThinLockOwnerMask;
+}
+
+inline uint32_t LockWord::ThinLockCount() const {
+  DCHECK_EQ(GetState(), kThinLocked);
+  return (value_ >> kThinLockCountShift) & kThinLockCountMask;
+}
+
+inline Monitor* LockWord::FatLockMonitor() const {
+  DCHECK_EQ(GetState(), kFatLocked);
+  return reinterpret_cast<Monitor*>(value_ << 1);
+}
+
+inline LockWord::LockWord() : value_(0) {
+  DCHECK_EQ(GetState(), kUnlocked);
+}
+
+inline LockWord::LockWord(Monitor* mon)
+    : value_((reinterpret_cast<uint32_t>(mon) >> 1) | (kStateFat << kStateShift)) {
+  DCHECK_EQ(FatLockMonitor(), mon);
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_LOCK_WORD_INL_H_
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
new file mode 100644
index 0000000..cd4bfbb
--- /dev/null
+++ b/runtime/lock_word.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_LOCK_WORD_H_
+#define ART_RUNTIME_LOCK_WORD_H_
+
+#include <iosfwd>
+#include <stdint.h>
+
+#include "base/logging.h"
+
+namespace art {
+namespace mirror {
+  class Object;
+}  // namespace mirror
+
+class Monitor;
+
+/* The lock value itself as stored in mirror::Object::monitor_.  The MSB of the lock encodes its
+ * state.  When cleared, the lock is in the "thin" state and its bits are formatted as follows:
+ *
+ *  |3|322222222221111|1111110000000000|
+ *  |1|098765432109876|5432109876543210|
+ *  |0| lock count    | thread id      |
+ *
+ * When set, the lock is in the "fat" state and its bits are formatted as follows:
+ *
+ *  |3|3222222222211111111110000000000|
+ *  |1|0987654321098765432109876543210|
+ *  |1| Monitor* >> 1                 |
+ */
+class LockWord {
+ public:
+  enum {
+    // Number of bits to encode the state, currently just fat or thin/unlocked.
+    kStateSize = 1,
+    // Number of bits to encode the thin lock owner.
+    kThinLockOwnerSize = 16,
+    // Remaining bits are the recursive lock count.
+    kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize,
+
+    // Thin lock bits. Owner in lowest bits.
+    kThinLockOwnerShift = 0,
+    kThinLockOwnerMask = (1 << kThinLockOwnerSize) - 1,
+    // Count in higher bits.
+    kThinLockCountShift = kThinLockOwnerSize + kThinLockOwnerShift,
+    kThinLockCountMask = (1 << kThinLockCountShift) - 1,
+    kThinLockMaxCount = kThinLockCountMask,
+
+    // State in the highest bits.
+    kStateShift = kThinLockCountSize + kThinLockCountShift,
+    kStateMask = (1 << kStateSize) - 1,
+    kStateThinOrUnlocked = 0,
+    kStateFat = 1,
+  };
+
+  static LockWord FromThinLockId(uint32_t thread_id, uint32_t count) {
+    CHECK_LE(thread_id, static_cast<uint32_t>(kThinLockOwnerMask));
+    return LockWord((thread_id << kThinLockOwnerShift) | (count << kThinLockCountShift));
+  }
+
+  enum LockState {
+    kUnlocked,    // No lock owners.
+    kThinLocked,  // Single uncontended owner.
+    kFatLocked    // See associated monitor.
+  };
+
+  LockState GetState() const {
+    if (value_ == 0) {
+      return kUnlocked;
+    } else if (((value_ >> kStateShift) & kStateMask) == kStateThinOrUnlocked) {
+      return kThinLocked;
+    } else {
+      return kFatLocked;
+    }
+  }
+
+  // Return the owner thin lock thread id.
+  uint32_t ThinLockOwner() const;
+
+  // Return the number of times a lock value has been locked.
+  uint32_t ThinLockCount() const;
+
+  // Return the Monitor encoded in a fat lock.
+  Monitor* FatLockMonitor() const;
+
+  // Default constructor with no lock ownership.
+  LockWord();
+
+  // Construct a lock word for inflation to use a Monitor.
+  explicit LockWord(Monitor* mon);
+
+  bool operator==(const LockWord& rhs) {
+    return GetValue() == rhs.GetValue();
+  }
+
+ private:
+  explicit LockWord(uint32_t val) : value_(val) {}
+
+  uint32_t GetValue() const {
+    return value_;
+  }
+
+  // Only Object should be converting LockWords to/from uints.
+  friend class mirror::Object;
+
+  // The encoded value holding all the state.
+  uint32_t value_;
+};
+std::ostream& operator<<(std::ostream& os, const LockWord::LockState& code);
+
+}  // namespace art
+
+
+#endif  // ART_RUNTIME_LOCK_WORD_H_
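
The LockWord layout above can be exercised in isolation. Below is a minimal, stand-alone sketch of the same 1-bit-state / 15-bit-count / 16-bit-owner encoding; the constants mirror the enum, but the helper names (EncodeThinLock, DecodeOwner, DecodeCount) are illustrative only and not part of the runtime.

#include <cassert>
#include <cstdint>

// Illustrative constants mirroring LockWord: 16 owner bits, 15 count bits, 1 state bit (MSB).
constexpr uint32_t kOwnerBits = 16;
constexpr uint32_t kCountBits = 15;
constexpr uint32_t kOwnerMask = (1u << kOwnerBits) - 1;
constexpr uint32_t kCountMask = (1u << kCountBits) - 1;
constexpr uint32_t kCountShift = kOwnerBits;

// Pack a thin lock: owner thread id in the low bits, recursion count above it, state bit clear.
uint32_t EncodeThinLock(uint32_t owner, uint32_t count) {
  assert(owner <= kOwnerMask && count <= kCountMask);
  return (owner & kOwnerMask) | ((count & kCountMask) << kCountShift);
}

uint32_t DecodeOwner(uint32_t word) { return word & kOwnerMask; }
uint32_t DecodeCount(uint32_t word) { return (word >> kCountShift) & kCountMask; }

int main() {
  uint32_t w = EncodeThinLock(/*owner=*/5, /*count=*/3);
  assert(DecodeOwner(w) == 5 && DecodeCount(w) == 3);
  return 0;
}
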
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index c6db5b9..b16c2f7 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -60,7 +60,7 @@
     }
     if (new_status >= kStatusResolved || old_status >= kStatusResolved) {
       // When classes are being resolved the resolution code should hold the lock.
-      CHECK_EQ(GetThinLockId(), self->GetThinLockId())
+      CHECK_EQ(GetLockOwnerThreadId(), self->GetThreadId())
             << "Attempt to change status of class while not holding its lock: "
             << PrettyClass(this) << " " << old_status << " -> " << new_status;
     }
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 5ed3db3..e659108 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -24,6 +24,7 @@
 #include "atomic.h"
 #include "array-inl.h"
 #include "class.h"
+#include "lock_word-inl.h"
 #include "monitor.h"
 #include "runtime.h"
 #include "throwable.h"
@@ -43,8 +44,21 @@
   SetFieldPtr(OFFSET_OF_OBJECT_MEMBER(Object, klass_), new_klass, false, false);
 }
 
-inline uint32_t Object::GetThinLockId() {
-  return Monitor::GetThinLockId(monitor_);
+inline LockWord Object::GetLockWord() {
+  return LockWord(GetField32(OFFSET_OF_OBJECT_MEMBER(Object, monitor_), true));
+}
+
+inline void Object::SetLockWord(LockWord new_val) {
+  SetField32(OFFSET_OF_OBJECT_MEMBER(Object, monitor_), new_val.GetValue(), true);
+}
+
+inline bool Object::CasLockWord(LockWord old_val, LockWord new_val) {
+  return CasField32(OFFSET_OF_OBJECT_MEMBER(Object, monitor_), old_val.GetValue(),
+                    new_val.GetValue());
+}
+
+inline uint32_t Object::GetLockOwnerThreadId() {
+  return Monitor::GetLockOwnerThreadId(this);
 }
 
 inline void Object::MonitorEnter(Thread* self) {
@@ -238,6 +252,13 @@
   return result;
 }
 
+inline bool Object::CasField32(MemberOffset field_offset, uint32_t old_value, uint32_t new_value) {
+  VerifyObject(this);
+  byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
+  int32_t* addr = reinterpret_cast<int32_t*>(raw_addr);
+  return android_atomic_release_cas(old_value, new_value, addr) == 0;
+}
+
 inline uint64_t Object::GetField64(MemberOffset field_offset, bool is_volatile) const {
   VerifyObject(this);
   const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value();
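
The new Object::CasLockWord/CasField32 path publishes lock words with a release CAS (android_atomic_release_cas above). A rough portable equivalent, assuming std::atomic in place of the Android atomic primitives, is sketched below; it only approximates the memory-ordering contract of the real helper.

#include <atomic>
#include <cstdint>

// Sketch: a 32-bit lock-word field updated with a compare-and-swap that has
// release semantics on success, mirroring what CasField32 provides for monitor_.
struct ObjectSketch {
  std::atomic<uint32_t> monitor_{0};

  bool CasLockWord(uint32_t expected, uint32_t desired) {
    // Returns true only if monitor_ still held 'expected' at the time of the CAS.
    return monitor_.compare_exchange_strong(expected, desired,
                                            std::memory_order_release,
                                            std::memory_order_relaxed);
  }
};
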
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 003581a..e3f5c10 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -26,6 +26,7 @@
 namespace art {
 
 class ImageWriter;
+class LockWord;
 struct ObjectOffsets;
 class Thread;
 
@@ -95,14 +96,10 @@
     return OFFSET_OF_OBJECT_MEMBER(Object, monitor_);
   }
 
-  volatile int32_t* GetRawLockWordAddress() {
-    byte* raw_addr = reinterpret_cast<byte*>(this) +
-        OFFSET_OF_OBJECT_MEMBER(Object, monitor_).Int32Value();
-    int32_t* word_addr = reinterpret_cast<int32_t*>(raw_addr);
-    return const_cast<volatile int32_t*>(word_addr);
-  }
-
-  uint32_t GetThinLockId();
+  LockWord GetLockWord();
+  void SetLockWord(LockWord new_val);
+  bool CasLockWord(LockWord old_val, LockWord new_val);
+  uint32_t GetLockOwnerThreadId();
 
   void MonitorEnter(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCK_FUNCTION(monitor_lock_);
@@ -226,6 +223,8 @@
     }
   }
 
+  bool CasField32(MemberOffset field_offset, uint32_t old_value, uint32_t new_value);
+
   uint64_t GetField64(MemberOffset field_offset, bool is_volatile) const;
 
   void SetField64(MemberOffset field_offset, uint64_t new_value, bool is_volatile);
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index e7ab2d4..1ceaa5d 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -23,6 +23,7 @@
 #include "class_linker.h"
 #include "dex_file-inl.h"
 #include "dex_instruction.h"
+#include "lock_word-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -37,36 +38,20 @@
 namespace art {
 
 /*
- * Every Object has a monitor associated with it, but not every Object is
- * actually locked.  Even the ones that are locked do not need a
- * full-fledged monitor until a) there is actual contention or b) wait()
- * is called on the Object.
+ * Every Object has a monitor associated with it, but not every Object is actually locked.  Even
+ * the ones that are locked do not need a full-fledged monitor until a) there is actual contention
+ * or b) wait() is called on the Object.
  *
- * For Android, we have implemented a scheme similar to the one described
- * in Bacon et al.'s "Thin locks: featherweight synchronization for Java"
- * (ACM 1998).  Things are even easier for us, though, because we have
- * a full 32 bits to work with.
+ * For Android, we have implemented a scheme similar to the one described in Bacon et al.'s
+ * "Thin locks: featherweight synchronization for Java" (ACM 1998).  Things are even easier for us,
+ * though, because we have a full 32 bits to work with.
  *
- * The two states of an Object's lock are referred to as "thin" and
- * "fat".  A lock may transition from the "thin" state to the "fat"
- * state and this transition is referred to as inflation.  Once a lock
- * has been inflated it remains in the "fat" state indefinitely.
+ * The two states of an Object's lock are referred to as "thin" and "fat".  A lock may transition
+ * from the "thin" state to the "fat" state and this transition is referred to as inflation. Once
+ * a lock has been inflated it remains in the "fat" state indefinitely.
  *
- * The lock value itself is stored in Object.lock.  The LSB of the
- * lock encodes its state.  When cleared, the lock is in the "thin"
- * state and its bits are formatted as follows:
- *
- *    [31 ---- 19] [18 ---- 3] [2 ---- 1] [0]
- *     lock count   thread id  hash state  0
- *
- * When set, the lock is in the "fat" state and its bits are formatted
- * as follows:
- *
- *    [31 ---- 3] [2 ---- 1] [0]
- *      pointer   hash state  1
- *
- * For an in-depth description of the mechanics of thin-vs-fat locking,
- * read the paper referred to above.
+ * The lock value itself is stored in mirror::Object::monitor_ and the representation is described
+ * in the LockWord value type.
  *
  * Monitors provide:
  *  - mutually exclusive access to resources
@@ -74,32 +59,11 @@
  *
  * In effect, they fill the role of both mutexes and condition variables.
  *
- * Only one thread can own the monitor at any time.  There may be several
- * threads waiting on it (the wait call unlocks it).  One or more waiting
- * threads may be getting interrupted or notified at any given time.
- *
- * TODO: the various members of monitor are not SMP-safe.
+ * Only one thread can own the monitor at any time.  There may be several threads waiting on it
+ * (the wait call unlocks it).  One or more waiting threads may be getting interrupted or notified
+ * at any given time.
  */
 
-// The shape is the bottom bit; either LW_SHAPE_THIN or LW_SHAPE_FAT.
-#define LW_SHAPE_MASK 0x1
-#define LW_SHAPE(x) static_cast<int>((x) & LW_SHAPE_MASK)
-
-/*
- * Monitor accessor.  Extracts a monitor structure pointer from a fat
- * lock.  Performs no error checking.
- */
-#define LW_MONITOR(x) \
-  (reinterpret_cast<Monitor*>((x) & ~((LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT) | LW_SHAPE_MASK)))
-
-/*
- * Lock recursion count field.  Contains a count of the number of times
- * a lock has been recursively acquired.
- */
-#define LW_LOCK_COUNT_MASK 0x1fff
-#define LW_LOCK_COUNT_SHIFT 19
-#define LW_LOCK_COUNT(x) (((x) >> LW_LOCK_COUNT_SHIFT) & LW_LOCK_COUNT_MASK)
-
 bool (*Monitor::is_sensitive_thread_hook_)() = NULL;
 uint32_t Monitor::lock_profiling_threshold_ = 0;
 
@@ -117,29 +81,43 @@
 
 Monitor::Monitor(Thread* owner, mirror::Object* obj)
     : monitor_lock_("a monitor lock", kMonitorLock),
+      monitor_contenders_("monitor contenders", monitor_lock_),
       owner_(owner),
       lock_count_(0),
       obj_(obj),
       wait_set_(NULL),
       locking_method_(NULL),
       locking_dex_pc_(0) {
-  monitor_lock_.Lock(owner);
+  // We should only inflate a lock if the owner is ourselves or suspended. This avoids a race
+  // with the owner unlocking the thin-lock.
+  CHECK(owner == Thread::Current() || owner->IsSuspended());
+}
+
+bool Monitor::Install(Thread* self) {
+  MutexLock mu(self, monitor_lock_);  // Uncontended mutex acquisition as monitor isn't yet public.
+  CHECK(owner_ == self || owner_->IsSuspended());
   // Propagate the lock state.
-  uint32_t thin = *obj->GetRawLockWordAddress();
-  lock_count_ = LW_LOCK_COUNT(thin);
-  thin &= LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT;
-  thin |= reinterpret_cast<uint32_t>(this) | LW_SHAPE_FAT;
-  // Publish the updated lock word.
-  android_atomic_release_store(thin, obj->GetRawLockWordAddress());
-  // Lock profiling.
-  if (lock_profiling_threshold_ != 0) {
-    locking_method_ = owner->GetCurrentMethod(&locking_dex_pc_);
+  LockWord thin(obj_->GetLockWord());
+  if (thin.GetState() != LockWord::kThinLocked) {
+    // The owner_ is suspended but another thread beat us to install a monitor.
+    CHECK_EQ(thin.GetState(), LockWord::kFatLocked);
+    return false;
   }
+  CHECK_EQ(owner_->GetThreadId(), thin.ThinLockOwner());
+  lock_count_ = thin.ThinLockCount();
+  LockWord fat(this);
+  // Publish the updated lock word, which may race with other threads.
+  bool success = obj_->CasLockWord(thin, fat);
+  // Lock profiling.
+  if (success && lock_profiling_threshold_ != 0) {
+    locking_method_ = owner_->GetCurrentMethod(&locking_dex_pc_);
+  }
+  return success;
 }
 
 Monitor::~Monitor() {
-  DCHECK(obj_ != NULL);
-  DCHECK_EQ(LW_SHAPE(*obj_->GetRawLockWordAddress()), LW_SHAPE_FAT);
+  CHECK(obj_ != NULL);
+  CHECK_EQ(obj_->GetLockWord().GetState(), LockWord::kFatLocked);
 }
 
 /*
@@ -190,64 +168,56 @@
   }
 }
 
-mirror::Object* Monitor::GetObject() {
-  return obj_;
-}
-
 void Monitor::SetObject(mirror::Object* object) {
   obj_ = object;
 }
 
 void Monitor::Lock(Thread* self) {
-  if (owner_ == self) {
-    lock_count_++;
-    return;
-  }
-
-  if (!monitor_lock_.TryLock(self)) {
-    uint64_t waitStart = 0;
-    uint64_t waitEnd = 0;
-    uint32_t wait_threshold = lock_profiling_threshold_;
-    const mirror::ArtMethod* current_locking_method = NULL;
-    uint32_t current_locking_dex_pc = 0;
+  MutexLock mu(self, monitor_lock_);
+  while (true) {
+    if (owner_ == NULL) {  // Unowned.
+      owner_ = self;
+      CHECK_EQ(lock_count_, 0);
+      // When debugging, save the current monitor holder for future
+      // acquisition failures to use in sampled logging.
+      if (lock_profiling_threshold_ != 0) {
+        locking_method_ = self->GetCurrentMethod(&locking_dex_pc_);
+      }
+      return;
+    } else if (owner_ == self) {  // Recursive.
+      lock_count_++;
+      return;
+    }
+    // Contended.
+    const bool log_contention = (lock_profiling_threshold_ != 0);
+    uint64_t wait_start_ms = log_contention ? MilliTime() : 0;
+    const mirror::ArtMethod* owners_method = locking_method_;
+    uint32_t owners_dex_pc = locking_dex_pc_;
+    monitor_lock_.Unlock(self);  // Let go of locks in order.
     {
-      ScopedThreadStateChange tsc(self, kBlocked);
-      if (wait_threshold != 0) {
-        waitStart = NanoTime() / 1000;
-      }
-      current_locking_method = locking_method_;
-      current_locking_dex_pc = locking_dex_pc_;
-
-      monitor_lock_.Lock(self);
-      if (wait_threshold != 0) {
-        waitEnd = NanoTime() / 1000;
+      ScopedThreadStateChange tsc(self, kBlocked);  // Change to blocked and give up mutator_lock_.
+      MutexLock mu2(self, monitor_lock_);  // Reacquire monitor_lock_ without mutator_lock_ for Wait.
+      if (owner_ != NULL) {  // Did the owner_ give the lock up?
+        monitor_contenders_.Wait(self);  // Still contended so wait.
+        // Woken from contention.
+        if (log_contention) {
+          uint64_t wait_ms = MilliTime() - wait_start_ms;
+          uint32_t sample_percent;
+          if (wait_ms >= lock_profiling_threshold_) {
+            sample_percent = 100;
+          } else {
+            sample_percent = 100 * wait_ms / lock_profiling_threshold_;
+          }
+          if (sample_percent != 0 && (static_cast<uint32_t>(rand() % 100) < sample_percent)) {
+            const char* owners_filename;
+            uint32_t owners_line_number;
+            TranslateLocation(owners_method, owners_dex_pc, &owners_filename, &owners_line_number);
+            LogContentionEvent(self, wait_ms, sample_percent, owners_filename, owners_line_number);
+          }
+        }
       }
     }
-
-    if (wait_threshold != 0) {
-      uint64_t wait_ms = (waitEnd - waitStart) / 1000;
-      uint32_t sample_percent;
-      if (wait_ms >= wait_threshold) {
-        sample_percent = 100;
-      } else {
-        sample_percent = 100 * wait_ms / wait_threshold;
-      }
-      if (sample_percent != 0 && (static_cast<uint32_t>(rand() % 100) < sample_percent)) {
-        const char* current_locking_filename;
-        uint32_t current_locking_line_number;
-        TranslateLocation(current_locking_method, current_locking_dex_pc,
-                          current_locking_filename, current_locking_line_number);
-        LogContentionEvent(self, wait_ms, sample_percent, current_locking_filename, current_locking_line_number);
-      }
-    }
-  }
-  owner_ = self;
-  DCHECK_EQ(lock_count_, 0);
-
-  // When debugging, save the current monitor holder for future
-  // acquisition failures to use in sampled logging.
-  if (lock_profiling_threshold_ != 0) {
-    locking_method_ = self->GetCurrentMethod(&locking_dex_pc_);
+    monitor_lock_.Lock(self);  // Reacquire locks in order.
   }
 }
 
@@ -261,10 +231,11 @@
   Thread* self = Thread::Current();
   ThrowLocation throw_location = self->GetCurrentLocationForThrow();
   self->ThrowNewExceptionV(throw_location, "Ljava/lang/IllegalMonitorStateException;", fmt, args);
-  if (!Runtime::Current()->IsStarted()) {
+  if (!Runtime::Current()->IsStarted() || VLOG_IS_ON(monitor)) {
     std::ostringstream ss;
     self->Dump(ss);
-    LOG(ERROR) << self->GetException(NULL)->Dump() << "\n" << ss.str();
+    LOG(Runtime::Current()->IsStarted() ? INFO : ERROR)
+        << self->GetException(NULL)->Dump() << "\n" << ss.str();
   }
   va_end(args);
 }
@@ -290,7 +261,7 @@
     // Acquire thread list lock so threads won't disappear from under us.
     MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
     // Re-read owner now that we hold lock.
-    current_owner = (monitor != NULL) ? monitor->owner_ : NULL;
+    current_owner = (monitor != NULL) ? monitor->GetOwner() : NULL;
     // Get short descriptions of the threads involved.
     current_owner_string = ThreadToString(current_owner);
     expected_owner_string = ThreadToString(expected_owner);
@@ -338,8 +309,9 @@
   }
 }
 
-bool Monitor::Unlock(Thread* self, bool for_wait) {
+bool Monitor::Unlock(Thread* self) {
   DCHECK(self != NULL);
+  MutexLock mu(self, monitor_lock_);
   Thread* owner = owner_;
   if (owner == self) {
     // We own the monitor, so nobody else can be in here.
@@ -347,17 +319,11 @@
       owner_ = NULL;
       locking_method_ = NULL;
       locking_dex_pc_ = 0;
-      monitor_lock_.Unlock(self);
+      // Wake a contender.
+      monitor_contenders_.Signal(self);
     } else {
       --lock_count_;
     }
-  } else if (for_wait) {
-    // Wait should have already cleared the fields.
-    DCHECK_EQ(lock_count_, 0);
-    DCHECK(owner == NULL);
-    DCHECK(locking_method_ == NULL);
-    DCHECK_EQ(locking_dex_pc_, 0u);
-    monitor_lock_.Unlock(self);
   } else {
     // We don't own this, so we're not allowed to unlock it.
     // The JNI spec says that we should throw IllegalMonitorStateException
@@ -396,12 +362,14 @@
   DCHECK(self != NULL);
   DCHECK(why == kTimedWaiting || why == kWaiting || why == kSleeping);
 
+  monitor_lock_.Lock(self);
+
   // Make sure that we hold the lock.
   if (owner_ != self) {
     ThrowIllegalMonitorStateExceptionF("object not locked by thread before wait()");
+    monitor_lock_.Unlock(self);
     return;
   }
-  monitor_lock_.AssertHeld(self);
 
   // We need to turn a zero-length timed wait into a regular wait because
   // Object.wait(0, 0) is defined as Object.wait(0), which is defined as Object.wait().
@@ -409,16 +377,12 @@
     why = kWaiting;
   }
 
-  WaitWithLock(self, ms, ns, interruptShouldThrow, why);
-}
-
-void Monitor::WaitWithLock(Thread* self, int64_t ms, int32_t ns,
-                           bool interruptShouldThrow, ThreadState why) {
   // Enforce the timeout range.
   if (ms < 0 || ns < 0 || ns > 999999) {
     ThrowLocation throw_location = self->GetCurrentLocationForThrow();
     self->ThrowNewExceptionF(throw_location, "Ljava/lang/IllegalArgumentException;",
                              "timeout arguments out of range: ms=%lld ns=%d", ms, ns);
+    monitor_lock_.Unlock(self);
     return;
   }
 
@@ -460,7 +424,8 @@
     self->wait_monitor_ = this;
 
     // Release the monitor lock.
-    Unlock(self, true);
+    monitor_contenders_.Signal(self);
+    monitor_lock_.Unlock(self);
 
     // Handle the case where the thread was interrupted before we called wait().
     if (self->interrupted_) {
@@ -493,9 +458,9 @@
     self->wait_monitor_ = NULL;
   }
 
-  // Re-acquire the monitor lock.
+  // Re-acquire the monitor and lock.
   Lock(self);
-
+  monitor_lock_.Lock(self);
   self->wait_mutex_->AssertNotHeld(self);
 
   /*
@@ -527,20 +492,17 @@
       self->ThrowNewException(throw_location, "Ljava/lang/InterruptedException;", NULL);
     }
   }
+  monitor_lock_.Unlock(self);
 }
 
 void Monitor::Notify(Thread* self) {
   DCHECK(self != NULL);
+  MutexLock mu(self, monitor_lock_);
   // Make sure that we hold the lock.
   if (owner_ != self) {
     ThrowIllegalMonitorStateExceptionF("object not locked by thread before notify()");
     return;
   }
-  monitor_lock_.AssertHeld(self);
-  NotifyWithLock(self);
-}
-
-void Monitor::NotifyWithLock(Thread* self) {
   // Signal the first waiting thread in the wait set.
   while (wait_set_ != NULL) {
     Thread* thread = wait_set_;
@@ -558,16 +520,12 @@
 
 void Monitor::NotifyAll(Thread* self) {
   DCHECK(self != NULL);
+  MutexLock mu(self, monitor_lock_);
   // Make sure that we hold the lock.
   if (owner_ != self) {
     ThrowIllegalMonitorStateExceptionF("object not locked by thread before notifyAll()");
     return;
   }
-  monitor_lock_.AssertHeld(self);
-  NotifyAllWithLock();
-}
-
-void Monitor::NotifyAllWithLock() {
   // Signal all threads in the wait set.
   while (wait_set_ != NULL) {
     Thread* thread = wait_set_;
@@ -578,182 +536,130 @@
 }
 
 /*
- * Changes the shape of a monitor from thin to fat, preserving the
- * internal lock state. The calling thread must own the lock.
+ * Changes the shape of a monitor from thin to fat, preserving the internal lock state. The calling
+ * thread must own the lock or the owner must be suspended. There's a race with other threads
+ * inflating the lock and so the caller should read the monitor following the call.
  */
-void Monitor::Inflate(Thread* self, mirror::Object* obj) {
+void Monitor::Inflate(Thread* self, Thread* owner, mirror::Object* obj) {
   DCHECK(self != NULL);
+  DCHECK(owner != NULL);
   DCHECK(obj != NULL);
-  DCHECK_EQ(LW_SHAPE(*obj->GetRawLockWordAddress()), LW_SHAPE_THIN);
-  DCHECK_EQ(LW_LOCK_OWNER(*obj->GetRawLockWordAddress()), static_cast<int32_t>(self->GetThinLockId()));
 
   // Allocate and acquire a new monitor.
-  Monitor* m = new Monitor(self, obj);
-  VLOG(monitor) << "monitor: thread " << self->GetThinLockId()
-                << " created monitor " << m << " for object " << obj;
-  Runtime::Current()->GetMonitorList()->Add(m);
+  UniquePtr<Monitor> m(new Monitor(owner, obj));
+  if (m->Install(self)) {
+    VLOG(monitor) << "monitor: thread " << owner->GetThreadId()
+                    << " created monitor " << m.get() << " for object " << obj;
+    Runtime::Current()->GetMonitorList()->Add(m.release());
+  }
+  CHECK_EQ(obj->GetLockWord().GetState(), LockWord::kFatLocked);
 }
 
 void Monitor::MonitorEnter(Thread* self, mirror::Object* obj) {
-  volatile int32_t* thinp = obj->GetRawLockWordAddress();
-  uint32_t sleepDelayNs;
-  uint32_t minSleepDelayNs = 1000000;  /* 1 millisecond */
-  uint32_t maxSleepDelayNs = 1000000000;  /* 1 second */
-  uint32_t thin, newThin;
-
   DCHECK(self != NULL);
   DCHECK(obj != NULL);
-  uint32_t threadId = self->GetThinLockId();
- retry:
-  thin = *thinp;
-  if (LW_SHAPE(thin) == LW_SHAPE_THIN) {
-    /*
-     * The lock is a thin lock.  The owner field is used to
-     * determine the acquire method, ordered by cost.
-     */
-    if (LW_LOCK_OWNER(thin) == threadId) {
-      /*
-       * The calling thread owns the lock.  Increment the
-       * value of the recursion count field.
-       */
-      *thinp += 1 << LW_LOCK_COUNT_SHIFT;
-      if (LW_LOCK_COUNT(*thinp) == LW_LOCK_COUNT_MASK) {
-        /*
-         * The reacquisition limit has been reached.  Inflate
-         * the lock so the next acquire will not overflow the
-         * recursion count field.
-         */
-        Inflate(self, obj);
+  uint32_t thread_id = self->GetThreadId();
+  size_t contention_count = 0;
+
+  while (true) {
+    LockWord lock_word = obj->GetLockWord();
+    switch (lock_word.GetState()) {
+      case LockWord::kUnlocked: {
+        LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0));
+        if (obj->CasLockWord(lock_word, thin_locked)) {
+          return;  // Success!
+        }
+        continue;  // Go again.
       }
-    } else if (LW_LOCK_OWNER(thin) == 0) {
-      // The lock is unowned. Install the thread id of the calling thread into the owner field.
-      // This is the common case: compiled code will have tried this before calling back into
-      // the runtime.
-      newThin = thin | (threadId << LW_LOCK_OWNER_SHIFT);
-      if (android_atomic_acquire_cas(thin, newThin, thinp) != 0) {
-        // The acquire failed. Try again.
-        goto retry;
-      }
-    } else {
-      VLOG(monitor) << StringPrintf("monitor: thread %d spin on lock %p (a %s) owned by %d",
-                                    threadId, thinp, PrettyTypeOf(obj).c_str(), LW_LOCK_OWNER(thin));
-      // The lock is owned by another thread. Notify the runtime that we are about to wait.
-      self->monitor_enter_object_ = obj;
-      self->TransitionFromRunnableToSuspended(kBlocked);
-      // Spin until the thin lock is released or inflated.
-      sleepDelayNs = 0;
-      for (;;) {
-        thin = *thinp;
-        // Check the shape of the lock word. Another thread
-        // may have inflated the lock while we were waiting.
-        if (LW_SHAPE(thin) == LW_SHAPE_THIN) {
-          if (LW_LOCK_OWNER(thin) == 0) {
-            // The lock has been released. Install the thread id of the
-            // calling thread into the owner field.
-            newThin = thin | (threadId << LW_LOCK_OWNER_SHIFT);
-            if (android_atomic_acquire_cas(thin, newThin, thinp) == 0) {
-              // The acquire succeed. Break out of the loop and proceed to inflate the lock.
-              break;
-            }
+      case LockWord::kThinLocked: {
+        uint32_t owner_thread_id = lock_word.ThinLockOwner();
+        if (owner_thread_id == thread_id) {
+          // We own the lock, increase the recursion count.
+          uint32_t new_count = lock_word.ThinLockCount() + 1;
+          if (LIKELY(new_count <= LockWord::kThinLockMaxCount)) {
+            LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count));
+            obj->SetLockWord(thin_locked);
+            return;  // Success!
           } else {
-            // The lock has not been released. Yield so the owning thread can run.
-            if (sleepDelayNs == 0) {
-              sched_yield();
-              sleepDelayNs = minSleepDelayNs;
-            } else {
-              NanoSleep(sleepDelayNs);
-              // Prepare the next delay value. Wrap to avoid once a second polls for eternity.
-              if (sleepDelayNs < maxSleepDelayNs / 2) {
-                sleepDelayNs *= 2;
-              } else {
-                sleepDelayNs = minSleepDelayNs;
+            // We'd overflow the recursion count, so inflate the monitor.
+            Inflate(self, self, obj);
+          }
+        } else {
+          // Contention.
+          contention_count++;
+          if (contention_count <= Runtime::Current()->GetMaxSpinsBeforeThinLockInflation()) {
+            NanoSleep(1000);  // Sleep for 1us and re-attempt.
+          } else {
+            contention_count = 0;
+            // Suspend the owner, inflate. First change to blocked and give up mutator_lock_.
+            ScopedThreadStateChange tsc(self, kBlocked);
+            bool timed_out;
+            ThreadList* thread_list = Runtime::Current()->GetThreadList();
+            if (lock_word == obj->GetLockWord()) {  // If lock word hasn't changed.
+              Thread* owner = thread_list->SuspendThreadByThreadId(lock_word.ThinLockOwner(), false,
+                                                                   &timed_out);
+              if (owner != NULL) {
+                // We succeeded in suspending the thread, check the lock's status didn't change.
+                lock_word = obj->GetLockWord();
+                if (lock_word.GetState() == LockWord::kThinLocked &&
+                    lock_word.ThinLockOwner() == owner_thread_id) {
+                  // Go ahead and inflate the lock.
+                  Inflate(self, owner, obj);
+                }
+                thread_list->Resume(owner, false);
               }
             }
           }
-        } else {
-          // The thin lock was inflated by another thread. Let the runtime know we are no longer
-          // waiting and try again.
-          VLOG(monitor) << StringPrintf("monitor: thread %d found lock %p surprise-fattened by another thread", threadId, thinp);
-          self->monitor_enter_object_ = NULL;
-          self->TransitionFromSuspendedToRunnable();
-          goto retry;
         }
+        continue;  // Start from the beginning.
       }
-      VLOG(monitor) << StringPrintf("monitor: thread %d spin on lock %p done", threadId, thinp);
-      // We have acquired the thin lock. Let the runtime know that we are no longer waiting.
-      self->monitor_enter_object_ = NULL;
-      self->TransitionFromSuspendedToRunnable();
-      // Fatten the lock.
-      Inflate(self, obj);
-      VLOG(monitor) << StringPrintf("monitor: thread %d fattened lock %p", threadId, thinp);
+      case LockWord::kFatLocked: {
+        Monitor* mon = lock_word.FatLockMonitor();
+        mon->Lock(self);
+        return;  // Success!
+      }
     }
-  } else {
-    // The lock is a fat lock.
-    VLOG(monitor) << StringPrintf("monitor: thread %d locking fat lock %p (%p) %p on a %s",
-                                  threadId, thinp, LW_MONITOR(*thinp),
-                                  reinterpret_cast<void*>(*thinp), PrettyTypeOf(obj).c_str());
-    DCHECK(LW_MONITOR(*thinp) != NULL);
-    LW_MONITOR(*thinp)->Lock(self);
   }
 }
 
 bool Monitor::MonitorExit(Thread* self, mirror::Object* obj) {
-  volatile int32_t* thinp = obj->GetRawLockWordAddress();
-
   DCHECK(self != NULL);
-  // DCHECK_EQ(self->GetState(), kRunnable);
   DCHECK(obj != NULL);
 
-  /*
-   * Cache the lock word as its value can change while we are
-   * examining its state.
-   */
-  uint32_t thin = *thinp;
-  if (LW_SHAPE(thin) == LW_SHAPE_THIN) {
-    /*
-     * The lock is thin.  We must ensure that the lock is owned
-     * by the given thread before unlocking it.
-     */
-    if (LW_LOCK_OWNER(thin) == self->GetThinLockId()) {
-      /*
-       * We are the lock owner.  It is safe to update the lock
-       * without CAS as lock ownership guards the lock itself.
-       */
-      if (LW_LOCK_COUNT(thin) == 0) {
-        /*
-         * The lock was not recursively acquired, the common
-         * case.  Unlock by clearing all bits except for the
-         * hash state.
-         */
-        thin &= (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT);
-        android_atomic_release_store(thin, thinp);
-      } else {
-        /*
-         * The object was recursively acquired.  Decrement the
-         * lock recursion count field.
-         */
-        *thinp -= 1 << LW_LOCK_COUNT_SHIFT;
-      }
-    } else {
-      /*
-       * We do not own the lock.  The JVM spec requires that we
-       * throw an exception in this case.
-       */
+  LockWord lock_word = obj->GetLockWord();
+  switch (lock_word.GetState()) {
+    case LockWord::kUnlocked:
       FailedUnlock(obj, self, NULL, NULL);
-      return false;
+      return false;  // Failure.
+    case LockWord::kThinLocked: {
+      uint32_t thread_id = self->GetThreadId();
+      uint32_t owner_thread_id = lock_word.ThinLockOwner();
+      if (owner_thread_id != thread_id) {
+        // TODO: there's a race here with the owner dying while we unlock.
+        Thread* owner =
+            Runtime::Current()->GetThreadList()->FindThreadByThreadId(lock_word.ThinLockOwner());
+        FailedUnlock(obj, self, owner, NULL);
+        return false;  // Failure.
+      } else {
+        // We own the lock, decrease the recursion count.
+        if (lock_word.ThinLockCount() != 0) {
+          uint32_t new_count = lock_word.ThinLockCount() - 1;
+          LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count));
+          obj->SetLockWord(thin_locked);
+        } else {
+          obj->SetLockWord(LockWord());
+        }
+        return true;  // Success!
+      }
     }
-  } else {
-    /*
-     * The lock is fat.  We must check to see if Unlock has
-     * raised any exceptions before continuing.
-     */
-    DCHECK(LW_MONITOR(*thinp) != NULL);
-    if (!LW_MONITOR(*thinp)->Unlock(self, false)) {
-      // An exception has been raised.  Do not fall through.
-      return false;
+    case LockWord::kFatLocked: {
+      Monitor* mon = lock_word.FatLockMonitor();
+      return mon->Unlock(self);
     }
+    default:
+      LOG(FATAL) << "Unreachable";
+      return false;
   }
-  return true;
 }
 
 /*
@@ -761,84 +667,91 @@
  */
 void Monitor::Wait(Thread* self, mirror::Object *obj, int64_t ms, int32_t ns,
                    bool interruptShouldThrow, ThreadState why) {
-  volatile int32_t* thinp = obj->GetRawLockWordAddress();
+  DCHECK(self != NULL);
+  DCHECK(obj != NULL);
 
-  // If the lock is still thin, we need to fatten it.
-  uint32_t thin = *thinp;
-  if (LW_SHAPE(thin) == LW_SHAPE_THIN) {
-    // Make sure that 'self' holds the lock.
-    if (LW_LOCK_OWNER(thin) != self->GetThinLockId()) {
+  LockWord lock_word = obj->GetLockWord();
+  switch (lock_word.GetState()) {
+    case LockWord::kUnlocked:
       ThrowIllegalMonitorStateExceptionF("object not locked by thread before wait()");
-      return;
+      return;  // Failure.
+    case LockWord::kThinLocked: {
+      uint32_t thread_id = self->GetThreadId();
+      uint32_t owner_thread_id = lock_word.ThinLockOwner();
+      if (owner_thread_id != thread_id) {
+        ThrowIllegalMonitorStateExceptionF("object not locked by thread before wait()");
+        return;  // Failure.
+      } else {
+        // We own the lock, inflate to enqueue ourself on the Monitor.
+        Inflate(self, self, obj);
+        lock_word = obj->GetLockWord();
+      }
+      break;
     }
-
-    /* This thread holds the lock.  We need to fatten the lock
-     * so 'self' can block on it.  Don't update the object lock
-     * field yet, because 'self' needs to acquire the lock before
-     * any other thread gets a chance.
-     */
-    Inflate(self, obj);
-    VLOG(monitor) << StringPrintf("monitor: thread %d fattened lock %p by wait()", self->GetThinLockId(), thinp);
+    case LockWord::kFatLocked:
+      break;  // Already set for a wait.
   }
-  LW_MONITOR(*thinp)->Wait(self, ms, ns, interruptShouldThrow, why);
+  Monitor* mon = lock_word.FatLockMonitor();
+  mon->Wait(self, ms, ns, interruptShouldThrow, why);
 }
 
-void Monitor::Notify(Thread* self, mirror::Object *obj) {
-  uint32_t thin = *obj->GetRawLockWordAddress();
+void Monitor::InflateAndNotify(Thread* self, mirror::Object* obj, bool notify_all) {
+  DCHECK(self != NULL);
+  DCHECK(obj != NULL);
 
-  // If the lock is still thin, there aren't any waiters;
-  // waiting on an object forces lock fattening.
-  if (LW_SHAPE(thin) == LW_SHAPE_THIN) {
-    // Make sure that 'self' holds the lock.
-    if (LW_LOCK_OWNER(thin) != self->GetThinLockId()) {
+  LockWord lock_word = obj->GetLockWord();
+  switch (lock_word.GetState()) {
+    case LockWord::kUnlocked:
       ThrowIllegalMonitorStateExceptionF("object not locked by thread before notify()");
-      return;
+      return;  // Failure.
+    case LockWord::kThinLocked: {
+      uint32_t thread_id = self->GetThreadId();
+      uint32_t owner_thread_id = lock_word.ThinLockOwner();
+      if (owner_thread_id != thread_id) {
+        ThrowIllegalMonitorStateExceptionF("object not locked by thread before notify()");
+        return;  // Failure.
+      } else {
+        // We own the lock but there's no Monitor and therefore no waiters.
+        return;  // Success.
+      }
     }
-    // no-op;  there are no waiters to notify.
-    // We inflate here in case the Notify is in a tight loop. Without inflation here the waiter
-    // will struggle to get in. Bug 6961405.
-    Inflate(self, obj);
-  } else {
-    // It's a fat lock.
-    LW_MONITOR(thin)->Notify(self);
+    case LockWord::kFatLocked: {
+      Monitor* mon = lock_word.FatLockMonitor();
+      if (notify_all) {
+        mon->NotifyAll(self);
+      } else {
+        mon->Notify(self);
+      }
+      return;  // Success.
+    }
   }
 }
 
-void Monitor::NotifyAll(Thread* self, mirror::Object *obj) {
-  uint32_t thin = *obj->GetRawLockWordAddress();
+uint32_t Monitor::GetLockOwnerThreadId(mirror::Object* obj) {
+  DCHECK(obj != NULL);
 
-  // If the lock is still thin, there aren't any waiters;
-  // waiting on an object forces lock fattening.
-  if (LW_SHAPE(thin) == LW_SHAPE_THIN) {
-    // Make sure that 'self' holds the lock.
-    if (LW_LOCK_OWNER(thin) != self->GetThinLockId()) {
-      ThrowIllegalMonitorStateExceptionF("object not locked by thread before notifyAll()");
-      return;
+  LockWord lock_word = obj->GetLockWord();
+  switch (lock_word.GetState()) {
+    case LockWord::kUnlocked:
+      return ThreadList::kInvalidThreadId;
+    case LockWord::kThinLocked:
+      return lock_word.ThinLockOwner();
+    case LockWord::kFatLocked: {
+      Monitor* mon = lock_word.FatLockMonitor();
+      return mon->GetOwnerThreadId();
     }
-    // no-op;  there are no waiters to notify.
-    // We inflate here in case the NotifyAll is in a tight loop. Without inflation here the waiter
-    // will struggle to get in. Bug 6961405.
-    Inflate(self, obj);
-  } else {
-    // It's a fat lock.
-    LW_MONITOR(thin)->NotifyAll(self);
-  }
-}
-
-uint32_t Monitor::GetThinLockId(uint32_t raw_lock_word) {
-  if (LW_SHAPE(raw_lock_word) == LW_SHAPE_THIN) {
-    return LW_LOCK_OWNER(raw_lock_word);
-  } else {
-    Thread* owner = LW_MONITOR(raw_lock_word)->owner_;
-    return owner ? owner->GetThinLockId() : 0;
+    default:
+      LOG(FATAL) << "Unreachable";
+      return ThreadList::kInvalidThreadId;
   }
 }
 
 void Monitor::DescribeWait(std::ostream& os, const Thread* thread) {
   ThreadState state = thread->GetState();
 
-  mirror::Object* object = NULL;
-  uint32_t lock_owner = ThreadList::kInvalidId;
+  int32_t object_identity_hashcode = 0;
+  uint32_t lock_owner = ThreadList::kInvalidThreadId;
+  std::string pretty_type;
   if (state == kWaiting || state == kTimedWaiting || state == kSleeping) {
     if (state == kSleeping) {
       os << "  - sleeping on ";
@@ -850,14 +763,18 @@
       MutexLock mu(self, *thread->wait_mutex_);
       Monitor* monitor = thread->wait_monitor_;
       if (monitor != NULL) {
-        object = monitor->obj_;
+        mirror::Object* object = monitor->obj_;
+        object_identity_hashcode = object->IdentityHashCode();
+        pretty_type = PrettyTypeOf(object);
       }
     }
   } else if (state == kBlocked) {
     os << "  - waiting to lock ";
-    object = thread->monitor_enter_object_;
+    mirror::Object* object = thread->monitor_enter_object_;
     if (object != NULL) {
-      lock_owner = object->GetThinLockId();
+      object_identity_hashcode = object->IdentityHashCode();
+      lock_owner = object->GetLockOwnerThreadId();
+      pretty_type = PrettyTypeOf(object);
     }
   } else {
     // We're not waiting on anything.
@@ -865,10 +782,10 @@
   }
 
   // - waiting on <0x6008c468> (a java.lang.Class<java.lang.ref.ReferenceQueue>)
-  os << "<" << object << "> (a " << PrettyTypeOf(object) << ")";
+  os << StringPrintf("<0x%08x> (a %s)", object_identity_hashcode, pretty_type.c_str());
 
   // - waiting to lock <0x613f83d8> (a java.lang.Object) held by thread 5
-  if (lock_owner != ThreadList::kInvalidId) {
+  if (lock_owner != ThreadList::kInvalidThreadId) {
     os << " held by thread " << lock_owner;
   }
 
@@ -879,18 +796,15 @@
   // This is used to implement JDWP's ThreadReference.CurrentContendedMonitor, and has a bizarre
   // definition of contended that includes a monitor a thread is trying to enter...
   mirror::Object* result = thread->monitor_enter_object_;
-  if (result != NULL) {
-    return result;
-  }
-  // ...but also a monitor that the thread is waiting on.
-  {
+  if (result == NULL) {
+    // ...but also a monitor that the thread is waiting on.
     MutexLock mu(Thread::Current(), *thread->wait_mutex_);
     Monitor* monitor = thread->wait_monitor_;
     if (monitor != NULL) {
-      return monitor->obj_;
+      result = monitor->GetObject();
     }
   }
-  return NULL;
+  return result;
 }
 
 void Monitor::VisitLocks(StackVisitor* stack_visitor, void (*callback)(mirror::Object*, void*),
@@ -955,41 +869,56 @@
   }
 }
 
-bool Monitor::IsValidLockWord(int32_t lock_word) {
-  if (lock_word == 0) {
-    return true;
-  } else if (LW_SHAPE(lock_word) == LW_SHAPE_FAT) {
-    Monitor* mon = LW_MONITOR(lock_word);
-    MonitorList* list = Runtime::Current()->GetMonitorList();
-    MutexLock mu(Thread::Current(), list->monitor_list_lock_);
-    bool found = false;
-    for (Monitor* list_mon : list->list_) {
-      if (mon == list_mon) {
-        found = true;
-        break;
+bool Monitor::IsValidLockWord(LockWord lock_word) {
+  switch (lock_word.GetState()) {
+    case LockWord::kUnlocked:
+      // Nothing to check.
+      return true;
+    case LockWord::kThinLocked:
+      // Basic sanity check of owner.
+      return lock_word.ThinLockOwner() != ThreadList::kInvalidThreadId;
+    case LockWord::kFatLocked: {
+      // Check that the monitor appears in the monitor list.
+      Monitor* mon = lock_word.FatLockMonitor();
+      MonitorList* list = Runtime::Current()->GetMonitorList();
+      MutexLock mu(Thread::Current(), list->monitor_list_lock_);
+      for (Monitor* list_mon : list->list_) {
+        if (mon == list_mon) {
+          return true;  // Found our monitor.
+        }
       }
+      return false;  // Fail - the object references a monitor that isn't in the list.
     }
-    return found;
-  } else {
-    // TODO: thin lock validity checking.
-    return LW_SHAPE(lock_word) == LW_SHAPE_THIN;
+    default:
+      LOG(FATAL) << "Unreachable";
+      return false;
   }
 }
 
 void Monitor::TranslateLocation(const mirror::ArtMethod* method, uint32_t dex_pc,
-                                const char*& source_file, uint32_t& line_number) const {
+                                const char** source_file, uint32_t* line_number) const {
   // If method is null, location is unknown
   if (method == NULL) {
-    source_file = "";
-    line_number = 0;
+    *source_file = "";
+    *line_number = 0;
     return;
   }
   MethodHelper mh(method);
-  source_file = mh.GetDeclaringClassSourceFile();
-  if (source_file == NULL) {
-    source_file = "";
+  *source_file = mh.GetDeclaringClassSourceFile();
+  if (*source_file == NULL) {
+    *source_file = "";
   }
-  line_number = mh.GetLineNumFromDexPC(dex_pc);
+  *line_number = mh.GetLineNumFromDexPC(dex_pc);
+}
+
+uint32_t Monitor::GetOwnerThreadId() {
+  MutexLock mu(Thread::Current(), monitor_lock_);
+  Thread* owner = owner_;
+  if (owner != NULL) {
+    return owner->GetThreadId();
+  } else {
+    return ThreadList::kInvalidThreadId;
+  }
 }
 
 MonitorList::MonitorList()
@@ -1041,22 +970,26 @@
   }
 }
 
-MonitorInfo::MonitorInfo(mirror::Object* o) : owner(NULL), entry_count(0) {
-  uint32_t lock_word = *o->GetRawLockWordAddress();
-  if (LW_SHAPE(lock_word) == LW_SHAPE_THIN) {
-    uint32_t owner_thin_lock_id = LW_LOCK_OWNER(lock_word);
-    if (owner_thin_lock_id != 0) {
-      owner = Runtime::Current()->GetThreadList()->FindThreadByThinLockId(owner_thin_lock_id);
-      entry_count = 1 + LW_LOCK_COUNT(lock_word);
-    }
-    // Thin locks have no waiters.
-  } else {
-    CHECK_EQ(LW_SHAPE(lock_word), LW_SHAPE_FAT);
-    Monitor* monitor = LW_MONITOR(lock_word);
-    owner = monitor->owner_;
-    entry_count = 1 + monitor->lock_count_;
-    for (Thread* waiter = monitor->wait_set_; waiter != NULL; waiter = waiter->wait_next_) {
-      waiters.push_back(waiter);
+MonitorInfo::MonitorInfo(mirror::Object* obj) : owner_(NULL), entry_count_(0) {
+  DCHECK(obj != NULL);
+
+  LockWord lock_word = obj->GetLockWord();
+  switch (lock_word.GetState()) {
+    case LockWord::kUnlocked:
+      break;
+    case LockWord::kThinLocked:
+      owner_ = Runtime::Current()->GetThreadList()->FindThreadByThreadId(lock_word.ThinLockOwner());
+      entry_count_ = 1 + lock_word.ThinLockCount();
+      // Thin locks have no waiters.
+      break;
+    case LockWord::kFatLocked: {
+      Monitor* mon = lock_word.FatLockMonitor();
+      owner_ = mon->owner_;
+      entry_count_ = 1 + mon->lock_count_;
+      for (Thread* waiter = mon->wait_set_; waiter != NULL; waiter = waiter->wait_next_) {
+        waiters_.push_back(waiter);
+      }
+      break;
     }
   }
 }
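
The rewritten MonitorEnter above is a retry loop over the three LockWord states. The following is a stripped-down sketch of the thin-lock fast path only, assuming a bare std::atomic<uint32_t> lock word with the same 16-bit-owner / 15-bit-count split; inflation to a fat Monitor and the owner-suspension path are intentionally omitted.

#include <atomic>
#include <cstdint>
#include <thread>

constexpr uint32_t kOwnerMask16 = 0xFFFFu;
constexpr uint32_t kCountShift16 = 16;
constexpr uint32_t kMaxCount15 = 0x7FFFu;

// Thin-lock fast path: CAS an unlocked word to "owned by me", bump the count on
// recursive entry, and yield on contention (the real code spins, then suspends
// the owner and inflates the lock).
void ThinLockEnter(std::atomic<uint32_t>& word, uint32_t self_id) {
  while (true) {
    uint32_t cur = word.load(std::memory_order_relaxed);
    if (cur == 0) {  // Unlocked: try to claim it.
      if (word.compare_exchange_weak(cur, self_id, std::memory_order_acquire)) {
        return;
      }
      continue;  // Lost the race; re-read and retry.
    }
    if ((cur & kOwnerMask16) == self_id) {  // Recursive acquisition by the owner.
      uint32_t count = cur >> kCountShift16;
      if (count < kMaxCount15) {
        // Plain store is fine: only the owner mutates a thin-locked word.
        word.store(cur + (1u << kCountShift16), std::memory_order_relaxed);
      }
      // On count overflow the real code inflates to a fat Monitor instead.
      return;
    }
    std::this_thread::yield();  // Contended or fat-locked: defer to the real slow path.
  }
}
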
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 71fe716..044f76e 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -30,47 +30,28 @@
 
 namespace art {
 
-/*
- * Monitor shape field. Used to distinguish thin locks from fat locks.
- */
-#define LW_SHAPE_THIN 0
-#define LW_SHAPE_FAT 1
-
-/*
- * Hash state field.  Used to signify that an object has had its
- * identity hash code exposed or relocated.
- */
-#define LW_HASH_STATE_UNHASHED 0
-#define LW_HASH_STATE_HASHED 1
-#define LW_HASH_STATE_HASHED_AND_MOVED 3
-#define LW_HASH_STATE_MASK 0x3
-#define LW_HASH_STATE_SHIFT 1
-#define LW_HASH_STATE(x) (((x) >> LW_HASH_STATE_SHIFT) & LW_HASH_STATE_MASK)
-
-/*
- * Lock owner field.  Contains the thread id of the thread currently
- * holding the lock.
- */
-#define LW_LOCK_OWNER_MASK 0xffff
-#define LW_LOCK_OWNER_SHIFT 3
-#define LW_LOCK_OWNER(x) (((x) >> LW_LOCK_OWNER_SHIFT) & LW_LOCK_OWNER_MASK)
-
 namespace mirror {
   class ArtMethod;
   class Object;
 }  // namespace mirror
+class LockWord;
 class Thread;
 class StackVisitor;
 
 class Monitor {
  public:
+  // The default number of spins that are done before thread suspension is used to forcibly inflate
+  // a lock word. See Runtime::max_spins_before_thin_lock_inflation_.
+  constexpr static size_t kDefaultMaxSpinsBeforeThinLockInflation = 50;
+
   ~Monitor();
 
   static bool IsSensitiveThread();
   static void Init(uint32_t lock_profiling_threshold, bool (*is_sensitive_thread_hook)());
 
-  static uint32_t GetThinLockId(uint32_t raw_lock_word)
-      NO_THREAD_SAFETY_ANALYSIS;  // Reading lock owner without holding lock is racy.
+  // Return the thread id of the lock owner or 0 when there is no owner.
+  static uint32_t GetLockOwnerThreadId(mirror::Object* obj)
+      NO_THREAD_SAFETY_ANALYSIS;  // TODO: Reading lock owner without holding lock is racy.
 
   static void MonitorEnter(Thread* thread, mirror::Object* obj)
       EXCLUSIVE_LOCK_FUNCTION(monitor_lock_)
@@ -80,9 +61,13 @@
       UNLOCK_FUNCTION(monitor_lock_);
 
   static void Notify(Thread* self, mirror::Object* obj)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    InflateAndNotify(self, obj, false);
+  }
   static void NotifyAll(Thread* self, mirror::Object* obj)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    InflateAndNotify(self, obj, true);
+  }
   static void Wait(Thread* self, mirror::Object* obj, int64_t ms, int32_t ns,
                    bool interruptShouldThrow, ThreadState why)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -92,7 +77,8 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Used to implement JDWP's ThreadReference.CurrentContendedMonitor.
-  static mirror::Object* GetContendedMonitor(Thread* thread);
+  static mirror::Object* GetContendedMonitor(Thread* thread)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Calls 'callback' once for each lock held in the single stack frame represented by
   // the current state of 'stack_visitor'.
@@ -100,19 +86,33 @@
                          void* callback_context)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static bool IsValidLockWord(int32_t lock_word);
+  static bool IsValidLockWord(LockWord lock_word);
 
-  mirror::Object* GetObject();
+  // TODO: SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+  mirror::Object* GetObject() const {
+    return obj_;
+  }
+
   void SetObject(mirror::Object* object);
 
+  Thread* GetOwner() const NO_THREAD_SAFETY_ANALYSIS {
+    return owner_;
+  }
+
  private:
   explicit Monitor(Thread* owner, mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Install the monitor into its object, may fail if another thread installs a different monitor
+  // first.
+  bool Install(Thread* self)
+      LOCKS_EXCLUDED(monitor_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   void AppendToWaitSet(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_);
   void RemoveFromWaitSet(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_);
 
-  static void Inflate(Thread* self, mirror::Object* obj)
+  static void Inflate(Thread* self, Thread* owner, mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void LogContentionEvent(Thread* self, uint32_t wait_ms, uint32_t sample_percent,
@@ -123,43 +123,49 @@
       LOCKS_EXCLUDED(Locks::thread_list_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void Lock(Thread* self) EXCLUSIVE_LOCK_FUNCTION(monitor_lock_);
-  bool Unlock(Thread* thread, bool for_wait) UNLOCK_FUNCTION(monitor_lock_);
-
-  void Notify(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
-  void NotifyWithLock(Thread* self)
-      EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_)
+  void Lock(Thread* self)
+      LOCKS_EXCLUDED(monitor_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool Unlock(Thread* thread)
+      LOCKS_EXCLUDED(monitor_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void NotifyAll(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
-  void NotifyAllWithLock()
-      EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_)
+  static void InflateAndNotify(Thread* self, mirror::Object* obj, bool notify_all)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void Notify(Thread* self)
+      LOCKS_EXCLUDED(monitor_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void NotifyAll(Thread* self)
+      LOCKS_EXCLUDED(monitor_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 
   void Wait(Thread* self, int64_t msec, int32_t nsec, bool interruptShouldThrow, ThreadState why)
-      NO_THREAD_SAFETY_ANALYSIS;
-  void WaitWithLock(Thread* self, int64_t ms, int32_t ns, bool interruptShouldThrow, ThreadState why)
-      EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_)
+      LOCKS_EXCLUDED(monitor_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Translates the provided method and pc into its declaring class' source file and line number.
   void TranslateLocation(const mirror::ArtMethod* method, uint32_t pc,
-                         const char*& source_file, uint32_t& line_number) const
+                         const char** source_file, uint32_t* line_number) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  uint32_t GetOwnerThreadId();
+
   static bool (*is_sensitive_thread_hook_)();
   static uint32_t lock_profiling_threshold_;
 
   Mutex monitor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  ConditionVariable monitor_contenders_ GUARDED_BY(monitor_lock_);
 
   // Which thread currently owns the lock?
-  Thread* volatile owner_;
+  Thread* volatile owner_ GUARDED_BY(monitor_lock_);
 
   // Owner's recursive lock depth.
   int lock_count_ GUARDED_BY(monitor_lock_);
 
-  // What object are we part of (for debugging).
+  // What object are we part of.
   mirror::Object* obj_;
 
   // Threads currently waiting on this monitor.
@@ -205,9 +211,9 @@
  public:
   explicit MonitorInfo(mirror::Object* o) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  Thread* owner;
-  size_t entry_count;
-  std::vector<Thread*> waiters;
+  Thread* owner_;
+  size_t entry_count_;
+  std::vector<Thread*> waiters_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(MonitorInfo);
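
The new monitor_contenders_ condition variable replaces the old trick of blocking on monitor_lock_ itself: contenders in Lock() wait on it, and Unlock() clears owner_ and signals one waiter. A generic sketch of that handshake with standard-library primitives (not the runtime's Mutex/ConditionVariable types, which additionally track thread state and lock levels):

#include <condition_variable>
#include <mutex>

// Sketch of the owner_/monitor_contenders_ handshake: Lock() waits while another
// thread owns the monitor, Unlock() clears the owner and signals one waiter.
// Thread identity is reduced to an opaque pointer.
class FatMonitorSketch {
 public:
  void Lock(void* self) {
    std::unique_lock<std::mutex> lk(monitor_lock_);
    while (owner_ != nullptr && owner_ != self) {
      monitor_contenders_.wait(lk);  // Contended: block until signalled.
    }
    if (owner_ == self) {
      ++lock_count_;  // Recursive acquisition.
    } else {
      owner_ = self;
      lock_count_ = 0;
    }
  }

  void Unlock(void* self) {
    std::lock_guard<std::mutex> lk(monitor_lock_);
    if (owner_ != self) return;  // The real code throws IllegalMonitorStateException.
    if (lock_count_ == 0) {
      owner_ = nullptr;
      monitor_contenders_.notify_one();  // Wake a contender.
    } else {
      --lock_count_;
    }
  }

 private:
  std::mutex monitor_lock_;
  std::condition_variable monitor_contenders_;
  void* owner_ = nullptr;
  int lock_count_ = 0;
};
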
diff --git a/runtime/monitor_android.cc b/runtime/monitor_android.cc
index 8efa072..d89290b 100644
--- a/runtime/monitor_android.cc
+++ b/runtime/monitor_android.cc
@@ -81,7 +81,7 @@
   mirror::ArtMethod* m = self->GetCurrentMethod(&pc);
   const char* filename;
   uint32_t line_number;
-  TranslateLocation(m, pc, filename, line_number);
+  TranslateLocation(m, pc, &filename, &line_number);
   cp = EventLogWriteString(cp, filename, strlen(filename));
 
   // Emit the source code line number, 5 bytes.
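
TranslateLocation now takes pointer out-parameters instead of non-const references, so call sites such as the one above pass &filename/&line_number and the mutation is visible at the call. A trivial stand-alone illustration of the convention (the function here is hypothetical, not the runtime API):

#include <cstdint>
#include <cstdio>

// Out-parameter style: the caller passes addresses, so mutation is explicit.
void DescribeLocation(const char** source_file, uint32_t* line_number) {
  *source_file = "Foo.java";
  *line_number = 42;
}

int main() {
  const char* file;
  uint32_t line;
  DescribeLocation(&file, &line);  // '&' makes it obvious these are outputs.
  std::printf("%s:%u\n", file, line);
  return 0;
}
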
diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc
index eaf67b8..5508270 100644
--- a/runtime/native/dalvik_system_VMStack.cc
+++ b/runtime/native/dalvik_system_VMStack.cc
@@ -34,7 +34,7 @@
   }
   // Suspend thread to build stack trace.
   bool timed_out;
-  Thread* thread = Thread::SuspendForDebugger(peer, true, &timed_out);
+  Thread* thread = ThreadList::SuspendThreadByPeer(peer, true, false, &timed_out);
   if (thread != NULL) {
     jobject trace;
     {
@@ -42,7 +42,7 @@
       trace = thread->CreateInternalStackTrace(soa);
     }
     // Restart suspended thread.
-    Runtime::Current()->GetThreadList()->Resume(thread, true);
+    Runtime::Current()->GetThreadList()->Resume(thread, false);
     return trace;
   } else {
     if (timed_out) {
diff --git a/runtime/native/java_lang_DexCache.cc b/runtime/native/java_lang_DexCache.cc
index f8eeb29..9b83206 100644
--- a/runtime/native/java_lang_DexCache.cc
+++ b/runtime/native/java_lang_DexCache.cc
@@ -26,7 +26,7 @@
   ScopedObjectAccess soa(env);
   mirror::DexCache* dex_cache = soa.Decode<mirror::DexCache*>(javaDexCache);
   // Should only be called while holding the lock on the dex cache.
-  DCHECK_EQ(dex_cache->GetThinLockId(), soa.Self()->GetThinLockId());
+  DCHECK_EQ(dex_cache->GetLockOwnerThreadId(), soa.Self()->GetThreadId());
   const DexFile* dex_file = dex_cache->GetDexFile();
   if (dex_file == NULL) {
     return NULL;
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index e85ef09..a9de086 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -122,13 +122,13 @@
   // thread list lock to avoid this, as setting the thread name causes mutator to lock/unlock
   // in the DDMS send code.
   bool timed_out;
-  Thread* thread = Thread::SuspendForDebugger(peer, true, &timed_out);
+  Thread* thread = ThreadList::SuspendThreadByPeer(peer, true, false, &timed_out);
   if (thread != NULL) {
     {
       ScopedObjectAccess soa(env);
       thread->SetThreadName(name.c_str());
     }
-    Runtime::Current()->GetThreadList()->Resume(thread, true);
+    Runtime::Current()->GetThreadList()->Resume(thread, false);
   } else if (timed_out) {
     LOG(ERROR) << "Trying to set thread name to '" << name.c_str() << "' failed as the thread "
         "failed to suspend within a generous timeout.";
diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
index 0676968..4f81a0b 100644
--- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
+++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
@@ -44,22 +44,10 @@
  * NULL on failure, e.g. if the threadId couldn't be found.
  */
 static jobjectArray DdmVmInternal_getStackTraceById(JNIEnv* env, jclass, jint thin_lock_id) {
-  ScopedLocalRef<jobject> peer(env, NULL);
-  {
-    Thread* t = Runtime::Current()->GetThreadList()->FindThreadByThinLockId(thin_lock_id);
-    if (t == NULL) {
-      return NULL;
-    }
-    ScopedObjectAccess soa(env);
-    peer.reset(soa.AddLocalReference<jobject>(t->GetPeer()));
-  }
-  if (peer.get() == NULL) {
-    return NULL;
-  }
-
   // Suspend thread to build stack trace.
+  ThreadList* thread_list = Runtime::Current()->GetThreadList();
   bool timed_out;
-  Thread* thread = Thread::SuspendForDebugger(peer.get(), true, &timed_out);
+  Thread* thread = thread_list->SuspendThreadByThreadId(thin_lock_id, false, &timed_out);
   if (thread != NULL) {
     jobject trace;
     {
@@ -67,7 +55,7 @@
       trace = thread->CreateInternalStackTrace(soa);
     }
     // Restart suspended thread.
-    Runtime::Current()->GetThreadList()->Resume(thread, true);
+    thread_list->Resume(thread, false);
     return Thread::InternalStackTraceToStackTraceElementArray(env, trace);
   } else {
     if (timed_out) {
@@ -115,7 +103,7 @@
   GetTaskStats(t->GetTid(), &native_thread_state, &utime, &stime, &task_cpu);
 
   std::vector<uint8_t>& bytes = *reinterpret_cast<std::vector<uint8_t>*>(context);
-  JDWP::Append4BE(bytes, t->GetThinLockId());
+  JDWP::Append4BE(bytes, t->GetThreadId());
   JDWP::Append1BE(bytes, Dbg::ToJdwpThreadStatus(t->GetState()));
   JDWP::Append4BE(bytes, t->GetTid());
   JDWP::Append4BE(bytes, utime);
diff --git a/runtime/object_utils.h b/runtime/object_utils.h
index f83db90..692cecc 100644
--- a/runtime/object_utils.h
+++ b/runtime/object_utils.h
@@ -36,7 +36,8 @@
 
 class ObjectLock {
  public:
-  explicit ObjectLock(Thread* self, mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+  explicit ObjectLock(Thread* self, mirror::Object* object)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       : self_(self), obj_(object) {
     CHECK(object != NULL);
     obj_->MonitorEnter(self_);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 17455dc..a936baa 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -75,6 +75,7 @@
       is_explicit_gc_disabled_(false),
       default_stack_size_(0),
       heap_(NULL),
+      max_spins_before_thin_lock_inflation_(Monitor::kDefaultMaxSpinsBeforeThinLockInflation),
       monitor_list_(NULL),
       thread_list_(NULL),
       intern_table_(NULL),
@@ -350,6 +351,7 @@
   // Only the main GC thread, no workers.
   parsed->conc_gc_threads_ = 0;
   parsed->stack_size_ = 0;  // 0 means default.
+  parsed->max_spins_before_thin_lock_inflation_ = Monitor::kDefaultMaxSpinsBeforeThinLockInflation;
   parsed->low_memory_mode_ = false;
 
   parsed->is_compiler_ = false;
@@ -510,6 +512,10 @@
         return NULL;
       }
       parsed->stack_size_ = size;
+    } else if (StartsWith(option, "-XX:MaxSpinsBeforeThinLockInflation=")) {
+      parsed->max_spins_before_thin_lock_inflation_ =
+          strtoul(option.substr(strlen("-XX:MaxSpinsBeforeThinLockInflation=")).c_str(),
+                  nullptr, 10);
     } else if (option == "-XX:LongPauseLogThreshold") {
       parsed->long_pause_log_threshold_ =
           ParseMemoryOption(option.substr(strlen("-XX:LongPauseLogThreshold=")).c_str(), 1024);
@@ -866,6 +872,8 @@
   default_stack_size_ = options->stack_size_;
   stack_trace_file_ = options->stack_trace_file_;
 
+  max_spins_before_thin_lock_inflation_ = options->max_spins_before_thin_lock_inflation_;
+
   monitor_list_ = new MonitorList;
   thread_list_ = new ThreadList;
   intern_table_ = new InternTable;
@@ -901,7 +909,7 @@
   // objects. We can't supply a thread group yet; it will be fixed later. Since we are the main
   // thread, we do not get a java peer.
   Thread* self = Thread::Attach("main", false, NULL, false);
-  CHECK_EQ(self->thin_lock_id_, ThreadList::kMainId);
+  CHECK_EQ(self->thin_lock_thread_id_, ThreadList::kMainThreadId);
   CHECK(self != NULL);
 
   // Set us to runnable so tools using a runtime can allocate and GC by default
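
For illustration only (not in this change): the new flag follows the existing "-XX:" parsing
pattern above, reading the decimal value after '=' with strtoul. A standalone analogue, where
kDefaultMaxSpins and the sample options are placeholders rather than the runtime's real defaults:

#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>

static const size_t kDefaultMaxSpins = 50;  // Placeholder default, not ART's actual value.

static bool StartsWith(const std::string& s, const char* prefix) {
  return s.compare(0, strlen(prefix), prefix) == 0;
}

static size_t ParseMaxSpinsOption(const std::string& option) {
  static const char kPrefix[] = "-XX:MaxSpinsBeforeThinLockInflation=";
  if (StartsWith(option, kPrefix)) {
    // Same strtoul(..., nullptr, 10) idiom as the runtime.cc hunk above.
    return strtoul(option.substr(strlen(kPrefix)).c_str(), nullptr, 10);
  }
  return kDefaultMaxSpins;
}

int main() {
  std::cout << ParseMaxSpinsOption("-XX:MaxSpinsBeforeThinLockInflation=100") << "\n";  // 100
  std::cout << ParseMaxSpinsOption("-Xmx64m") << "\n";  // falls back to the placeholder default
  return 0;
}
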
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 552cfdf..36b0bd6 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -112,6 +112,7 @@
     size_t parallel_gc_threads_;
     size_t conc_gc_threads_;
     size_t stack_size_;
+    size_t max_spins_before_thin_lock_inflation_;
     bool low_memory_mode_;
     size_t lock_profiling_threshold_;
     std::string stack_trace_file_;
@@ -283,6 +284,10 @@
     return java_vm_;
   }
 
+  size_t GetMaxSpinsBeforeThinkLockInflation() const {
+    return max_spins_before_thin_lock_inflation_;
+  }
+
   MonitorList* GetMonitorList() const {
     return monitor_list_;
   }
@@ -455,6 +460,8 @@
 
   gc::Heap* heap_;
 
+  // The number of spins performed before thread suspension is used to forcibly inflate a thin lock.
+  size_t max_spins_before_thin_lock_inflation_;
   MonitorList* monitor_list_;
 
   ThreadList* thread_list_;
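
Illustrative sketch, not part of this change: the consumer of GetMaxSpinsBeforeThinkLockInflation()
is the monitor/thin-lock code, which is not shown in this excerpt. Assuming the value bounds a spin
loop before the contended path escalates to suspending the owner and inflating the lock, the shape
is roughly the following, with std::atomic standing in for ART's lock words and the escalation left
as a caller-supplied callback:

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <sched.h>

class SpinThenEscalateLock {
 public:
  template <typename EscalateFn>
  void Lock(uint32_t self_id, size_t max_spins, EscalateFn escalate) {
    size_t spins = 0;
    uint32_t expected = 0;  // 0 means unlocked.
    while (!owner_thread_id_.compare_exchange_weak(expected, self_id,
                                                   std::memory_order_acquire)) {
      expected = 0;
      if (++spins > max_spins) {
        escalate();  // Placeholder for "suspend the owner by thread id, inflate, block".
        spins = 0;
      } else {
        sched_yield();
      }
    }
  }

  void Unlock() {
    owner_thread_id_.store(0, std::memory_order_release);
  }

 private:
  std::atomic<uint32_t> owner_thread_id_{0};
};

In the runtime, max_spins would come from Runtime::Current()->GetMaxSpinsBeforeThinLockInflation().
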
diff --git a/runtime/signal_catcher.cc b/runtime/signal_catcher.cc
index 15eb27d..fe62e25 100644
--- a/runtime/signal_catcher.cc
+++ b/runtime/signal_catcher.cc
@@ -147,7 +147,6 @@
   CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
   if (self->ReadFlag(kCheckpointRequest)) {
     self->RunCheckpointFunction();
-    self->AtomicClearFlag(kCheckpointRequest);
   }
   self->EndAssertNoThreadSuspension(old_cause);
   thread_list->ResumeAll();
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 4552062..7d28785 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -80,17 +80,16 @@
   union StateAndFlags new_state_and_flags;
   do {
     old_state_and_flags = state_and_flags_;
+    if (UNLIKELY((old_state_and_flags.as_struct.flags & kCheckpointRequest) != 0)) {
+      RunCheckpointFunction();
+      continue;
+    }
     // Copy over flags and try to clear the checkpoint bit if it is set.
     new_state_and_flags.as_struct.flags = old_state_and_flags.as_struct.flags & ~kCheckpointRequest;
     new_state_and_flags.as_struct.state = new_state;
     // CAS the value without a memory barrier, that will occur in the unlock below.
   } while (UNLIKELY(android_atomic_cas(old_state_and_flags.as_int, new_state_and_flags.as_int,
                                        &state_and_flags_.as_int) != 0));
-  // If we toggled the checkpoint flag we must have cleared it.
-  uint16_t flag_change = new_state_and_flags.as_struct.flags ^ old_state_and_flags.as_struct.flags;
-  if (UNLIKELY((flag_change & kCheckpointRequest) != 0)) {
-    RunCheckpointFunction();
-  }
   // Release share on mutator_lock_.
   Locks::mutator_lock_->SharedUnlock(this);
 }
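
Illustrative sketch, not part of this change: the hunk above moves the checkpoint handling to the
front of the CAS loop, so a pending checkpoint is run (and its flag cleared) before the state
transition is attempted, rather than after the CAS succeeds. A standalone analogue using an
illustrative word layout (low 16 bits flags, high 16 bits state), not ART's actual StateAndFlags:

#include <atomic>
#include <cstdint>
#include <functional>

enum : uint32_t { kCheckpointRequestFlag = 1u };

struct StateAndFlagsWord {
  std::atomic<uint32_t> word{0};

  void TransitionToState(uint16_t new_state, const std::function<void()>& run_checkpoint) {
    uint32_t old_word = word.load(std::memory_order_relaxed);
    while (true) {
      if ((old_word & kCheckpointRequestFlag) != 0) {
        // Run the pending checkpoint first, then clear its flag and re-read the word.
        // (In the patch, RunCheckpointFunction() clears the flag itself.)
        run_checkpoint();
        word.fetch_and(~static_cast<uint32_t>(kCheckpointRequestFlag), std::memory_order_relaxed);
        old_word = word.load(std::memory_order_relaxed);
        continue;
      }
      // No checkpoint pending: install the new state, keeping the remaining flags.
      uint32_t new_word = (static_cast<uint32_t>(new_state) << 16) | (old_word & 0xFFFFu);
      if (word.compare_exchange_weak(old_word, new_word,
                                     std::memory_order_release, std::memory_order_relaxed)) {
        return;
      }
      // compare_exchange_weak reloaded old_word on failure; retry.
    }
  }
};
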
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 7040337..de14dbb 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -311,7 +311,7 @@
   CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, this), "attach self");
   DCHECK_EQ(Thread::Current(), this);
 
-  thin_lock_id_ = thread_list->AllocThreadId(this);
+  thin_lock_thread_id_ = thread_list->AllocThreadId(this);
   InitStackHwm();
 
   jni_env_ = new JNIEnvExt(this, java_vm);
@@ -476,9 +476,9 @@
 
 void Thread::ShortDump(std::ostream& os) const {
   os << "Thread[";
-  if (GetThinLockId() != 0) {
+  if (GetThreadId() != 0) {
     // If we're in kStarting, we won't have a thin lock id or tid yet.
-    os << GetThinLockId()
+    os << GetThreadId()
              << ",tid=" << GetTid() << ',';
   }
   os << GetState()
@@ -574,18 +574,32 @@
   ATRACE_BEGIN("Checkpoint function");
   checkpoint_function_->Run(this);
   ATRACE_END();
+  checkpoint_function_ = NULL;
+  AtomicClearFlag(kCheckpointRequest);
 }
 
 bool Thread::RequestCheckpoint(Closure* function) {
-  CHECK(!ReadFlag(kCheckpointRequest)) << "Already have a pending checkpoint request";
-  checkpoint_function_ = function;
   union StateAndFlags old_state_and_flags = state_and_flags_;
+  if (old_state_and_flags.as_struct.state != kRunnable) {
+    return false;  // Fail, thread is suspended and so can't run a checkpoint.
+  }
+  if ((old_state_and_flags.as_struct.flags & kCheckpointRequest) != 0) {
+    return false;  // Fail, already a checkpoint pending.
+  }
+  CHECK(checkpoint_function_ == NULL);
+  checkpoint_function_ = function;
+  // Checkpoint function installed; now install the flag bit.
   // We must be runnable to request a checkpoint.
   old_state_and_flags.as_struct.state = kRunnable;
   union StateAndFlags new_state_and_flags = old_state_and_flags;
   new_state_and_flags.as_struct.flags |= kCheckpointRequest;
   int succeeded = android_atomic_cmpxchg(old_state_and_flags.as_int, new_state_and_flags.as_int,
                                          &state_and_flags_.as_int);
+  if (UNLIKELY(succeeded != 0)) {
+    // The thread changed state before the checkpoint flag could be installed; roll back.
+    CHECK(checkpoint_function_ == function);
+    checkpoint_function_ = NULL;
+  }
   return succeeded == 0;
 }
 
@@ -600,88 +614,6 @@
   VLOG(threads) << this << " self-reviving";
 }
 
-Thread* Thread::SuspendForDebugger(jobject peer, bool request_suspension, bool* timed_out) {
-  static const useconds_t kTimeoutUs = 30 * 1000000;  // 30s.
-  useconds_t total_delay_us = 0;
-  useconds_t delay_us = 0;
-  bool did_suspend_request = false;
-  *timed_out = false;
-  while (true) {
-    Thread* thread;
-    {
-      ScopedObjectAccess soa(Thread::Current());
-      Thread* self = soa.Self();
-      MutexLock mu(self, *Locks::thread_list_lock_);
-      thread = Thread::FromManagedThread(soa, peer);
-      if (thread == NULL) {
-        JNIEnv* env = self->GetJniEnv();
-        ScopedLocalRef<jstring> scoped_name_string(env,
-                                                   (jstring)env->GetObjectField(peer,
-                                                              WellKnownClasses::java_lang_Thread_name));
-        ScopedUtfChars scoped_name_chars(env, scoped_name_string.get());
-        if (scoped_name_chars.c_str() == NULL) {
-            LOG(WARNING) << "No such thread for suspend: " << peer;
-            env->ExceptionClear();
-        } else {
-            LOG(WARNING) << "No such thread for suspend: " << peer << ":" << scoped_name_chars.c_str();
-        }
-
-        return NULL;
-      }
-      {
-        MutexLock mu(soa.Self(), *Locks::thread_suspend_count_lock_);
-        if (request_suspension) {
-          thread->ModifySuspendCount(soa.Self(), +1, true /* for_debugger */);
-          request_suspension = false;
-          did_suspend_request = true;
-        }
-        // IsSuspended on the current thread will fail as the current thread is changed into
-        // Runnable above. As the suspend count is now raised if this is the current thread
-        // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
-        // to just explicitly handle the current thread in the callers to this code.
-        CHECK_NE(thread, soa.Self()) << "Attempt to suspend the current thread for the debugger";
-        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
-        // count, or else we've waited and it has self suspended) or is the current thread, we're
-        // done.
-        if (thread->IsSuspended()) {
-          return thread;
-        }
-        if (total_delay_us >= kTimeoutUs) {
-          LOG(ERROR) << "Thread suspension timed out: " << peer;
-          if (did_suspend_request) {
-            thread->ModifySuspendCount(soa.Self(), -1, true /* for_debugger */);
-          }
-          *timed_out = true;
-          return NULL;
-        }
-      }
-      // Release locks and come out of runnable state.
-    }
-    for (int i = kLockLevelCount - 1; i >= 0; --i) {
-      BaseMutex* held_mutex = Thread::Current()->GetHeldMutex(static_cast<LockLevel>(i));
-      if (held_mutex != NULL) {
-        LOG(FATAL) << "Holding " << held_mutex->GetName()
-            << " while sleeping for thread suspension";
-      }
-    }
-    {
-      useconds_t new_delay_us = delay_us * 2;
-      CHECK_GE(new_delay_us, delay_us);
-      if (new_delay_us < 500000) {  // Don't allow sleeping to be more than 0.5s.
-        delay_us = new_delay_us;
-      }
-    }
-    if (delay_us == 0) {
-      sched_yield();
-      // Default to 1 milliseconds (note that this gets multiplied by 2 before the first sleep).
-      delay_us = 500;
-    } else {
-      usleep(delay_us);
-      total_delay_us += delay_us;
-    }
-  }
-}
-
 void Thread::DumpState(std::ostream& os, const Thread* thread, pid_t tid) {
   std::string group_name;
   int priority;
@@ -718,7 +650,7 @@
       os << " daemon";
     }
     os << " prio=" << priority
-       << " tid=" << thread->GetThinLockId()
+       << " tid=" << thread->GetThreadId()
        << " " << thread->GetState();
     if (thread->IsStillStarting()) {
       os << " (still starting up)";
@@ -968,9 +900,9 @@
       jpeer_(NULL),
       stack_begin_(NULL),
       stack_size_(0),
+      thin_lock_thread_id_(0),
       stack_trace_sample_(NULL),
       trace_clock_base_(0),
-      thin_lock_id_(0),
       tid_(0),
       wait_mutex_(new Mutex("a thread wait mutex")),
       wait_cond_(new ConditionVariable("a thread wait condition variable", *wait_mutex_)),
@@ -1718,7 +1650,7 @@
   DO_THREAD_OFFSET(self_);
   DO_THREAD_OFFSET(stack_end_);
   DO_THREAD_OFFSET(suspend_count_);
-  DO_THREAD_OFFSET(thin_lock_id_);
+  DO_THREAD_OFFSET(thin_lock_thread_id_);
   // DO_THREAD_OFFSET(top_of_managed_stack_);
   // DO_THREAD_OFFSET(top_of_managed_stack_pc_);
   DO_THREAD_OFFSET(top_sirt_);
@@ -2001,7 +1933,7 @@
   if (object == NULL) {
     return false;
   }
-  return object->GetThinLockId() == thin_lock_id_;
+  return object->GetLockOwnerThreadId() == thin_lock_thread_id_;
 }
 
 // RootVisitor parameters are: (const Object* obj, size_t vreg, const StackVisitor* visitor).
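
Sketch for illustration (not in this change): RequestCheckpoint() above now fails gracefully
instead of CHECKing. The closure is installed first, the request flag is CASed in only while the
target is still runnable, and on CAS failure the closure is uninstalled so the caller (see
RunCheckpoint in thread_list.cc below) can suspend the thread and run the checkpoint itself. A
standalone analogue, ignoring the Locks::thread_suspend_count_lock_ requirement that guards the
real installation:

#include <atomic>
#include <cstdint>

// Illustrative encoding: state value in the high 16 bits, flags in the low 16 bits.
enum : uint32_t { kRunnableState = 1u << 16, kCheckpointRequestFlag = 1u };

struct TargetThread {
  std::atomic<uint32_t> state_and_flags{kRunnableState};
  void (*checkpoint_function)(TargetThread*) = nullptr;
};

static bool RequestCheckpointSketch(TargetThread* target, void (*function)(TargetThread*)) {
  uint32_t old_word = target->state_and_flags.load(std::memory_order_relaxed);
  if ((old_word >> 16) != (kRunnableState >> 16)) {
    return false;  // Not runnable; the caller must run the checkpoint on the thread's behalf.
  }
  if ((old_word & kCheckpointRequestFlag) != 0) {
    return false;  // A checkpoint is already pending.
  }
  target->checkpoint_function = function;  // Install the closure before publishing the flag.
  uint32_t new_word = old_word | kCheckpointRequestFlag;
  if (!target->state_and_flags.compare_exchange_strong(old_word, new_word,
                                                       std::memory_order_release,
                                                       std::memory_order_relaxed)) {
    target->checkpoint_function = nullptr;  // State changed underneath us; roll back.
    return false;
  }
  return true;
}
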
diff --git a/runtime/thread.h b/runtime/thread.h
index 2d9e009..3aa1373 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -154,7 +154,8 @@
   void ModifySuspendCount(Thread* self, int delta, bool for_debugger)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_suspend_count_lock_);
 
-  bool RequestCheckpoint(Closure* function);
+  bool RequestCheckpoint(Closure* function)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_suspend_count_lock_);
 
   // Called when thread detected that the thread_suspend_count_ was non-zero. Gives up share of
   // mutator_lock_ and waits until it is resumed and thread_suspend_count_ is zero.
@@ -175,14 +176,6 @@
       UNLOCK_FUNCTION(Locks::mutator_lock_)
       ALWAYS_INLINE;
 
-  // Wait for a debugger suspension on the thread associated with the given peer. Returns the
-  // thread on success, else NULL. If the thread should be suspended then request_suspension should
-  // be true on entry. If the suspension times out then *timeout is set to true.
-  static Thread* SuspendForDebugger(jobject peer,  bool request_suspension, bool* timed_out)
-      LOCKS_EXCLUDED(Locks::mutator_lock_,
-                     Locks::thread_list_lock_,
-                     Locks::thread_suspend_count_lock_);
-
   // Once called thread suspension will cause an assertion failure.
 #ifndef NDEBUG
   const char* StartAssertNoThreadSuspension(const char* cause) {
@@ -219,7 +212,7 @@
     return daemon_;
   }
 
-  bool HoldsLock(mirror::Object*);
+  bool HoldsLock(mirror::Object*) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
    * Changes the priority of this thread to match that of the java.lang.Thread object.
@@ -237,8 +230,8 @@
    */
   static int GetNativePriority();
 
-  uint32_t GetThinLockId() const {
-    return thin_lock_id_;
+  uint32_t GetThreadId() const {
+    return thin_lock_thread_id_;
   }
 
   pid_t GetTid() const {
@@ -414,7 +407,7 @@
   }
 
   static ThreadOffset ThinLockIdOffset() {
-    return ThreadOffset(OFFSETOF_MEMBER(Thread, thin_lock_id_));
+    return ThreadOffset(OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
   }
 
   static ThreadOffset CardTableOffset() {
@@ -702,18 +695,18 @@
   // Size of the stack
   size_t stack_size_;
 
-  // Pointer to previous stack trace captured by sampling profiler.
-  std::vector<mirror::ArtMethod*>* stack_trace_sample_;
-
-  // The clock base used for tracing.
-  uint64_t trace_clock_base_;
-
   // Thin lock thread id. This is a small integer used by the thin lock implementation.
   // This is not to be confused with the native thread's tid, nor is it the value returned
   // by java.lang.Thread.getId --- this is a distinct value, used only for locking. One
   // important difference between this id and the ids visible to managed code is that these
   // ones get reused (to ensure that they fit in the number of bits available).
-  uint32_t thin_lock_id_;
+  uint32_t thin_lock_thread_id_;
+
+  // Pointer to previous stack trace captured by sampling profiler.
+  std::vector<mirror::ArtMethod*>* stack_trace_sample_;
+
+  // The clock base used for tracing.
+  uint64_t trace_clock_base_;
 
   // System thread id.
   pid_t tid_;
@@ -722,13 +715,16 @@
 
   // Guards the 'interrupted_' and 'wait_monitor_' members.
   mutable Mutex* wait_mutex_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  // Condition variable waited upon during a wait.
   ConditionVariable* wait_cond_ GUARDED_BY(wait_mutex_);
-  // Pointer to the monitor lock we're currently waiting on (or NULL).
+  // Pointer to the monitor lock we're currently waiting on or NULL if not waiting.
   Monitor* wait_monitor_ GUARDED_BY(wait_mutex_);
   // Thread "interrupted" status; stays raised until queried or thrown.
   bool32_t interrupted_ GUARDED_BY(wait_mutex_);
-  // The next thread in the wait set this thread is part of.
+  // The next thread in the wait set this thread is part of or NULL if not waiting.
   Thread* wait_next_;
+
+
   // If we're blocked in MonitorEnter, this is the object we're trying to lock.
   mirror::Object* monitor_enter_object_;
 
@@ -785,7 +781,8 @@
   // Cause for last suspension.
   const char* last_no_thread_suspension_cause_;
 
-  // Pending checkpoint functions.
+  // Pending checkpoint function or NULL if none is pending. Installation is guarded by
+  // Locks::thread_suspend_count_lock_.
   Closure* checkpoint_function_;
 
  public:
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 44cf810..ff1ed2a 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -17,6 +17,8 @@
 #include "thread_list.h"
 
 #include <dirent.h>
+#include <ScopedLocalRef.h>
+#include <ScopedUtfChars.h>
 #include <sys/types.h>
 #include <unistd.h>
 
@@ -24,8 +26,13 @@
 #include "base/mutex-inl.h"
 #include "base/timing_logger.h"
 #include "debugger.h"
+#include "jni_internal.h"
+#include "lock_word.h"
+#include "monitor.h"
+#include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "utils.h"
+#include "well_known_classes.h"
 
 namespace art {
 
@@ -33,6 +40,7 @@
     : allocated_ids_lock_("allocated thread ids lock"),
       suspend_all_count_(0), debug_suspend_all_count_(0),
       thread_exit_cond_("thread exit condition variable", *Locks::thread_list_lock_) {
+  CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1)));
 }
 
 ThreadList::~ThreadList() {
@@ -160,18 +168,19 @@
     // Call a checkpoint function for each thread, threads which are suspend get their checkpoint
     // manually called.
     MutexLock mu(self, *Locks::thread_list_lock_);
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     for (const auto& thread : list_) {
       if (thread != self) {
-        for (;;) {
+        while (true) {
           if (thread->RequestCheckpoint(checkpoint_function)) {
             // This thread will run it's checkpoint some time in the near future.
             count++;
             break;
           } else {
             // We are probably suspended, try to make sure that we stay suspended.
-            MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
             // The thread switched back to runnable.
             if (thread->GetState() == kRunnable) {
+              // Spurious failure, try again.
               continue;
             }
             thread->ModifySuspendCount(self, +1, false);
@@ -204,7 +213,7 @@
       }
     }
     // We know for sure that the thread is suspended at this point.
-    thread->RunCheckpointFunction();
+    checkpoint_function->Run(thread);
     {
       MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
       thread->ModifySuspendCount(self, -1, false);
@@ -322,6 +331,178 @@
   VLOG(threads) << "Resume(" << *thread << ") complete";
 }
 
+static void ThreadSuspendByPeerWarning(Thread* self, int level, const char* message, jobject peer) {
+  JNIEnvExt* env = self->GetJniEnv();
+  ScopedLocalRef<jstring>
+      scoped_name_string(env, (jstring)env->GetObjectField(peer,
+                                                          WellKnownClasses::java_lang_Thread_name));
+  ScopedUtfChars scoped_name_chars(env, scoped_name_string.get());
+  if (scoped_name_chars.c_str() == NULL) {
+    LOG(level) << message << ": " << peer;
+    env->ExceptionClear();
+  } else {
+    LOG(level) << message << ": " << peer << ":" << scoped_name_chars.c_str();
+  }
+}
+
+// Unlike suspending all threads, where we can wait to acquire the mutator_lock_, suspending an
+// individual thread requires polling. delay_us is the requested sleep and total_delay_us
+// accumulates the total time spent sleeping, for timeout detection. The first sleep is just a
+// yield; subsequent sleeps double delay_us from 1ms up to a 500ms cap.
+static void ThreadSuspendSleep(Thread* self, useconds_t* delay_us, useconds_t* total_delay_us) {
+  for (int i = kLockLevelCount - 1; i >= 0; --i) {
+    BaseMutex* held_mutex = self->GetHeldMutex(static_cast<LockLevel>(i));
+    if (held_mutex != NULL) {
+      LOG(FATAL) << "Holding " << held_mutex->GetName() << " while sleeping for thread suspension";
+    }
+  }
+  {
+    useconds_t new_delay_us = (*delay_us) * 2;
+    CHECK_GE(new_delay_us, *delay_us);
+    if (new_delay_us < 500000) {  // Don't allow sleeping to be more than 0.5s.
+      *delay_us = new_delay_us;
+    }
+  }
+  if ((*delay_us) == 0) {
+    sched_yield();
+    // Default to 1 millisecond (the 500us set here is doubled before the first actual sleep).
+    (*delay_us) = 500;
+  } else {
+    usleep(*delay_us);
+    (*total_delay_us) += (*delay_us);
+  }
+}
+
+Thread* ThreadList::SuspendThreadByPeer(jobject peer, bool request_suspension,
+                                        bool debug_suspension, bool* timed_out) {
+  static const useconds_t kTimeoutUs = 30 * 1000000;  // 30s.
+  useconds_t total_delay_us = 0;
+  useconds_t delay_us = 0;
+  bool did_suspend_request = false;
+  *timed_out = false;
+  Thread* self = Thread::Current();
+  while (true) {
+    Thread* thread;
+    {
+      ScopedObjectAccess soa(self);
+      MutexLock mu(self, *Locks::thread_list_lock_);
+      thread = Thread::FromManagedThread(soa, peer);
+      if (thread == NULL) {
+        ThreadSuspendByPeerWarning(self, WARNING, "No such thread for suspend", peer);
+        return NULL;
+      }
+      {
+        MutexLock mu(self, *Locks::thread_suspend_count_lock_);
+        if (request_suspension) {
+          thread->ModifySuspendCount(self, +1, debug_suspension);
+          request_suspension = false;
+          did_suspend_request = true;
+        } else {
+          // If the caller isn't requesting suspension, a suspension should have already occurred.
+          CHECK_GT(thread->GetSuspendCount(), 0);
+        }
+        // IsSuspended on the current thread will fail as the current thread is changed into
+        // Runnable above. As the suspend count is now raised if this is the current thread
+        // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
+        // to just explicitly handle the current thread in the callers to this code.
+        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
+        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
+        // count, or else we've waited and it has self suspended) or is the current thread, we're
+        // done.
+        if (thread->IsSuspended()) {
+          return thread;
+        }
+        if (total_delay_us >= kTimeoutUs) {
+          ThreadSuspendByPeerWarning(self, ERROR, "Thread suspension timed out", peer);
+          if (did_suspend_request) {
+            thread->ModifySuspendCount(soa.Self(), -1, debug_suspension);
+          }
+          *timed_out = true;
+          return NULL;
+        }
+      }
+      // Release locks and come out of runnable state.
+    }
+    ThreadSuspendSleep(self, &delay_us, &total_delay_us);
+  }
+}
+
+static void ThreadSuspendByThreadIdWarning(int level, const char* message, uint32_t thread_id) {
+  LOG(level) << StringPrintf("%s: %d", message, thread_id);
+}
+
+Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id, bool debug_suspension,
+                                            bool* timed_out) {
+  static const useconds_t kTimeoutUs = 30 * 1000000;  // 30s.
+  useconds_t total_delay_us = 0;
+  useconds_t delay_us = 0;
+  bool did_suspend_request = false;
+  *timed_out = false;
+  Thread* self = Thread::Current();
+  CHECK_NE(thread_id, kInvalidThreadId);
+  while (true) {
+    Thread* thread = NULL;
+    {
+      ScopedObjectAccess soa(self);
+      MutexLock mu(self, *Locks::thread_list_lock_);
+      for (const auto& it : list_) {
+        if (it->GetThreadId() == thread_id) {
+          thread = it;
+          break;
+        }
+      }
+      if (thread == NULL) {
+        // There's a race between inflating a lock and the owner giving up ownership and then dying.
+        ThreadSuspendByThreadIdWarning(WARNING, "No such thread id for suspend", thread_id);
+        return NULL;
+      }
+      {
+        MutexLock mu(self, *Locks::thread_suspend_count_lock_);
+        if (!did_suspend_request) {
+          thread->ModifySuspendCount(self, +1, debug_suspension);
+          did_suspend_request = true;
+        } else {
+          // A suspension was requested on a previous iteration, so the count should still be raised.
+          CHECK_GT(thread->GetSuspendCount(), 0);
+        }
+        // IsSuspended on the current thread will fail as the current thread is changed into
+        // Runnable above. As the suspend count is now raised if this is the current thread
+        // it will self suspend on transition to Runnable, making it hard to work with. It's simpler
+        // to just explicitly handle the current thread in the callers to this code.
+        CHECK_NE(thread, self) << "Attempt to suspend the current thread for the debugger";
+        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
+        // count, or else we've waited and it has self suspended) or is the current thread, we're
+        // done.
+        if (thread->IsSuspended()) {
+          return thread;
+        }
+        if (total_delay_us >= kTimeoutUs) {
+          ThreadSuspendByThreadIdWarning(ERROR, "Thread suspension timed out", thread_id);
+          if (did_suspend_request) {
+            thread->ModifySuspendCount(soa.Self(), -1, debug_suspension);
+          }
+          *timed_out = true;
+          return NULL;
+        }
+      }
+      // Release locks and come out of runnable state.
+    }
+    ThreadSuspendSleep(self, &delay_us, &total_delay_us);
+  }
+}
+
+Thread* ThreadList::FindThreadByThreadId(uint32_t thin_lock_id) {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, *Locks::thread_list_lock_);
+  for (const auto& thread : list_) {
+    if (thread->GetThreadId() == thin_lock_id) {
+      CHECK(thread == self || thread->IsSuspended());
+      return thread;
+    }
+  }
+  return NULL;
+}
+
 void ThreadList::SuspendAllForDebugger() {
   Thread* self = Thread::Current();
   Thread* debug_thread = Dbg::GetDebugThread();
@@ -528,8 +709,8 @@
   // suspend and so on, must happen at this point, and not in ~Thread.
   self->Destroy();
 
-  uint32_t thin_lock_id = self->thin_lock_id_;
-  self->thin_lock_id_ = 0;
+  uint32_t thin_lock_id = self->thin_lock_thread_id_;
+  self->thin_lock_thread_id_ = 0;
   ReleaseThreadId(self, thin_lock_id);
   while (self != NULL) {
     // Remove and delete the Thread* while holding the thread_list_lock_ and
@@ -609,14 +790,4 @@
   allocated_ids_.reset(id);
 }
 
-Thread* ThreadList::FindThreadByThinLockId(uint32_t thin_lock_id) {
-  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
-  for (const auto& thread : list_) {
-    if (thread->GetThinLockId() == thin_lock_id) {
-      return thread;
-    }
-  }
-  return NULL;
-}
-
 }  // namespace art
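
Illustrative sketch, not part of this change: stripped of the held-mutex assertions, the
ThreadSuspendSleep() schedule above amounts to the helper below. The first call only yields;
later calls sleep 1ms, 2ms, 4ms, ... with the delay kept below 0.5s, and the accumulated sleep
time is what SuspendThreadByPeer/SuspendThreadByThreadId compare against their 30s timeout:

#include <cstdio>
#include <sched.h>
#include <unistd.h>

static void SuspendPollBackoff(useconds_t* delay_us, useconds_t* total_delay_us) {
  useconds_t new_delay_us = *delay_us * 2;
  if (new_delay_us >= *delay_us && new_delay_us < 500000) {  // Keep the sleep below 0.5s.
    *delay_us = new_delay_us;
  }
  if (*delay_us == 0) {
    sched_yield();
    *delay_us = 500;  // 0.5ms; doubled to 1ms before the first real sleep.
  } else {
    usleep(*delay_us);
    *total_delay_us += *delay_us;
  }
}

int main() {
  useconds_t delay_us = 0;
  useconds_t total_delay_us = 0;
  for (int i = 0; i < 12 && total_delay_us < 30 * 1000000u; ++i) {
    SuspendPollBackoff(&delay_us, &total_delay_us);
    std::printf("attempt %2d: delay=%uus total=%uus\n", i,
                static_cast<unsigned>(delay_us), static_cast<unsigned>(total_delay_us));
  }
  return 0;
}
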
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 3df3e2c..b1b3e88 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_THREAD_LIST_H_
 
 #include "base/mutex.h"
+#include "jni.h"
 #include "root_visitor.h"
 
 #include <bitset>
@@ -31,8 +32,8 @@
 class ThreadList {
  public:
   static const uint32_t kMaxThreadId = 0xFFFF;
-  static const uint32_t kInvalidId = 0;
-  static const uint32_t kMainId = 1;
+  static const uint32_t kInvalidThreadId = 0;
+  static const uint32_t kMainThreadId = 1;
 
   explicit ThreadList();
   ~ThreadList();
@@ -59,6 +60,30 @@
       LOCKS_EXCLUDED(Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);
 
+
+  // Suspend a thread using a peer, typically used by the debugger. Returns the thread on success,
+  // else NULL. The peer is used to identify the thread to avoid races with the thread terminating.
+  // If the thread should be suspended then request_suspension should be true; otherwise the
+  // routine will wait for a previously requested suspension to take effect. If the suspension
+  // times out then *timed_out is set to true.
+  static Thread* SuspendThreadByPeer(jobject peer, bool request_suspension, bool debug_suspension,
+                                     bool* timed_out)
+      LOCKS_EXCLUDED(Locks::mutator_lock_,
+                     Locks::thread_list_lock_,
+                     Locks::thread_suspend_count_lock_);
+
+  // Suspend a thread using its thread id, typically used by lock/monitor inflation. Returns the
+  // thread on success, else NULL. The thread id is used to identify the thread and avoid races
+  // with the thread terminating. As thread ids are recycled, this may not suspend the expected
+  // thread, which may itself be terminating. If the suspension times out, *timed_out is set to true.
+  Thread* SuspendThreadByThreadId(uint32_t thread_id, bool debug_suspension, bool* timed_out)
+      LOCKS_EXCLUDED(Locks::mutator_lock_,
+                     Locks::thread_list_lock_,
+                     Locks::thread_suspend_count_lock_);
+
+  // Find an already suspended thread (or self) by its id.
+  Thread* FindThreadByThreadId(uint32_t thin_lock_id);
+
   // Run a checkpoint on threads, running threads are not suspended but run the checkpoint inside
   // of the suspend check. Returns how many checkpoints we should expect to run.
   size_t RunCheckpoint(Closure* checkpoint_function);
@@ -99,8 +124,6 @@
     return list_;
   }
 
-  Thread* FindThreadByThinLockId(uint32_t thin_lock_id);
-
  private:
   uint32_t AllocThreadId(Thread* self);
   void ReleaseThreadId(Thread* self, uint32_t id) LOCKS_EXCLUDED(allocated_ids_lock_);