Revert "ART: Implement literal pool for arm, fix branch fixup."
This reverts commit f38caa68cce551fb153dff37d01db518e58ed00f.
Change-Id: Id88b82cc949d288cfcdb3c401b96f884b777fc40
Reason: broke the tests.
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index 016f28e..3a0d520 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -56,7 +56,7 @@
jni_asm->IncreaseFrameSize(32);
jni_asm->DecreaseFrameSize(32);
jni_asm->RemoveFrame(frame_size, callee_save_regs);
- jni_asm->FinalizeCode();
+ jni_asm->EmitSlowPaths();
std::vector<uint8_t> actual_asm(jni_asm->CodeSize());
MemoryRegion code(&actual_asm[0], actual_asm.size());
jni_asm->FinalizeInstructions(code);
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 85fd696..4d7d86c 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -474,7 +474,7 @@
DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size));
// 17. Finalize code generation
- __ FinalizeCode();
+ __ EmitSlowPaths();
size_t cs = __ CodeSize();
std::vector<uint8_t> managed_code(cs);
MemoryRegion code(&managed_code[0], managed_code.size());
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
index a3e889f..d010430 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -82,7 +82,6 @@
arm::kLoadWord, arm::PC, arm::R0,
ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
assembler.bkpt(0);
- assembler.FinalizeCode();
std::vector<uint8_t> thunk_code(assembler.CodeSize());
MemoryRegion code(thunk_code.data(), thunk_code.size());
assembler.FinalizeInstructions(code);
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 29355d6..ee48789 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -233,7 +233,7 @@
kArm64PointerSize).Int32Value());
assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
// Ensure we emit the literal pool.
- assembler.FinalizeCode();
+ assembler.EmitSlowPaths();
std::vector<uint8_t> thunk_code(assembler.CodeSize());
MemoryRegion code(thunk_code.data(), thunk_code.size());
assembler.FinalizeInstructions(code);
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index a82b08a..e6b1f7c 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -358,7 +358,6 @@
number_of_register_pairs_(number_of_register_pairs),
core_callee_save_mask_(core_callee_save_mask),
fpu_callee_save_mask_(fpu_callee_save_mask),
- stack_map_stream_(graph->GetArena()),
is_baseline_(false),
graph_(graph),
compiler_options_(compiler_options),
@@ -366,7 +365,8 @@
block_order_(nullptr),
current_block_index_(0),
is_leaf_(true),
- requires_current_method_(false) {}
+ requires_current_method_(false),
+ stack_map_stream_(graph->GetArena()) {}
// Register allocation logic.
void AllocateRegistersLocally(HInstruction* instruction) const;
@@ -436,8 +436,6 @@
const uint32_t core_callee_save_mask_;
const uint32_t fpu_callee_save_mask_;
- StackMapStream stack_map_stream_;
-
// Whether we are using baseline.
bool is_baseline_;
@@ -466,6 +464,8 @@
// Whether an instruction in the graph accesses the current method.
bool requires_current_method_;
+ StackMapStream stack_map_stream_;
+
friend class OptimizingCFITest;
DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index ec0d56d..f773106 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -392,26 +392,12 @@
location_builder_(graph, this),
instruction_visitor_(graph, this),
move_resolver_(graph->GetArena(), this),
- assembler_(),
+ assembler_(false /* can_relocate_branches */),
isa_features_(isa_features) {
// Save the PC register to mimic Quick.
AddAllocatedRegister(Location::RegisterLocation(PC));
}
-void CodeGeneratorARM::Finalize(CodeAllocator* allocator) {
- // Ensure that we fix up branches and literal loads and emit the literal pool.
- __ FinalizeCode();
-
- // Adjust native pc offsets in stack maps.
- for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) {
- uint32_t old_position = stack_map_stream_.GetStackMap(i).native_pc_offset;
- uint32_t new_position = __ GetAdjustedPosition(old_position);
- stack_map_stream_.SetStackMapNativePcOffset(i, new_position);
- }
-
- CodeGenerator::Finalize(allocator);
-}
-
Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const {
switch (type) {
case Primitive::kPrimLong: {
@@ -2856,7 +2842,7 @@
Location left = locations->InAt(0);
Location right = locations->InAt(1);
- Label less, greater, done;
+ NearLabel less, greater, done;
Primitive::Type type = compare->InputAt(0)->GetType();
switch (type) {
case Primitive::kPrimLong: {
@@ -2952,7 +2938,7 @@
Register temp1,
Register temp2,
HInstruction* instruction) {
- Label fail;
+ NearLabel fail;
if (offset != 0) {
__ LoadImmediate(temp1, offset);
__ add(IP, addr, ShifterOperand(temp1));
@@ -3632,7 +3618,7 @@
Register object,
Register value,
bool can_be_null) {
- Label is_null;
+ NearLabel is_null;
if (can_be_null) {
__ CompareAndBranchIfZero(value, &is_null);
}
@@ -4061,7 +4047,7 @@
Register cls = locations->InAt(1).AsRegister<Register>();
Register out = locations->Out().AsRegister<Register>();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
- Label done, zero;
+ NearLabel done, zero;
SlowPathCodeARM* slow_path = nullptr;
// Return 0 if `obj` is null.
@@ -4118,15 +4104,19 @@
instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc());
codegen_->AddSlowPath(slow_path);
+ NearLabel done;
// avoid null check if we know obj is not null.
if (instruction->MustDoNullCheck()) {
- __ CompareAndBranchIfZero(obj, slow_path->GetExitLabel());
+ __ CompareAndBranchIfZero(obj, &done);
}
// Compare the class of `obj` with `cls`.
__ LoadFromOffset(kLoadWord, temp, obj, class_offset);
__ cmp(temp, ShifterOperand(cls));
__ b(slow_path->GetEntryLabel(), NE);
__ Bind(slow_path->GetExitLabel());
+ if (instruction->MustDoNullCheck()) {
+ __ Bind(&done);
+ }
}
void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) {
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index c5a28ba..d84f2d3 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -289,8 +289,6 @@
block_labels_.SetSize(GetGraph()->GetBlocks().Size());
}
- void Finalize(CodeAllocator* allocator) OVERRIDE;
-
const ArmInstructionSetFeatures& GetInstructionSetFeatures() const {
return isa_features_;
}
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 550ed70..bc3653d 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -134,11 +134,6 @@
return stack_maps_.GetRawStorage()[i];
}
- void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) {
- DCHECK_LT(i, stack_maps_.Size());
- stack_maps_.GetRawStorage()[i].native_pc_offset = native_pc_offset;
- }
-
uint32_t ComputeMaxNativePcOffset() const;
// Prepares the stream to fill in a memory region. Must be called before FillIn.
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index facc630..cb51ed8 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -17,21 +17,21 @@
#include "trampoline_compiler.h"
#include "jni_env_ext.h"
-#include "utils/arm/assembler_thumb2.h"
+#include "utils/arm/assembler_arm.h"
#include "utils/arm64/assembler_arm64.h"
#include "utils/mips/assembler_mips.h"
#include "utils/mips64/assembler_mips64.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86_64/assembler_x86_64.h"
-#define __ assembler.
+#define __ assembler->
namespace art {
namespace arm {
static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
ThreadOffset<4> offset) {
- Thumb2Assembler assembler;
+ std::unique_ptr<ArmAssembler> assembler(static_cast<ArmAssembler*>(Assembler::Create(kThumb2)));
switch (abi) {
case kInterpreterAbi: // Thread* is first argument (R0) in interpreter ABI.
@@ -46,11 +46,10 @@
}
__ bkpt(0);
- __ FinalizeCode();
- size_t cs = __ CodeSize();
+ size_t cs = assembler->CodeSize();
std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
- __ FinalizeInstructions(code);
+ assembler->FinalizeInstructions(code);
return entry_stub.release();
}
@@ -59,7 +58,7 @@
namespace arm64 {
static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
ThreadOffset<8> offset) {
- Arm64Assembler assembler;
+ std::unique_ptr<Arm64Assembler> assembler(static_cast<Arm64Assembler*>(Assembler::Create(kArm64)));
switch (abi) {
case kInterpreterAbi: // Thread* is first argument (X0) in interpreter ABI.
@@ -83,11 +82,11 @@
break;
}
- __ FinalizeCode();
- size_t cs = __ CodeSize();
+ assembler->EmitSlowPaths();
+ size_t cs = assembler->CodeSize();
std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
- __ FinalizeInstructions(code);
+ assembler->FinalizeInstructions(code);
return entry_stub.release();
}
@@ -96,7 +95,7 @@
namespace mips {
static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
ThreadOffset<4> offset) {
- MipsAssembler assembler;
+ std::unique_ptr<MipsAssembler> assembler(static_cast<MipsAssembler*>(Assembler::Create(kMips)));
switch (abi) {
case kInterpreterAbi: // Thread* is first argument (A0) in interpreter ABI.
@@ -113,11 +112,10 @@
__ Nop();
__ Break();
- __ FinalizeCode();
- size_t cs = __ CodeSize();
+ size_t cs = assembler->CodeSize();
std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
- __ FinalizeInstructions(code);
+ assembler->FinalizeInstructions(code);
return entry_stub.release();
}
@@ -126,7 +124,7 @@
namespace mips64 {
static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
ThreadOffset<8> offset) {
- Mips64Assembler assembler;
+ std::unique_ptr<Mips64Assembler> assembler(static_cast<Mips64Assembler*>(Assembler::Create(kMips64)));
switch (abi) {
case kInterpreterAbi: // Thread* is first argument (A0) in interpreter ABI.
@@ -143,11 +141,10 @@
__ Nop();
__ Break();
- __ FinalizeCode();
- size_t cs = __ CodeSize();
+ size_t cs = assembler->CodeSize();
std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
- __ FinalizeInstructions(code);
+ assembler->FinalizeInstructions(code);
return entry_stub.release();
}
@@ -155,17 +152,16 @@
namespace x86 {
static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<4> offset) {
- X86Assembler assembler;
+ std::unique_ptr<X86Assembler> assembler(static_cast<X86Assembler*>(Assembler::Create(kX86)));
// All x86 trampolines call via the Thread* held in fs.
__ fs()->jmp(Address::Absolute(offset));
__ int3();
- __ FinalizeCode();
- size_t cs = __ CodeSize();
+ size_t cs = assembler->CodeSize();
std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
- __ FinalizeInstructions(code);
+ assembler->FinalizeInstructions(code);
return entry_stub.release();
}
@@ -173,17 +169,17 @@
namespace x86_64 {
static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<8> offset) {
- x86_64::X86_64Assembler assembler;
+ std::unique_ptr<x86_64::X86_64Assembler>
+ assembler(static_cast<x86_64::X86_64Assembler*>(Assembler::Create(kX86_64)));
// All x86 trampolines call via the Thread* held in gs.
__ gs()->jmp(x86_64::Address::Absolute(offset, true));
__ int3();
- __ FinalizeCode();
- size_t cs = __ CodeSize();
+ size_t cs = assembler->CodeSize();
std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
- __ FinalizeInstructions(code);
+ assembler->FinalizeInstructions(code);
return entry_stub.release();
}
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 3458a44..350efca 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -17,7 +17,6 @@
#ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_H_
#define ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_H_
-#include <type_traits>
#include <vector>
#include "base/bit_utils.h"
@@ -34,47 +33,14 @@
class Arm32Assembler;
class Thumb2Assembler;
-// Assembler literal is a value embedded in code, retrieved using a PC-relative load.
-class Literal {
+// This class indicates that the label and its uses
+// will fall into a range that is encodable in 16bits on thumb2.
+class NearLabel : public Label {
public:
- static constexpr size_t kMaxSize = 8;
-
- Literal(uint32_t size, const uint8_t* data)
- : label_(), size_(size) {
- DCHECK_LE(size, Literal::kMaxSize);
- memcpy(data_, data, size);
- }
-
- template <typename T>
- T GetValue() const {
- DCHECK_EQ(size_, sizeof(T));
- T value;
- memcpy(&value, data_, sizeof(T));
- return value;
- }
-
- uint32_t GetSize() const {
- return size_;
- }
-
- const uint8_t* GetData() const {
- return data_;
- }
-
- Label* GetLabel() {
- return &label_;
- }
-
- const Label* GetLabel() const {
- return &label_;
- }
+ NearLabel() {}
private:
- Label label_;
- const uint32_t size_;
- uint8_t data_[kMaxSize];
-
- DISALLOW_COPY_AND_ASSIGN(Literal);
+ DISALLOW_COPY_AND_ASSIGN(NearLabel);
};
class ShifterOperand {
@@ -563,6 +529,9 @@
// Branch instructions.
virtual void b(Label* label, Condition cond = AL) = 0;
+ virtual void b(NearLabel* label, Condition cond = AL) {
+ b(static_cast<Label*>(label), cond);
+ }
virtual void bl(Label* label, Condition cond = AL) = 0;
virtual void blx(Register rm, Condition cond = AL) = 0;
virtual void bx(Register rm, Condition cond = AL) = 0;
@@ -572,31 +541,9 @@
void Pad(uint32_t bytes);
- // Get the final position of a label after local fixup based on the old position
- // recorded before FinalizeCode().
- virtual uint32_t GetAdjustedPosition(uint32_t old_position) = 0;
-
// Macros.
// Most of these are pure virtual as they need to be implemented per instruction set.
- // Create a new literal with a given value.
- // NOTE: Force the template parameter to be explicitly specified. In the absence of
- // std::omit_from_type_deduction<T> or std::identity<T>, use std::decay<T>.
- template <typename T>
- Literal* NewLiteral(typename std::decay<T>::type value) {
- static_assert(std::is_integral<T>::value, "T must be an integral type.");
- return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
- }
-
- // Create a new literal with the given data.
- virtual Literal* NewLiteral(size_t size, const uint8_t* data) = 0;
-
- // Load literal.
- virtual void LoadLiteral(Register rt, Literal* literal) = 0;
- virtual void LoadLiteral(Register rt, Register rt2, Literal* literal) = 0;
- virtual void LoadLiteral(SRegister sd, Literal* literal) = 0;
- virtual void LoadLiteral(DRegister dd, Literal* literal) = 0;
-
// Add signed constant value to rd. May clobber IP.
virtual void AddConstant(Register rd, int32_t value, Condition cond = AL) = 0;
virtual void AddConstant(Register rd, Register rn, int32_t value,
@@ -720,6 +667,9 @@
virtual void Bind(Label* label) = 0;
virtual void CompareAndBranchIfZero(Register r, Label* label) = 0;
+ virtual void CompareAndBranchIfZero(Register r, NearLabel* label) {
+ CompareAndBranchIfZero(r, static_cast<Label*>(label));
+ }
virtual void CompareAndBranchIfNonZero(Register r, Label* label) = 0;
//
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index 6e60ddc..cdf62bf 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -1354,41 +1354,6 @@
}
-uint32_t Arm32Assembler::GetAdjustedPosition(uint32_t old_position ATTRIBUTE_UNUSED) {
- LOG(FATAL) << "Unimplemented.";
- UNREACHABLE();
-}
-
-Literal* Arm32Assembler::NewLiteral(size_t size ATTRIBUTE_UNUSED,
- const uint8_t* data ATTRIBUTE_UNUSED) {
- LOG(FATAL) << "Unimplemented.";
- UNREACHABLE();
-}
-
-void Arm32Assembler::LoadLiteral(Register rt ATTRIBUTE_UNUSED,
- Literal* literal ATTRIBUTE_UNUSED) {
- LOG(FATAL) << "Unimplemented.";
- UNREACHABLE();
-}
-
-void Arm32Assembler::LoadLiteral(Register rt ATTRIBUTE_UNUSED, Register rt2 ATTRIBUTE_UNUSED,
- Literal* literal ATTRIBUTE_UNUSED) {
- LOG(FATAL) << "Unimplemented.";
- UNREACHABLE();
-}
-
-void Arm32Assembler::LoadLiteral(SRegister sd ATTRIBUTE_UNUSED,
- Literal* literal ATTRIBUTE_UNUSED) {
- LOG(FATAL) << "Unimplemented.";
- UNREACHABLE();
-}
-
-void Arm32Assembler::LoadLiteral(DRegister dd ATTRIBUTE_UNUSED,
- Literal* literal ATTRIBUTE_UNUSED) {
- LOG(FATAL) << "Unimplemented.";
- UNREACHABLE();
-}
-
void Arm32Assembler::AddConstant(Register rd, int32_t value, Condition cond) {
AddConstant(rd, rd, value, cond);
}
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 1c38eec..3164623 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -238,16 +238,7 @@
// Memory barriers.
void dmb(DmbOptions flavor) OVERRIDE;
- // Get the final position of a label after local fixup based on the old position
- // recorded before FinalizeCode().
- uint32_t GetAdjustedPosition(uint32_t old_position) OVERRIDE;
-
- Literal* NewLiteral(size_t size, const uint8_t* data) OVERRIDE;
- void LoadLiteral(Register rt, Literal* literal) OVERRIDE;
- void LoadLiteral(Register rt, Register rt2, Literal* literal) OVERRIDE;
- void LoadLiteral(SRegister sd, Literal* literal) OVERRIDE;
- void LoadLiteral(DRegister dd, Literal* literal) OVERRIDE;
-
+ // Macros.
// Add signed constant value to rd. May clobber IP.
void AddConstant(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
void AddConstant(Register rd, Register rn, int32_t value,
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index f9e1ac6..26cb6c3 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -25,309 +25,6 @@
namespace art {
namespace arm {
-void Thumb2Assembler::BindLabel(Label* label, uint32_t bound_pc) {
- CHECK(!label->IsBound());
-
- while (label->IsLinked()) {
- FixupId fixup_id = label->Position(); // The id for linked Fixup.
- Fixup* fixup = GetFixup(fixup_id); // Get the Fixup at this id.
- fixup->Resolve(bound_pc); // Fixup can be resolved now.
- // Add this fixup as a dependency of all later fixups.
- for (FixupId id = fixup_id + 1u, end = fixups_.size(); id != end; ++id) {
- GetFixup(id)->AddDependent(fixup_id);
- }
- uint32_t fixup_location = fixup->GetLocation();
- uint16_t next = buffer_.Load<uint16_t>(fixup_location); // Get next in chain.
- buffer_.Store<int16_t>(fixup_location, 0);
- label->position_ = next; // Move to next.
- }
- label->BindTo(bound_pc);
-}
-
-void Thumb2Assembler::BindLiterals() {
- // We don't add the padding here, that's done only after adjusting the Fixup sizes.
- uint32_t code_size = buffer_.Size();
- for (Literal& lit : literals_) {
- Label* label = lit.GetLabel();
- BindLabel(label, code_size);
- code_size += lit.GetSize();
- }
-}
-
-void Thumb2Assembler::AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size,
- std::deque<FixupId>* fixups_to_recalculate) {
- uint32_t adjustment = fixup->AdjustSizeIfNeeded(*current_code_size);
- if (adjustment != 0u) {
- *current_code_size += adjustment;
- for (FixupId dependent_id : fixup->Dependents()) {
- Fixup* dependent = GetFixup(dependent_id);
- dependent->IncreaseAdjustment(adjustment);
- if (buffer_.Load<int16_t>(dependent->GetLocation()) == 0) {
- buffer_.Store<int16_t>(dependent->GetLocation(), 1);
- fixups_to_recalculate->push_back(dependent_id);
- }
- }
- }
-}
-
-uint32_t Thumb2Assembler::AdjustFixups() {
- uint32_t current_code_size = buffer_.Size();
- std::deque<FixupId> fixups_to_recalculate;
- if (kIsDebugBuild) {
- // We will use the placeholders in the buffer_ to mark whether the fixup has
- // been added to the fixups_to_recalculate. Make sure we start with zeros.
- for (Fixup& fixup : fixups_) {
- CHECK_EQ(buffer_.Load<int16_t>(fixup.GetLocation()), 0);
- }
- }
- for (Fixup& fixup : fixups_) {
- AdjustFixupIfNeeded(&fixup, &current_code_size, &fixups_to_recalculate);
- }
- while (!fixups_to_recalculate.empty()) {
- // Pop the fixup.
- FixupId fixup_id = fixups_to_recalculate.front();
- fixups_to_recalculate.pop_front();
- Fixup* fixup = GetFixup(fixup_id);
- DCHECK_NE(buffer_.Load<int16_t>(fixup->GetLocation()), 0);
- buffer_.Store<int16_t>(fixup->GetLocation(), 0);
- // See if it needs adjustment.
- AdjustFixupIfNeeded(fixup, &current_code_size, &fixups_to_recalculate);
- }
- if (kIsDebugBuild) {
- // Check that no fixup is marked as being in fixups_to_recalculate anymore.
- for (Fixup& fixup : fixups_) {
- CHECK_EQ(buffer_.Load<int16_t>(fixup.GetLocation()), 0);
- }
- }
-
- // Adjust literal pool labels for padding.
- DCHECK_EQ(current_code_size & 1u, 0u);
- uint32_t literals_adjustment = current_code_size + (current_code_size & 2) - buffer_.Size();
- if (literals_adjustment != 0u) {
- for (Literal& literal : literals_) {
- Label* label = literal.GetLabel();
- DCHECK(label->IsBound());
- int old_position = label->Position();
- label->Reinitialize();
- label->BindTo(old_position + literals_adjustment);
- }
- }
-
- return current_code_size;
-}
-
-void Thumb2Assembler::EmitFixups(uint32_t adjusted_code_size) {
- // Move non-fixup code to its final place and emit fixups.
- // Process fixups in reverse order so that we don't repeatedly move the same data.
- size_t src_end = buffer_.Size();
- size_t dest_end = adjusted_code_size;
- buffer_.Resize(dest_end);
- DCHECK_GE(dest_end, src_end);
- for (auto i = fixups_.rbegin(), end = fixups_.rend(); i != end; ++i) {
- Fixup* fixup = &*i;
- if (fixup->GetOriginalSize() == fixup->GetSize()) {
- // The size of this Fixup didn't change. To avoid moving the data
- // in small chunks, emit the code to its original position.
- fixup->Emit(&buffer_, adjusted_code_size);
- fixup->Finalize(dest_end - src_end);
- } else {
- // Move the data between the end of the fixup and src_end to its final location.
- size_t old_fixup_location = fixup->GetLocation();
- size_t src_begin = old_fixup_location + fixup->GetOriginalSizeInBytes();
- size_t data_size = src_end - src_begin;
- size_t dest_begin = dest_end - data_size;
- buffer_.Move(dest_begin, src_begin, data_size);
- src_end = old_fixup_location;
- dest_end = dest_begin - fixup->GetSizeInBytes();
- // Finalize the Fixup and emit the data to the new location.
- fixup->Finalize(dest_end - src_end);
- fixup->Emit(&buffer_, adjusted_code_size);
- }
- }
- CHECK_EQ(src_end, dest_end);
-}
-
-void Thumb2Assembler::EmitLiterals() {
- if (!literals_.empty()) {
- // Load literal instructions (LDR, LDRD, VLDR) require 4-byte alignment.
- // We don't support byte and half-word literals.
- uint32_t code_size = buffer_.Size();
- DCHECK_EQ(code_size & 1u, 0u);
- if ((code_size & 2u) != 0u) {
- Emit16(0);
- }
- for (Literal& literal : literals_) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- DCHECK_EQ(static_cast<size_t>(literal.GetLabel()->Position()), buffer_.Size());
- DCHECK(literal.GetSize() == 4u || literal.GetSize() == 8u);
- for (size_t i = 0, size = literal.GetSize(); i != size; ++i) {
- buffer_.Emit<uint8_t>(literal.GetData()[i]);
- }
- }
- }
-}
-
-inline int16_t Thumb2Assembler::BEncoding16(int32_t offset, Condition cond) {
- DCHECK_EQ(offset & 1, 0);
- int16_t encoding = B15 | B14;
- if (cond != AL) {
- DCHECK(IsInt<9>(offset));
- encoding |= B12 | (static_cast<int32_t>(cond) << 8) | ((offset >> 1) & 0xff);
- } else {
- DCHECK(IsInt<12>(offset));
- encoding |= B13 | ((offset >> 1) & 0x7ff);
- }
- return encoding;
-}
-
-inline int32_t Thumb2Assembler::BEncoding32(int32_t offset, Condition cond) {
- DCHECK_EQ(offset & 1, 0);
- int32_t s = (offset >> 31) & 1; // Sign bit.
- int32_t encoding = B31 | B30 | B29 | B28 | B15 |
- (s << 26) | // Sign bit goes to bit 26.
- ((offset >> 1) & 0x7ff); // imm11 goes to bits 0-10.
- if (cond != AL) {
- DCHECK(IsInt<21>(offset));
- // Encode cond, move imm6 from bits 12-17 to bits 16-21 and move J1 and J2.
- encoding |= (static_cast<int32_t>(cond) << 22) | ((offset & 0x3f000) << (16 - 12)) |
- ((offset & (1 << 19)) >> (19 - 13)) | // Extract J1 from bit 19 to bit 13.
- ((offset & (1 << 18)) >> (18 - 11)); // Extract J2 from bit 18 to bit 11.
- } else {
- DCHECK(IsInt<25>(offset));
- int32_t j1 = ((offset >> 23) ^ s ^ 1) & 1; // Calculate J1 from I1 extracted from bit 23.
- int32_t j2 = ((offset >> 22)^ s ^ 1) & 1; // Calculate J2 from I2 extracted from bit 22.
- // Move imm10 from bits 12-21 to bits 16-25 and add J1 and J2.
- encoding |= B12 | ((offset & 0x3ff000) << (16 - 12)) |
- (j1 << 13) | (j2 << 11);
- }
- return encoding;
-}
-
-inline int16_t Thumb2Assembler::CbxzEncoding16(Register rn, int32_t offset, Condition cond) {
- DCHECK(!IsHighRegister(rn));
- DCHECK_EQ(offset & 1, 0);
- DCHECK(IsUint<7>(offset));
- DCHECK(cond == EQ || cond == NE);
- return B15 | B13 | B12 | B8 | (cond == NE ? B11 : 0) | static_cast<int32_t>(rn) |
- ((offset & 0x3e) << (3 - 1)) | // Move imm5 from bits 1-5 to bits 3-7.
- ((offset & 0x40) << (9 - 6)); // Move i from bit 6 to bit 11
-}
-
-inline int16_t Thumb2Assembler::CmpRnImm8Encoding16(Register rn, int32_t value) {
- DCHECK(!IsHighRegister(rn));
- DCHECK(IsUint<8>(value));
- return B13 | B11 | (rn << 8) | value;
-}
-
-inline int16_t Thumb2Assembler::AddRdnRmEncoding16(Register rdn, Register rm) {
- // The high bit of rn is moved across 4-bit rm.
- return B14 | B10 | (static_cast<int32_t>(rm) << 3) |
- (static_cast<int32_t>(rdn) & 7) | ((static_cast<int32_t>(rdn) & 8) << 4);
-}
-
-inline int32_t Thumb2Assembler::MovwEncoding32(Register rd, int32_t value) {
- DCHECK(IsUint<16>(value));
- return B31 | B30 | B29 | B28 | B25 | B22 |
- (static_cast<int32_t>(rd) << 8) |
- ((value & 0xf000) << (16 - 12)) | // Move imm4 from bits 12-15 to bits 16-19.
- ((value & 0x0800) << (26 - 11)) | // Move i from bit 11 to bit 26.
- ((value & 0x0700) << (12 - 8)) | // Move imm3 from bits 8-10 to bits 12-14.
- (value & 0xff); // Keep imm8 in bits 0-7.
-}
-
-inline int32_t Thumb2Assembler::MovtEncoding32(Register rd, int32_t value) {
- DCHECK_EQ(value & 0xffff, 0);
- int32_t movw_encoding = MovwEncoding32(rd, (value >> 16) & 0xffff);
- return movw_encoding | B25 | B23;
-}
-
-inline int32_t Thumb2Assembler::MovModImmEncoding32(Register rd, int32_t value) {
- uint32_t mod_imm = ModifiedImmediate(value);
- DCHECK_NE(mod_imm, kInvalidModifiedImmediate);
- return B31 | B30 | B29 | B28 | B22 | B19 | B18 | B17 | B16 |
- (static_cast<int32_t>(rd) << 8) | static_cast<int32_t>(mod_imm);
-}
-
-inline int16_t Thumb2Assembler::LdrLitEncoding16(Register rt, int32_t offset) {
- DCHECK(!IsHighRegister(rt));
- DCHECK_EQ(offset & 3, 0);
- DCHECK(IsUint<10>(offset));
- return B14 | B11 | (static_cast<int32_t>(rt) << 8) | (offset >> 2);
-}
-
-inline int32_t Thumb2Assembler::LdrLitEncoding32(Register rt, int32_t offset) {
- // NOTE: We don't support negative offset, i.e. U=0 (B23).
- return LdrRtRnImm12Encoding(rt, PC, offset);
-}
-
-inline int32_t Thumb2Assembler::LdrdEncoding32(Register rt, Register rt2, Register rn, int32_t offset) {
- DCHECK_EQ(offset & 3, 0);
- CHECK(IsUint<10>(offset));
- return B31 | B30 | B29 | B27 |
- B24 /* P = 1 */ | B23 /* U = 1 */ | B22 | 0 /* W = 0 */ | B20 |
- (static_cast<int32_t>(rn) << 16) | (static_cast<int32_t>(rt) << 12) |
- (static_cast<int32_t>(rt2) << 8) | (offset >> 2);
-}
-
-inline int32_t Thumb2Assembler::VldrsEncoding32(SRegister sd, Register rn, int32_t offset) {
- DCHECK_EQ(offset & 3, 0);
- CHECK(IsUint<10>(offset));
- return B31 | B30 | B29 | B27 | B26 | B24 |
- B23 /* U = 1 */ | B20 | B11 | B9 |
- (static_cast<int32_t>(rn) << 16) |
- ((static_cast<int32_t>(sd) & 0x01) << (22 - 0)) | // Move D from bit 0 to bit 22.
- ((static_cast<int32_t>(sd) & 0x1e) << (12 - 1)) | // Move Vd from bits 1-4 to bits 12-15.
- (offset >> 2);
-}
-
-inline int32_t Thumb2Assembler::VldrdEncoding32(DRegister dd, Register rn, int32_t offset) {
- DCHECK_EQ(offset & 3, 0);
- CHECK(IsUint<10>(offset));
- return B31 | B30 | B29 | B27 | B26 | B24 |
- B23 /* U = 1 */ | B20 | B11 | B9 | B8 |
- (rn << 16) |
- ((static_cast<int32_t>(dd) & 0x10) << (22 - 4)) | // Move D from bit 4 to bit 22.
- ((static_cast<int32_t>(dd) & 0x0f) << (12 - 0)) | // Move Vd from bits 0-3 to bits 12-15.
- (offset >> 2);
-}
-
-inline int16_t Thumb2Assembler::LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset) {
- DCHECK(!IsHighRegister(rt));
- DCHECK(!IsHighRegister(rn));
- DCHECK_EQ(offset & 3, 0);
- DCHECK(IsUint<7>(offset));
- return B14 | B13 | B11 |
- (static_cast<int32_t>(rn) << 3) | static_cast<int32_t>(rt) |
- (offset << (6 - 2)); // Move imm5 from bits 2-6 to bits 6-10.
-}
-
-int32_t Thumb2Assembler::Fixup::LoadWideOrFpEncoding(Register rbase, int32_t offset) const {
- switch (type_) {
- case kLoadLiteralWide:
- return LdrdEncoding32(rn_, rt2_, rbase, offset);
- case kLoadFPLiteralSingle:
- return VldrsEncoding32(sd_, rbase, offset);
- case kLoadFPLiteralDouble:
- return VldrdEncoding32(dd_, rbase, offset);
- default:
- LOG(FATAL) << "Unexpected type: " << static_cast<int>(type_);
- UNREACHABLE();
- }
-}
-
-inline int32_t Thumb2Assembler::LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset) {
- DCHECK(IsUint<12>(offset));
- return B31 | B30 | B29 | B28 | B27 | B23 | B22 | B20 | (rn << 16) | (rt << 12) | offset;
-}
-
-void Thumb2Assembler::FinalizeCode() {
- ArmAssembler::FinalizeCode();
- BindLiterals();
- uint32_t adjusted_code_size = AdjustFixups();
- EmitFixups(adjusted_code_size);
- EmitLiterals();
-}
-
bool Thumb2Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED,
Register rn ATTRIBUTE_UNUSED,
Opcode opcode,
@@ -974,11 +671,17 @@
EmitVFPddd(cond, B23 | B21 | B20 | B18 | B16 | B6, dd, D0, D0);
}
+
void Thumb2Assembler::b(Label* label, Condition cond) {
EmitBranch(cond, label, false, false);
}
+void Thumb2Assembler::b(NearLabel* label, Condition cond) {
+ EmitBranch(cond, label, false, false, /* is_near */ true);
+}
+
+
void Thumb2Assembler::bl(Label* label, Condition cond) {
CheckCondition(cond);
EmitBranch(cond, label, true, false);
@@ -1605,359 +1308,80 @@
}
}
-inline size_t Thumb2Assembler::Fixup::SizeInBytes(Size size) {
- switch (size) {
- case kBranch16Bit:
- return 2u;
- case kBranch32Bit:
- return 4u;
- case kCbxz16Bit:
- return 2u;
- case kCbxz32Bit:
- return 4u;
- case kCbxz48Bit:
- return 6u;
- case kLiteral1KiB:
- return 2u;
- case kLiteral4KiB:
- return 4u;
- case kLiteral64KiB:
- return 8u;
- case kLiteral1MiB:
- return 10u;
- case kLiteralFar:
- return 14u;
+void Thumb2Assembler::Branch::Emit(AssemblerBuffer* buffer) const {
+ bool link = type_ == kUnconditionalLinkX || type_ == kUnconditionalLink;
+ bool x = type_ == kUnconditionalX || type_ == kUnconditionalLinkX;
+ int32_t offset = target_ - location_;
- case kLongOrFPLiteral1KiB:
- return 4u;
- case kLongOrFPLiteral256KiB:
- return 10u;
- case kLongOrFPLiteralFar:
- return 14u;
- }
- LOG(FATAL) << "Unexpected size: " << static_cast<int>(size);
- UNREACHABLE();
-}
-
-inline uint32_t Thumb2Assembler::Fixup::GetOriginalSizeInBytes() const {
- return SizeInBytes(original_size_);
-}
-
-inline uint32_t Thumb2Assembler::Fixup::GetSizeInBytes() const {
- return SizeInBytes(size_);
-}
-
-inline size_t Thumb2Assembler::Fixup::LiteralPoolPaddingSize(uint32_t current_code_size) {
- // The code size must be a multiple of 2.
- DCHECK_EQ(current_code_size & 1u, 0u);
- // If it isn't a multiple of 4, we need to add a 2-byte padding before the literal pool.
- return current_code_size & 2;
-}
-
-inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) const {
- static constexpr int32_t int32_min = std::numeric_limits<int32_t>::min();
- static constexpr int32_t int32_max = std::numeric_limits<int32_t>::max();
- DCHECK_LE(target_, static_cast<uint32_t>(int32_max));
- DCHECK_LE(location_, static_cast<uint32_t>(int32_max));
- DCHECK_LE(adjustment_, static_cast<uint32_t>(int32_max));
- int32_t diff = static_cast<int32_t>(target_) - static_cast<int32_t>(location_);
- if (target_ > location_) {
- DCHECK_LE(adjustment_, static_cast<uint32_t>(int32_max - diff));
- diff += static_cast<int32_t>(adjustment_);
+ if (size_ == k32Bit) {
+ int32_t encoding = B31 | B30 | B29 | B28 | B15;
+ if (link) {
+ // BL or BLX immediate.
+ encoding |= B14;
+ if (!x) {
+ encoding |= B12;
+ } else {
+ // Bottom bit of offset must be 0.
+ CHECK_EQ((offset & 1), 0);
+ }
+ } else {
+ if (x) {
+ LOG(FATAL) << "Invalid use of BX";
+ UNREACHABLE();
+ } else {
+ if (cond_ == AL) {
+ // Can use the T4 encoding allowing a 24 bit offset.
+ if (!x) {
+ encoding |= B12;
+ }
+ } else {
+ // Must be T3 encoding with a 20 bit offset.
+ encoding |= cond_ << 22;
+ }
+ }
+ }
+ encoding = Thumb2Assembler::EncodeBranchOffset(offset, encoding);
+ buffer->Store<int16_t>(location_, static_cast<int16_t>(encoding >> 16));
+ buffer->Store<int16_t>(location_+2, static_cast<int16_t>(encoding & 0xffff));
} else {
- DCHECK_LE(int32_min + static_cast<int32_t>(adjustment_), diff);
- diff -= static_cast<int32_t>(adjustment_);
- }
- // The default PC adjustment for Thumb2 is 4 bytes.
- DCHECK_GE(diff, int32_min + 4);
- diff -= 4;
- // Add additional adjustment for instructions preceding the PC usage, padding
- // before the literal pool and rounding down the PC for literal loads.
- switch (GetSize()) {
- case kBranch16Bit:
- case kBranch32Bit:
- break;
-
- case kCbxz16Bit:
- break;
- case kCbxz32Bit:
- case kCbxz48Bit:
- DCHECK_GE(diff, int32_min + 2);
- diff -= 2; // Extra CMP Rn, #0, 16-bit.
- break;
-
- case kLiteral1KiB:
- case kLiteral4KiB:
- case kLongOrFPLiteral1KiB:
- DCHECK(diff >= 0 || (GetSize() == kLiteral1KiB && diff == -2));
- diff += LiteralPoolPaddingSize(current_code_size);
- // Load literal instructions round down the PC+4 to a multiple of 4, so if the PC
- // isn't a multiple of 2, we need to adjust. Since we already adjusted for the target
- // being aligned, current PC alignment can be inferred from diff.
- DCHECK_EQ(diff & 1, 0);
- diff = diff + (diff & 2);
- DCHECK_GE(diff, 0);
- break;
- case kLiteral1MiB:
- case kLiteral64KiB:
- case kLongOrFPLiteral256KiB:
- DCHECK_GE(diff, 4); // The target must be at least 4 bytes after the ADD rX, PC.
- diff -= 4; // One extra 32-bit MOV.
- diff += LiteralPoolPaddingSize(current_code_size);
- break;
- case kLiteralFar:
- case kLongOrFPLiteralFar:
- DCHECK_GE(diff, 8); // The target must be at least 4 bytes after the ADD rX, PC.
- diff -= 8; // Extra MOVW+MOVT; both 32-bit.
- diff += LiteralPoolPaddingSize(current_code_size);
- break;
- }
- return diff;
-}
-
-inline size_t Thumb2Assembler::Fixup::IncreaseSize(Size new_size) {
- DCHECK_NE(target_, kUnresolved);
- Size old_size = size_;
- size_ = new_size;
- DCHECK_GT(SizeInBytes(new_size), SizeInBytes(old_size));
- size_t adjustment = SizeInBytes(new_size) - SizeInBytes(old_size);
- if (target_ > location_) {
- adjustment_ += adjustment;
- }
- return adjustment;
-}
-
-uint32_t Thumb2Assembler::Fixup::AdjustSizeIfNeeded(uint32_t current_code_size) {
- uint32_t old_code_size = current_code_size;
- switch (GetSize()) {
- case kBranch16Bit:
- if (IsInt(cond_ != AL ? 9 : 12, GetOffset(current_code_size))) {
- break;
- }
- current_code_size += IncreaseSize(kBranch32Bit);
- FALLTHROUGH_INTENDED;
- case kBranch32Bit:
- // We don't support conditional branches beyond +-1MiB
- // or unconditional branches beyond +-16MiB.
- break;
-
- case kCbxz16Bit:
- if (IsUint<7>(GetOffset(current_code_size))) {
- break;
- }
- current_code_size += IncreaseSize(kCbxz32Bit);
- FALLTHROUGH_INTENDED;
- case kCbxz32Bit:
- if (IsInt<9>(GetOffset(current_code_size))) {
- break;
- }
- current_code_size += IncreaseSize(kCbxz48Bit);
- FALLTHROUGH_INTENDED;
- case kCbxz48Bit:
- // We don't support conditional branches beyond +-1MiB.
- break;
-
- case kLiteral1KiB:
- DCHECK(!IsHighRegister(rn_));
- if (IsUint<10>(GetOffset(current_code_size))) {
- break;
- }
- current_code_size += IncreaseSize(kLiteral4KiB);
- FALLTHROUGH_INTENDED;
- case kLiteral4KiB:
- if (IsUint<12>(GetOffset(current_code_size))) {
- break;
- }
- current_code_size += IncreaseSize(kLiteral64KiB);
- FALLTHROUGH_INTENDED;
- case kLiteral64KiB:
- // Can't handle high register which we can encounter by fall-through from kLiteral4KiB.
- if (!IsHighRegister(rn_) && IsUint<16>(GetOffset(current_code_size))) {
- break;
- }
- current_code_size += IncreaseSize(kLiteral1MiB);
- FALLTHROUGH_INTENDED;
- case kLiteral1MiB:
- if (IsUint<20>(GetOffset(current_code_size))) {
- break;
- }
- current_code_size += IncreaseSize(kLiteralFar);
- FALLTHROUGH_INTENDED;
- case kLiteralFar:
- // This encoding can reach any target.
- break;
-
- case kLongOrFPLiteral1KiB:
- if (IsUint<10>(GetOffset(current_code_size))) {
- break;
- }
- current_code_size += IncreaseSize(kLongOrFPLiteral256KiB);
- FALLTHROUGH_INTENDED;
- case kLongOrFPLiteral256KiB:
- if (IsUint<18>(GetOffset(current_code_size))) {
- break;
- }
- current_code_size += IncreaseSize(kLongOrFPLiteralFar);
- FALLTHROUGH_INTENDED;
- case kLongOrFPLiteralFar:
- // This encoding can reach any target.
- break;
- }
- return current_code_size - old_code_size;
-}
-
-void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) const {
- switch (GetSize()) {
- case kBranch16Bit: {
- DCHECK(type_ == kUnconditional || type_ == kConditional);
- DCHECK_EQ(type_ == kConditional, cond_ != AL);
- int16_t encoding = BEncoding16(GetOffset(code_size), cond_);
+ if (IsCompareAndBranch()) {
+ offset -= 4;
+ uint16_t i = (offset >> 6) & 1;
+ uint16_t imm5 = (offset >> 1) & 31U /* 0b11111 */;
+ int16_t encoding = B15 | B13 | B12 |
+ (type_ == kCompareAndBranchNonZero ? B11 : 0) |
+ static_cast<uint32_t>(rn_) |
+ B8 |
+ i << 9 |
+ imm5 << 3;
buffer->Store<int16_t>(location_, encoding);
- break;
- }
- case kBranch32Bit: {
- DCHECK(type_ == kConditional || type_ == kUnconditional ||
- type_ == kUnconditionalLink || type_ == kUnconditionalLinkX);
- DCHECK_EQ(type_ == kConditional, cond_ != AL);
- int32_t encoding = BEncoding32(GetOffset(code_size), cond_);
- if (type_ == kUnconditionalLink) {
- DCHECK_NE(encoding & B12, 0);
- encoding |= B14;
- } else if (type_ == kUnconditionalLinkX) {
- DCHECK_NE(encoding & B12, 0);
- encoding ^= B14 | B12;
+ } else {
+ offset -= 4; // Account for PC offset.
+ int16_t encoding;
+ // 16 bit.
+ if (cond_ == AL) {
+ encoding = B15 | B14 | B13 |
+ ((offset >> 1) & 0x7ff);
+ } else {
+ encoding = B15 | B14 | B12 |
+ cond_ << 8 | ((offset >> 1) & 0xff);
}
- buffer->Store<int16_t>(location_, encoding >> 16);
- buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
- break;
- }
-
- case kCbxz16Bit: {
- DCHECK(type_ == kCompareAndBranchXZero);
- int16_t encoding = CbxzEncoding16(rn_, GetOffset(code_size), cond_);
buffer->Store<int16_t>(location_, encoding);
- break;
- }
- case kCbxz32Bit: {
- DCHECK(type_ == kCompareAndBranchXZero);
- DCHECK(cond_ == EQ || cond_ == NE);
- int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0);
- int16_t b_encoding = BEncoding16(GetOffset(code_size), cond_);
- buffer->Store<int16_t>(location_, cmp_encoding);
- buffer->Store<int16_t>(location_ + 2, b_encoding);
- break;
- }
- case kCbxz48Bit: {
- DCHECK(type_ == kCompareAndBranchXZero);
- DCHECK(cond_ == EQ || cond_ == NE);
- int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0);
- int32_t b_encoding = BEncoding32(GetOffset(code_size), cond_);
- buffer->Store<int16_t>(location_, cmp_encoding);
- buffer->Store<int16_t>(location_ + 2u, b_encoding >> 16);
- buffer->Store<int16_t>(location_ + 4u, static_cast<int16_t>(b_encoding & 0xffff));
- break;
- }
-
- case kLiteral1KiB: {
- DCHECK(type_ == kLoadLiteralNarrow);
- int16_t encoding = LdrLitEncoding16(rn_, GetOffset(code_size));
- buffer->Store<int16_t>(location_, encoding);
- break;
- }
- case kLiteral4KiB: {
- DCHECK(type_ == kLoadLiteralNarrow);
- // GetOffset() uses PC+4 but load literal uses AlignDown(PC+4, 4). Adjust offset accordingly.
- int32_t encoding = LdrLitEncoding32(rn_, GetOffset(code_size));
- buffer->Store<int16_t>(location_, encoding >> 16);
- buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
- break;
- }
- case kLiteral64KiB: {
- DCHECK(type_ == kLoadLiteralNarrow);
- int32_t mov_encoding = MovwEncoding32(rn_, GetOffset(code_size));
- int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
- int16_t ldr_encoding = LdrRtRnImm5Encoding16(rn_, rn_, 0);
- buffer->Store<int16_t>(location_, mov_encoding >> 16);
- buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
- buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
- buffer->Store<int16_t>(location_ + 6u, ldr_encoding);
- break;
- }
- case kLiteral1MiB: {
- DCHECK(type_ == kLoadLiteralNarrow);
- int32_t offset = GetOffset(code_size);
- int32_t mov_encoding = MovModImmEncoding32(rn_, offset & ~0xfff);
- int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
- int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, offset & 0xfff);
- buffer->Store<int16_t>(location_, mov_encoding >> 16);
- buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
- buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
- buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16);
- buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff));
- break;
- }
- case kLiteralFar: {
- DCHECK(type_ == kLoadLiteralNarrow);
- int32_t offset = GetOffset(code_size);
- int32_t movw_encoding = MovwEncoding32(rn_, offset & 0xffff);
- int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff);
- int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
- int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, 0);
- buffer->Store<int16_t>(location_, movw_encoding >> 16);
- buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
- buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16);
- buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
- buffer->Store<int16_t>(location_ + 8u, add_pc_encoding);
- buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16);
- buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff));
- break;
- }
-
- case kLongOrFPLiteral1KiB: {
- int32_t encoding = LoadWideOrFpEncoding(PC, GetOffset(code_size)); // DCHECKs type_.
- buffer->Store<int16_t>(location_, encoding >> 16);
- buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
- break;
- }
- case kLongOrFPLiteral256KiB: {
- int32_t offset = GetOffset(code_size);
- int32_t mov_encoding = MovModImmEncoding32(IP, offset & ~0x3ff);
- int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC);
- int32_t ldr_encoding = LoadWideOrFpEncoding(IP, offset & 0x3ff); // DCHECKs type_.
- buffer->Store<int16_t>(location_, mov_encoding >> 16);
- buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
- buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
- buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16);
- buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff));
- break;
- }
- case kLongOrFPLiteralFar: {
- int32_t offset = GetOffset(code_size);
- int32_t movw_encoding = MovwEncoding32(IP, offset & 0xffff);
- int32_t movt_encoding = MovtEncoding32(IP, offset & ~0xffff);
- int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC);
- int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0); // DCHECKs type_.
- buffer->Store<int16_t>(location_, movw_encoding >> 16);
- buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
- buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16);
- buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
- buffer->Store<int16_t>(location_ + 8u, add_pc_encoding);
- buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16);
- buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff));
- break;
}
}
}
+
uint16_t Thumb2Assembler::EmitCompareAndBranch(Register rn, uint16_t prev, bool n) {
CHECK(IsLowRegister(rn));
uint32_t location = buffer_.Size();
// This is always unresolved as it must be a forward branch.
Emit16(prev); // Previous link.
- return AddFixup(Fixup::CompareAndBranch(location, rn, n ? NE : EQ));
+ return AddBranch(n ? Branch::kCompareAndBranchNonZero : Branch::kCompareAndBranchZero,
+ location, rn);
}
@@ -2195,53 +1619,47 @@
}
}
-void Thumb2Assembler::EmitBranch(Condition cond, Label* label, bool link, bool x) {
- bool use32bit = IsForced32Bit() || !CanRelocateBranches();
+
+void Thumb2Assembler::EmitBranch(Condition cond, Label* label, bool link, bool x, bool is_near) {
uint32_t pc = buffer_.Size();
- Fixup::Type branch_type;
+ Branch::Type branch_type;
if (cond == AL) {
if (link) {
- use32bit = true;
if (x) {
- branch_type = Fixup::kUnconditionalLinkX; // BLX.
+ branch_type = Branch::kUnconditionalLinkX; // BLX.
} else {
- branch_type = Fixup::kUnconditionalLink; // BX.
+ branch_type = Branch::kUnconditionalLink; // BX.
}
} else {
- branch_type = Fixup::kUnconditional; // B.
+ branch_type = Branch::kUnconditional; // B.
}
} else {
- branch_type = Fixup::kConditional; // B<cond>.
+ branch_type = Branch::kConditional; // B<cond>.
}
- Fixup::Size size = use32bit ? Fixup::kBranch32Bit : Fixup::kBranch16Bit;
- FixupId branch_id = AddFixup(Fixup::Branch(pc, branch_type, size, cond));
-
if (label->IsBound()) {
- // The branch is to a bound label which means that it's a backwards branch.
- // Record this branch as a dependency of all Fixups between the label and the branch.
- GetFixup(branch_id)->Resolve(label->Position());
- for (FixupId fixup_id = branch_id; fixup_id != 0u; ) {
- --fixup_id;
- Fixup* fixup = GetFixup(fixup_id);
- DCHECK_GE(label->Position(), 0);
- if (fixup->GetLocation() < static_cast<uint32_t>(label->Position())) {
- break;
- }
- fixup->AddDependent(branch_id);
- }
- Emit16(0);
- } else {
- // Branch target is an unbound label. Add it to a singly-linked list maintained within
- // the code with the label serving as the head.
- Emit16(static_cast<uint16_t>(label->position_));
- label->LinkTo(branch_id);
- }
+ Branch::Size size = AddBranch(branch_type, pc, label->Position(), cond); // Resolved branch.
- if (use32bit) {
- Emit16(0);
+ // The branch is to a bound label which means that it's a backwards branch. We know the
+ // current size of it so we can emit the appropriate space. Note that if it's a 16 bit
+ // branch the size may change if it so happens that other branches change size that change
+ // the distance to the target and that distance puts this branch over the limit for 16 bits.
+ if (size == Branch::k16Bit) {
+ Emit16(0); // Space for a 16 bit branch.
+ } else {
+ Emit32(0); // Space for a 32 bit branch.
+ }
+ } else {
+ // Branch is to an unbound label. Emit space for it.
+ uint16_t branch_id = AddBranch(branch_type, pc, cond, is_near); // Unresolved branch.
+ if (force_32bit_ || (!CanRelocateBranches() && !is_near)) {
+ Emit16(static_cast<uint16_t>(label->position_)); // Emit current label link.
+ Emit16(0); // another 16 bits.
+ } else {
+ Emit16(static_cast<uint16_t>(label->position_)); // Emit current label link.
+ }
+ label->LinkTo(branch_id); // Link to the branch ID.
}
- DCHECK_EQ(buffer_.Size() - pc, GetFixup(branch_id)->GetSizeInBytes());
}
@@ -2856,8 +2274,82 @@
}
+// A branch has changed size. Make a hole for it.
+void Thumb2Assembler::MakeHoleForBranch(uint32_t location, uint32_t delta) {
+ // Move the contents of the buffer using: Move(newposition, oldposition)
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ buffer_.Move(location + delta, location);
+}
+
+
void Thumb2Assembler::Bind(Label* label) {
- BindLabel(label, buffer_.Size());
+ CHECK(!label->IsBound());
+ uint32_t bound_pc = buffer_.Size();
+ std::vector<Branch*> changed_branches;
+
+ while (label->IsLinked()) {
+ uint16_t position = label->Position(); // Branch id for linked branch.
+ Branch* branch = GetBranch(position); // Get the branch at this id.
+ bool changed = branch->Resolve(bound_pc); // Branch can be resolved now.
+ uint32_t branch_location = branch->GetLocation();
+ uint16_t next = buffer_.Load<uint16_t>(branch_location); // Get next in chain.
+ if (changed) {
+ DCHECK(CanRelocateBranches());
+ MakeHoleForBranch(branch->GetLocation(), 2);
+ if (branch->IsCompareAndBranch()) {
+ // A cbz/cbnz instruction has changed size. There is no valid encoding for
+ // a 32 bit cbz/cbnz so we need to change this to an instruction pair:
+ // cmp rn, #0
+ // b<eq|ne> target
+ bool n = branch->GetType() == Branch::kCompareAndBranchNonZero;
+ Condition cond = n ? NE : EQ;
+ branch->Move(2); // Move the branch forward by 2 bytes.
+ branch->ResetTypeAndCondition(Branch::kConditional, cond);
+ branch->ResetSize(Branch::k16Bit);
+
+ // Now add a compare instruction in the place the branch was.
+ buffer_.Store<int16_t>(branch_location,
+ B13 | B11 | static_cast<int16_t>(branch->GetRegister()) << 8);
+
+ // Since have moved made a hole in the code we need to reload the
+ // current pc.
+ bound_pc = buffer_.Size();
+
+ // Now resolve the newly added branch.
+ changed = branch->Resolve(bound_pc);
+ if (changed) {
+ MakeHoleForBranch(branch->GetLocation(), 2);
+ changed_branches.push_back(branch);
+ }
+ } else {
+ changed_branches.push_back(branch);
+ }
+ }
+ label->position_ = next; // Move to next.
+ }
+ label->BindTo(bound_pc);
+
+ // Now relocate any changed branches. Do this until there are no more changes.
+ std::vector<Branch*> branches_to_process = changed_branches;
+ while (branches_to_process.size() != 0) {
+ changed_branches.clear();
+ for (auto& changed_branch : branches_to_process) {
+ for (auto& branch : branches_) {
+ bool changed = branch->Relocate(changed_branch->GetLocation(), 2);
+ if (changed) {
+ changed_branches.push_back(branch);
+ }
+ }
+ branches_to_process = changed_branches;
+ }
+ }
+}
+
+
+void Thumb2Assembler::EmitBranches() {
+ for (auto& branch : branches_) {
+ branch->Emit(&buffer_);
+ }
}
@@ -2995,85 +2487,6 @@
return imm32;
}
-uint32_t Thumb2Assembler::GetAdjustedPosition(uint32_t old_position) {
- // We can reconstruct the adjustment by going through all the fixups from the beginning
- // up to the old_position. Since we expect AdjustedPosition() to be called in a loop
- // with increasing old_position, we can use the data from last AdjustedPosition() to
- // continue where we left off and the whole loop should be O(m+n) where m is the number
- // of positions to adjust and n is the number of fixups.
- if (old_position < last_old_position_) {
- last_position_adjustment_ = 0u;
- last_old_position_ = 0u;
- last_fixup_id_ = 0u;
- }
- while (last_fixup_id_ != fixups_.size()) {
- Fixup* fixup = GetFixup(last_fixup_id_);
- if (fixup->GetLocation() >= old_position + last_position_adjustment_) {
- break;
- }
- if (fixup->GetSize() != fixup->GetOriginalSize()) {
- last_position_adjustment_ += fixup->GetSizeInBytes() - fixup->GetOriginalSizeInBytes();
- }
- ++last_fixup_id_;
- }
- last_old_position_ = old_position;
- return old_position + last_position_adjustment_;
-}
-
-Literal* Thumb2Assembler::NewLiteral(size_t size, const uint8_t* data) {
- DCHECK(size == 4u || size == 8u) << size;
- literals_.emplace_back(size, data);
- return &literals_.back();
-}
-
-void Thumb2Assembler::LoadLiteral(Register rt, Literal* literal) {
- DCHECK_EQ(literal->GetSize(), 4u);
- DCHECK(!literal->GetLabel()->IsBound());
- bool use32bit = IsForced32Bit() || IsHighRegister(rt);
- uint32_t location = buffer_.Size();
- Fixup::Size size = use32bit ? Fixup::kLiteral4KiB : Fixup::kLiteral1KiB;
- FixupId fixup_id = AddFixup(Fixup::LoadNarrowLiteral(location, rt, size));
- Emit16(static_cast<uint16_t>(literal->GetLabel()->position_));
- literal->GetLabel()->LinkTo(fixup_id);
- if (use32bit) {
- Emit16(0);
- }
- DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size());
-}
-
-void Thumb2Assembler::LoadLiteral(Register rt, Register rt2, Literal* literal) {
- DCHECK_EQ(literal->GetSize(), 8u);
- DCHECK(!literal->GetLabel()->IsBound());
- uint32_t location = buffer_.Size();
- FixupId fixup_id =
- AddFixup(Fixup::LoadWideLiteral(location, rt, rt2, Fixup::kLongOrFPLiteral1KiB));
- Emit16(static_cast<uint16_t>(literal->GetLabel()->position_));
- literal->GetLabel()->LinkTo(fixup_id);
- Emit16(0);
- DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size());
-}
-
-void Thumb2Assembler::LoadLiteral(SRegister sd, Literal* literal) {
- DCHECK_EQ(literal->GetSize(), 4u);
- DCHECK(!literal->GetLabel()->IsBound());
- uint32_t location = buffer_.Size();
- FixupId fixup_id = AddFixup(Fixup::LoadSingleLiteral(location, sd, Fixup::kLongOrFPLiteral1KiB));
- Emit16(static_cast<uint16_t>(literal->GetLabel()->position_));
- literal->GetLabel()->LinkTo(fixup_id);
- Emit16(0);
- DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size());
-}
-
-void Thumb2Assembler::LoadLiteral(DRegister dd, Literal* literal) {
- DCHECK_EQ(literal->GetSize(), 8u);
- DCHECK(!literal->GetLabel()->IsBound());
- uint32_t location = buffer_.Size();
- FixupId fixup_id = AddFixup(Fixup::LoadDoubleLiteral(location, dd, Fixup::kLongOrFPLiteral1KiB));
- Emit16(static_cast<uint16_t>(literal->GetLabel()->position_));
- literal->GetLabel()->LinkTo(fixup_id);
- Emit16(0);
- DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size());
-}
void Thumb2Assembler::AddConstant(Register rd, int32_t value, Condition cond) {
AddConstant(rd, rd, value, cond);
@@ -3350,6 +2763,16 @@
}
+void Thumb2Assembler::CompareAndBranchIfZero(Register r, NearLabel* label) {
+ if (IsLowRegister(r)) {
+ cbz(r, label);
+ } else {
+ cmp(r, ShifterOperand(0));
+ b(label, EQ);
+ }
+}
+
+
void Thumb2Assembler::CompareAndBranchIfNonZero(Register r, Label* label) {
if (CanRelocateBranches() && IsLowRegister(r)) {
cbnz(r, label);
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 5e6969b..2382b74 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -17,7 +17,6 @@
#ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
#define ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
-#include <deque>
#include <vector>
#include "base/logging.h"
@@ -35,15 +34,13 @@
: can_relocate_branches_(can_relocate_branches),
force_32bit_(false),
it_cond_index_(kNoItCondition),
- next_condition_(AL),
- fixups_(),
- literals_(),
- last_position_adjustment_(0u),
- last_old_position_(0u),
- last_fixup_id_(0u) {
+ next_condition_(AL) {
}
virtual ~Thumb2Assembler() {
+ for (auto& branch : branches_) {
+ delete branch;
+ }
}
bool IsThumb() const OVERRIDE {
@@ -58,7 +55,10 @@
return can_relocate_branches_;
}
- void FinalizeCode() OVERRIDE;
+ void FinalizeInstructions(const MemoryRegion& region) OVERRIDE {
+ EmitBranches();
+ Assembler::FinalizeInstructions(region);
+ }
// Data-processing instructions.
void and_(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
@@ -238,6 +238,7 @@
// Branch instructions.
void b(Label* label, Condition cond = AL);
+ void b(NearLabel* label, Condition cond = AL);
void bl(Label* label, Condition cond = AL);
void blx(Label* label);
void blx(Register rm, Condition cond = AL) OVERRIDE;
@@ -272,23 +273,13 @@
void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE;
void CompareAndBranchIfZero(Register r, Label* label) OVERRIDE;
+ void CompareAndBranchIfZero(Register r, NearLabel* label) OVERRIDE;
void CompareAndBranchIfNonZero(Register r, Label* label) OVERRIDE;
// Memory barriers.
void dmb(DmbOptions flavor) OVERRIDE;
- // Get the final position of a label after local fixup based on the old position
- // recorded before FinalizeCode().
- uint32_t GetAdjustedPosition(uint32_t old_position) OVERRIDE;
-
- using ArmAssembler::NewLiteral; // Make the helper template visible.
-
- Literal* NewLiteral(size_t size, const uint8_t* data) OVERRIDE;
- void LoadLiteral(Register rt, Literal* literal) OVERRIDE;
- void LoadLiteral(Register rt, Register rt2, Literal* literal) OVERRIDE;
- void LoadLiteral(SRegister sd, Literal* literal) OVERRIDE;
- void LoadLiteral(DRegister dd, Literal* literal) OVERRIDE;
-
+ // Macros.
// Add signed constant value to rd. May clobber IP.
void AddConstant(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
void AddConstant(Register rd, Register rn, int32_t value,
@@ -349,244 +340,6 @@
}
private:
- typedef uint16_t FixupId;
-
- // Fixup: branches and literal pool references.
- //
- // The thumb2 architecture allows branches to be either 16 or 32 bit instructions. This
- // depends on both the type of branch and the offset to which it is branching. The 16-bit
- // cbz and cbnz instructions may also need to be replaced with a separate 16-bit compare
- // instruction and a 16- or 32-bit branch instruction. Load from a literal pool can also be
- // 16-bit or 32-bit instruction and, if the method is large, we may need to use a sequence
- // of instructions to make up for the limited range of load literal instructions (up to
- // 4KiB for the 32-bit variant). When generating code for these insns we don't know the
- // size before hand, so we assume it is the smallest available size and determine the final
- // code offsets and sizes and emit code in FinalizeCode().
- //
- // To handle this, we keep a record of every branch and literal pool load in the program.
- // The actual instruction encoding for these is delayed until we know the final size of
- // every instruction. When we bind a label to a branch we don't know the final location yet
- // as some preceding instructions may need to be expanded, so we record a non-final offset.
- // In FinalizeCode(), we expand the sizes of branches and literal loads that are out of
- // range. With each expansion, we need to update dependent Fixups, i.e. insntructios with
- // target on the other side of the expanded insn, as their offsets change and this may
- // trigger further expansion.
- //
- // All Fixups have a 'fixup id' which is a 16 bit unsigned number used to identify the
- // Fixup. For each unresolved label we keep a singly-linked list of all Fixups pointing
- // to it, using the fixup ids as links. The first link is stored in the label's position
- // (the label is linked but not bound), the following links are stored in the code buffer,
- // in the placeholder where we will eventually emit the actual code.
-
- class Fixup {
- public:
- // Branch type.
- enum Type : uint8_t {
- kConditional, // B<cond>.
- kUnconditional, // B.
- kUnconditionalLink, // BL.
- kUnconditionalLinkX, // BLX.
- kCompareAndBranchXZero, // cbz/cbnz.
- kLoadLiteralNarrow, // Load narrrow integer literal.
- kLoadLiteralWide, // Load wide integer literal.
- kLoadFPLiteralSingle, // Load FP literal single.
- kLoadFPLiteralDouble, // Load FP literal double.
- };
-
- // Calculated size of branch instruction based on type and offset.
- enum Size : uint8_t {
- // Branch variants.
- kBranch16Bit,
- kBranch32Bit,
- // NOTE: We don't support branches which would require multiple instructions, i.e.
- // conditinoal branches beyond +-1MiB and unconditional branches beyond +-16MiB.
-
- // CBZ/CBNZ variants.
- kCbxz16Bit, // CBZ/CBNZ rX, label; X < 8; 7-bit positive offset.
- kCbxz32Bit, // CMP rX, #0 + Bcc label; X < 8; 16-bit Bcc; +-8-bit offset.
- kCbxz48Bit, // CMP rX, #0 + Bcc label; X < 8; 32-bit Bcc; up to +-1MiB offset.
-
- // Load integer literal variants.
- // LDR rX, label; X < 8; 16-bit variant up to 1KiB offset; 2 bytes.
- kLiteral1KiB,
- // LDR rX, label; 32-bit variant up to 4KiB offset; 4 bytes.
- kLiteral4KiB,
- // MOV rX, imm16 + ADD rX, pc + LDR rX, [rX]; X < 8; up to 64KiB offset; 8 bytes.
- kLiteral64KiB,
- // MOV rX, modimm + ADD rX, pc + LDR rX, [rX, #imm12]; up to 1MiB offset; 10 bytes.
- kLiteral1MiB,
- // NOTE: We don't provide the 12-byte version of kLiteralFar below where the LDR is 16-bit.
- // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc + LDR rX, [rX]; any offset; 14 bytes.
- kLiteralFar,
-
- // Load long or FP literal variants.
- // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes.
- kLongOrFPLiteral1KiB,
- // MOV ip, modimm + ADD ip, pc + VLDR s/dX, [IP, #imm8*4]; up to 256KiB offset; 10 bytes.
- kLongOrFPLiteral256KiB,
- // MOV ip, imm16 + MOVT ip, imm16 + ADD ip, pc + VLDR s/dX, [IP]; any offset; 14 bytes.
- kLongOrFPLiteralFar,
- };
-
- // Unresolved branch possibly with a condition.
- static Fixup Branch(uint32_t location, Type type, Size size = kBranch16Bit,
- Condition cond = AL) {
- DCHECK(type == kConditional || type == kUnconditional ||
- type == kUnconditionalLink || type == kUnconditionalLinkX);
- DCHECK(size == kBranch16Bit || size == kBranch32Bit);
- DCHECK(size == kBranch32Bit || (type == kConditional || type == kUnconditional));
- return Fixup(kNoRegister, kNoRegister, kNoSRegister, kNoDRegister,
- cond, type, size, location);
- }
-
- // Unresolved compare-and-branch instruction with a register and condition (EQ or NE).
- static Fixup CompareAndBranch(uint32_t location, Register rn, Condition cond) {
- DCHECK(cond == EQ || cond == NE);
- return Fixup(rn, kNoRegister, kNoSRegister, kNoDRegister,
- cond, kCompareAndBranchXZero, kCbxz16Bit, location);
- }
-
- // Load narrow literal.
- static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size = kLiteral1KiB) {
- DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB ||
- size == kLiteral1MiB || size == kLiteralFar);
- DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
- return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
- AL, kLoadLiteralNarrow, size, location);
- }
-
- // Load wide literal.
- static Fixup LoadWideLiteral(uint32_t location, Register rt, Register rt2,
- Size size = kLongOrFPLiteral1KiB) {
- DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
- size == kLongOrFPLiteralFar);
- DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
- return Fixup(rt, rt2, kNoSRegister, kNoDRegister,
- AL, kLoadLiteralWide, size, location);
- }
-
- // Load FP single literal.
- static Fixup LoadSingleLiteral(uint32_t location, SRegister sd,
- Size size = kLongOrFPLiteral1KiB) {
- DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
- size == kLongOrFPLiteralFar);
- return Fixup(kNoRegister, kNoRegister, sd, kNoDRegister,
- AL, kLoadFPLiteralSingle, size, location);
- }
-
- // Load FP double literal.
- static Fixup LoadDoubleLiteral(uint32_t location, DRegister dd,
- Size size = kLongOrFPLiteral1KiB) {
- DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
- size == kLongOrFPLiteralFar);
- return Fixup(kNoRegister, kNoRegister, kNoSRegister, dd,
- AL, kLoadFPLiteralDouble, size, location);
- }
-
- Type GetType() const {
- return type_;
- }
-
- Size GetOriginalSize() const {
- return original_size_;
- }
-
- Size GetSize() const {
- return size_;
- }
-
- uint32_t GetOriginalSizeInBytes() const;
-
- uint32_t GetSizeInBytes() const;
-
- uint32_t GetLocation() const {
- return location_;
- }
-
- uint32_t GetAdjustment() const {
- return adjustment_;
- }
-
- const std::vector<FixupId>& Dependents() const {
- return dependents_;
- }
-
- void AddDependent(FixupId dependent_id) {
- dependents_.push_back(dependent_id);
- }
-
- // Resolve a branch when the target is known.
- void Resolve(uint32_t target) {
- DCHECK_EQ(target_, kUnresolved);
- DCHECK_NE(target, kUnresolved);
- target_ = target;
- }
-
- // Check if the current size is OK for current location_, target_ and adjustment_.
- // If not, increase the size. Return the size increase, 0 if unchanged.
- // If the target if after this Fixup, also add the difference to adjustment_,
- // so that we don't need to consider forward Fixups as their own dependencies.
- uint32_t AdjustSizeIfNeeded(uint32_t current_code_size);
-
- // Increase adjustments. This is called for dependents of a Fixup when its size changes.
- void IncreaseAdjustment(uint32_t increase) {
- adjustment_ += increase;
- }
-
- // Finalize the branch with an adjustment to the location. Both location and target are updated.
- void Finalize(uint32_t location_adjustment) {
- DCHECK_NE(target_, kUnresolved);
- location_ += location_adjustment;
- target_ += location_adjustment;
- }
-
- // Emit the branch instruction into the assembler buffer. This does the
- // encoding into the thumb instruction.
- void Emit(AssemblerBuffer* buffer, uint32_t code_size) const;
-
- private:
- Fixup(Register rn, Register rt2, SRegister sd, DRegister dd,
- Condition cond, Type type, Size size, uint32_t location)
- : rn_(rn),
- rt2_(rt2),
- sd_(sd),
- dd_(dd),
- cond_(cond),
- type_(type),
- original_size_(size), size_(size),
- location_(location),
- target_(kUnresolved),
- adjustment_(0u),
- dependents_() {
- }
- static size_t SizeInBytes(Size size);
-
- // The size of padding added before the literal pool.
- static size_t LiteralPoolPaddingSize(uint32_t current_code_size);
-
- // Returns the offset from the PC-using insn to the target.
- int32_t GetOffset(uint32_t current_code_size) const;
-
- size_t IncreaseSize(Size new_size);
-
- int32_t LoadWideOrFpEncoding(Register rbase, int32_t offset) const;
-
- static constexpr uint32_t kUnresolved = 0xffffffff; // Value for target_ for unresolved.
-
- const Register rn_; // Rn for cbnz/cbz, Rt for literal loads.
- Register rt2_; // For kLoadLiteralWide.
- SRegister sd_; // For kLoadFPLiteralSingle.
- DRegister dd_; // For kLoadFPLiteralDouble.
- const Condition cond_;
- const Type type_;
- Size original_size_;
- Size size_;
- uint32_t location_; // Offset into assembler buffer in bytes.
- uint32_t target_; // Offset into assembler buffer in bytes.
- uint32_t adjustment_; // The number of extra bytes inserted between location_ and target_.
- std::vector<FixupId> dependents_; // Fixups that require adjustment when current size changes.
- };
-
// Emit a single 32 or 16 bit data processing instruction.
void EmitDataProcessing(Condition cond,
Opcode opcode,
@@ -679,7 +432,7 @@
void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond);
- void EmitBranch(Condition cond, Label* label, bool link, bool x);
+ void EmitBranch(Condition cond, Label* label, bool link, bool x, bool is_near = false);
static int32_t EncodeBranchOffset(int32_t offset, int32_t inst);
static int DecodeBranchOffset(int32_t inst);
int32_t EncodeTstOffset(int offset, int32_t inst);
@@ -722,53 +475,275 @@
CheckCondition(cond);
}
- FixupId AddFixup(Fixup fixup) {
- FixupId fixup_id = static_cast<FixupId>(fixups_.size());
- fixups_.push_back(fixup);
- // For iterating using FixupId, we need the next id to be representable.
- DCHECK_EQ(static_cast<size_t>(static_cast<FixupId>(fixups_.size())), fixups_.size());
- return fixup_id;
+ // Branches.
+ //
+ // The thumb2 architecture allows branches to be either 16 or 32 bit instructions. This
+ // depends on both the type of branch and the offset to which it is branching. When
+ // generating code for branches we don't know the size before hand (if the branch is
+ // going forward, because we haven't seen the target address yet), so we need to assume
+ // that it is going to be one of 16 or 32 bits. When we know the target (the label is 'bound')
+ // we can determine the actual size of the branch. However, if we had guessed wrong before
+ // we knew the target there will be no room in the instruction sequence for the new
+ // instruction (assume that we never decrease the size of a branch).
+ //
+ // To handle this, we keep a record of every branch in the program. The actual instruction
+ // encoding for these is delayed until we know the final size of every branch. When we
+ // bind a label to a branch (we then know the target address) we determine if the branch
+ // has changed size. If it has we need to move all the instructions in the buffer after
+ // the branch point forward by the change in size of the branch. This will create a gap
+ // in the code big enough for the new branch encoding. However, since we have moved
+ // a chunk of code we need to relocate the branches in that code to their new address.
+ //
+ // Creating a hole in the code for the new branch encoding might cause another branch that was
+ // 16 bits to become 32 bits, so we need to find this in another pass.
+ //
+ // We also need to deal with a cbz/cbnz instruction that becomes too big for its offset
+ // range. We do this by converting it to two instructions:
+ // cmp Rn, #0
+ // b<cond> target
+ // But we also need to handle the case where the conditional branch is out of range and
+ // becomes a 32 bit conditional branch.
+ //
+ // All branches have a 'branch id' which is a 16 bit unsigned number used to identify
+ // the branch. Unresolved labels use the branch id to link to the next unresolved branch.
+
+ class Branch {
+ public:
+ // Branch type.
+ enum Type {
+ kUnconditional, // B.
+ kConditional, // B<cond>.
+ kCompareAndBranchZero, // cbz.
+ kCompareAndBranchNonZero, // cbnz.
+ kUnconditionalLink, // BL.
+ kUnconditionalLinkX, // BLX.
+ kUnconditionalX // BX.
+ };
+
+ // Calculated size of branch instruction based on type and offset.
+ enum Size {
+ k16Bit,
+ k32Bit
+ };
+
+ // Unresolved branch possibly with a condition.
+ Branch(const Thumb2Assembler* assembler, Type type, uint32_t location, Condition cond = AL) :
+ assembler_(assembler), type_(type), location_(location),
+ target_(kUnresolved),
+ cond_(cond), rn_(R0) {
+ CHECK(!IsCompareAndBranch());
+ size_ = CalculateSize();
+ }
+
+ // Unresolved compare-and-branch instruction with a register.
+ Branch(const Thumb2Assembler* assembler, Type type, uint32_t location, Register rn) :
+ assembler_(assembler), type_(type), location_(location),
+ target_(kUnresolved), cond_(AL), rn_(rn) {
+ CHECK(IsCompareAndBranch());
+ size_ = CalculateSize();
+ }
+
+ // Resolved branch (can't be compare-and-branch) with a target and possibly a condition.
+ Branch(const Thumb2Assembler* assembler, Type type, uint32_t location, uint32_t target,
+ Condition cond = AL) :
+ assembler_(assembler), type_(type), location_(location),
+ target_(target), cond_(cond), rn_(R0) {
+ CHECK(!IsCompareAndBranch());
+ // Resolved branch.
+ size_ = CalculateSize();
+ }
+
+ bool IsCompareAndBranch() const {
+ return type_ == kCompareAndBranchNonZero || type_ == kCompareAndBranchZero;
+ }
+
+ // Resolve a branch when the target is known. If this causes the
+ // size of the branch to change return true. Otherwise return false.
+ bool Resolve(uint32_t target) {
+ uint32_t old_target = target_;
+ target_ = target;
+ if (assembler_->CanRelocateBranches()) {
+ Size new_size = CalculateSize();
+ if (size_ != new_size) {
+ size_ = new_size;
+ return true;
+ }
+ return false;
+ } else {
+ if (kIsDebugBuild) {
+ if (old_target == kUnresolved) {
+ // Check that the size has not increased.
+ DCHECK(!(CalculateSize() == k32Bit && size_ == k16Bit));
+ } else {
+ DCHECK(CalculateSize() == size_);
+ }
+ }
+ return false;
+ }
+ }
+
+ // Move a cbz/cbnz branch. This is always forward.
+ void Move(int32_t delta) {
+ CHECK(IsCompareAndBranch());
+ CHECK_GT(delta, 0);
+ location_ += delta;
+ target_ += delta;
+ }
+
+ // Relocate a branch by a given delta. This changed the location and
+ // target if they need to be changed. It also recalculates the
+ // size of the branch instruction. It returns true if the branch
+ // has changed size.
+ bool Relocate(uint32_t oldlocation, int32_t delta) {
+ DCHECK(assembler_->CanRelocateBranches());
+ if (location_ > oldlocation) {
+ location_ += delta;
+ }
+ if (target_ != kUnresolved) {
+ if (target_ > oldlocation) {
+ target_ += delta;
+ }
+ } else {
+ return false; // Don't know the size yet.
+ }
+
+ // Calculate the new size.
+ Size new_size = CalculateSize();
+ if (size_ != new_size) {
+ size_ = new_size;
+ return true;
+ }
+ return false;
+ }
+
+ Size GetSize() const {
+ return size_;
+ }
+
+ Type GetType() const {
+ return type_;
+ }
+
+ uint32_t GetLocation() const {
+ return location_;
+ }
+
+ // Emit the branch instruction into the assembler buffer. This does the
+ // encoding into the thumb instruction.
+ void Emit(AssemblerBuffer* buffer) const;
+
+ // Reset the type and condition to those given. This used for
+ // cbz/cbnz instructions when they are converted to cmp/b<cond>
+ void ResetTypeAndCondition(Type type, Condition cond) {
+ CHECK(IsCompareAndBranch());
+ CHECK(cond == EQ || cond == NE);
+ type_ = type;
+ cond_ = cond;
+ }
+
+ Register GetRegister() const {
+ return rn_;
+ }
+
+ void ResetSize(Size size) {
+ size_ = size;
+ }
+
+ private:
+ // Calculate the size of the branch instruction based on its type and offset.
+ Size CalculateSize() const {
+ if (target_ == kUnresolved) {
+ if (assembler_->IsForced32Bit() && (type_ == kUnconditional || type_ == kConditional)) {
+ return k32Bit;
+ }
+ if (IsCompareAndBranch()) {
+ // Compare and branch instructions can only be encoded on 16 bits.
+ return k16Bit;
+ }
+ return assembler_->CanRelocateBranches() ? k16Bit : k32Bit;
+ }
+ // When the target is resolved, we know the best encoding for it.
+ int32_t delta = target_ - location_ - 4;
+ if (delta < 0) {
+ delta = -delta;
+ }
+ switch (type_) {
+ case kUnconditional:
+ if (assembler_->IsForced32Bit() || delta >= (1 << 11)) {
+ return k32Bit;
+ } else {
+ return k16Bit;
+ }
+ case kConditional:
+ if (assembler_->IsForced32Bit() || delta >= (1 << 8)) {
+ return k32Bit;
+ } else {
+ return k16Bit;
+ }
+ case kCompareAndBranchZero:
+ case kCompareAndBranchNonZero:
+ if (delta >= (1 << 7)) {
+ return k32Bit; // Will cause this branch to become invalid.
+ }
+ return k16Bit;
+
+ case kUnconditionalX:
+ case kUnconditionalLinkX:
+ return k16Bit;
+ case kUnconditionalLink:
+ return k32Bit;
+ }
+ LOG(FATAL) << "Cannot reach";
+ return k16Bit;
+ }
+
+ static constexpr uint32_t kUnresolved = 0xffffffff; // Value for target_ for unresolved.
+ const Thumb2Assembler* assembler_;
+ Type type_;
+ uint32_t location_; // Offset into assembler buffer in bytes.
+ uint32_t target_; // Offset into assembler buffer in bytes.
+ Size size_;
+ Condition cond_;
+ const Register rn_;
+ };
+
+ std::vector<Branch*> branches_;
+
+ // Add a resolved branch and return its size.
+ Branch::Size AddBranch(Branch::Type type, uint32_t location, uint32_t target,
+ Condition cond = AL) {
+ branches_.push_back(new Branch(this, type, location, target, cond));
+ return branches_[branches_.size()-1]->GetSize();
}
- Fixup* GetFixup(FixupId fixup_id) {
- DCHECK_LT(fixup_id, fixups_.size());
- return &fixups_[fixup_id];
+ // Add a compare and branch (with a register) and return its id.
+ uint16_t AddBranch(Branch::Type type, uint32_t location, Register rn) {
+ branches_.push_back(new Branch(this, type, location, rn));
+ return branches_.size() - 1;
}
- void BindLabel(Label* label, uint32_t bound_pc);
- void BindLiterals();
- void AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size,
- std::deque<FixupId>* fixups_to_recalculate);
- uint32_t AdjustFixups();
- void EmitFixups(uint32_t adjusted_code_size);
- void EmitLiterals();
+ // Add an unresolved branch and return its id.
+ uint16_t AddBranch(Branch::Type type,
+ uint32_t location,
+ Condition cond = AL,
+ bool is_near = false) {
+ Branch* branch = new Branch(this, type, location, cond);
+ if (is_near) {
+ branch->ResetSize(Branch::k16Bit);
+ }
+ branches_.push_back(branch);
+ return branches_.size() - 1;
+ }
- static int16_t BEncoding16(int32_t offset, Condition cond);
- static int32_t BEncoding32(int32_t offset, Condition cond);
- static int16_t CbxzEncoding16(Register rn, int32_t offset, Condition cond);
- static int16_t CmpRnImm8Encoding16(Register rn, int32_t value);
- static int16_t AddRdnRmEncoding16(Register rdn, Register rm);
- static int32_t MovwEncoding32(Register rd, int32_t value);
- static int32_t MovtEncoding32(Register rd, int32_t value);
- static int32_t MovModImmEncoding32(Register rd, int32_t value);
- static int16_t LdrLitEncoding16(Register rt, int32_t offset);
- static int32_t LdrLitEncoding32(Register rt, int32_t offset);
- static int32_t LdrdEncoding32(Register rt, Register rt2, Register rn, int32_t offset);
- static int32_t VldrsEncoding32(SRegister sd, Register rn, int32_t offset);
- static int32_t VldrdEncoding32(DRegister dd, Register rn, int32_t offset);
- static int16_t LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset);
- static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset);
+ Branch* GetBranch(uint16_t branchid) {
+ if (branchid >= branches_.size()) {
+ return nullptr;
+ }
+ return branches_[branchid];
+ }
- std::vector<Fixup> fixups_;
-
- // Use std::deque<> for literal labels to allow insertions at the end
- // without invalidating pointers and references to existing elements.
- std::deque<Literal> literals_;
-
- // Data for AdjustedPosition(), see the description there.
- uint32_t last_position_adjustment_;
- uint32_t last_old_position_;
- FixupId last_fixup_id_;
+ void EmitBranches();
+ void MakeHoleForBranch(uint32_t location, uint32_t size);
};
} // namespace arm
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 68b7931..733441b 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -78,20 +78,13 @@
return imm_value;
}
- std::string RepeatInsn(size_t count, const std::string& insn) {
- std::string result;
- for (; count != 0u; --count) {
- result += insn;
- }
- return result;
- }
-
private:
std::vector<arm::Register*> registers_;
static constexpr const char* kThumb2AssemblyHeader = ".syntax unified\n.thumb\n";
};
+
TEST_F(AssemblerThumb2Test, Toolchain) {
EXPECT_TRUE(CheckTools());
}
@@ -377,577 +370,4 @@
DriverStr(expected, "StoreWordPairToNonThumbOffset");
}
-TEST_F(AssemblerThumb2Test, TwoCbzMaxOffset) {
- Label label0, label1, label2;
- __ cbz(arm::R0, &label1);
- constexpr size_t kLdrR0R0Count1 = 63;
- for (size_t i = 0; i != kLdrR0R0Count1; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
- __ Bind(&label0);
- __ cbz(arm::R0, &label2);
- __ Bind(&label1);
- constexpr size_t kLdrR0R0Count2 = 64;
- for (size_t i = 0; i != kLdrR0R0Count2; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
- __ Bind(&label2);
-
- std::string expected =
- "cbz r0, 1f\n" + // cbz r0, label1
- RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") +
- "0:\n"
- "cbz r0, 2f\n" // cbz r0, label2
- "1:\n" +
- RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
- "2:\n";
- DriverStr(expected, "TwoCbzMaxOffset");
-
- EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 0u,
- __ GetAdjustedPosition(label0.Position()));
- EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 0u,
- __ GetAdjustedPosition(label1.Position()));
- EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 0u,
- __ GetAdjustedPosition(label2.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, TwoCbzBeyondMaxOffset) {
- Label label0, label1, label2;
- __ cbz(arm::R0, &label1);
- constexpr size_t kLdrR0R0Count1 = 63;
- for (size_t i = 0; i != kLdrR0R0Count1; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
- __ Bind(&label0);
- __ cbz(arm::R0, &label2);
- __ Bind(&label1);
- constexpr size_t kLdrR0R0Count2 = 65;
- for (size_t i = 0; i != kLdrR0R0Count2; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
- __ Bind(&label2);
-
- std::string expected =
- "cmp r0, #0\n" // cbz r0, label1
- "beq.n 1f\n" +
- RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") +
- "0:\n"
- "cmp r0, #0\n" // cbz r0, label2
- "beq.n 2f\n"
- "1:\n" +
- RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
- "2:\n";
- DriverStr(expected, "TwoCbzBeyondMaxOffset");
-
- EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 2u,
- __ GetAdjustedPosition(label0.Position()));
- EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 4u,
- __ GetAdjustedPosition(label1.Position()));
- EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 4u,
- __ GetAdjustedPosition(label2.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, TwoCbzSecondAtMaxB16Offset) {
- Label label0, label1, label2;
- __ cbz(arm::R0, &label1);
- constexpr size_t kLdrR0R0Count1 = 62;
- for (size_t i = 0; i != kLdrR0R0Count1; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
- __ Bind(&label0);
- __ cbz(arm::R0, &label2);
- __ Bind(&label1);
- constexpr size_t kLdrR0R0Count2 = 128;
- for (size_t i = 0; i != kLdrR0R0Count2; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
- __ Bind(&label2);
-
- std::string expected =
- "cbz r0, 1f\n" + // cbz r0, label1
- RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") +
- "0:\n"
- "cmp r0, #0\n" // cbz r0, label2
- "beq.n 2f\n"
- "1:\n" +
- RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
- "2:\n";
- DriverStr(expected, "TwoCbzSecondAtMaxB16Offset");
-
- EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 0u,
- __ GetAdjustedPosition(label0.Position()));
- EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 2u,
- __ GetAdjustedPosition(label1.Position()));
- EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 2u,
- __ GetAdjustedPosition(label2.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, TwoCbzSecondBeyondMaxB16Offset) {
- Label label0, label1, label2;
- __ cbz(arm::R0, &label1);
- constexpr size_t kLdrR0R0Count1 = 62;
- for (size_t i = 0; i != kLdrR0R0Count1; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
- __ Bind(&label0);
- __ cbz(arm::R0, &label2);
- __ Bind(&label1);
- constexpr size_t kLdrR0R0Count2 = 129;
- for (size_t i = 0; i != kLdrR0R0Count2; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
- __ Bind(&label2);
-
- std::string expected =
- "cmp r0, #0\n" // cbz r0, label1
- "beq.n 1f\n" +
- RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") +
- "0:\n"
- "cmp r0, #0\n" // cbz r0, label2
- "beq.w 2f\n"
- "1:\n" +
- RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
- "2:\n";
- DriverStr(expected, "TwoCbzSecondBeyondMaxB16Offset");
-
- EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 2u,
- __ GetAdjustedPosition(label0.Position()));
- EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 6u,
- __ GetAdjustedPosition(label1.Position()));
- EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 6u,
- __ GetAdjustedPosition(label2.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, TwoCbzFirstAtMaxB16Offset) {
- Label label0, label1, label2;
- __ cbz(arm::R0, &label1);
- constexpr size_t kLdrR0R0Count1 = 127;
- for (size_t i = 0; i != kLdrR0R0Count1; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
- __ Bind(&label0);
- __ cbz(arm::R0, &label2);
- __ Bind(&label1);
- constexpr size_t kLdrR0R0Count2 = 64;
- for (size_t i = 0; i != kLdrR0R0Count2; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
- __ Bind(&label2);
-
- std::string expected =
- "cmp r0, #0\n" // cbz r0, label1
- "beq.n 1f\n" +
- RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") +
- "0:\n"
- "cbz r0, 2f\n" // cbz r0, label2
- "1:\n" +
- RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
- "2:\n";
- DriverStr(expected, "TwoCbzFirstAtMaxB16Offset");
-
- EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 2u,
- __ GetAdjustedPosition(label0.Position()));
- EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 2u,
- __ GetAdjustedPosition(label1.Position()));
- EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 2u,
- __ GetAdjustedPosition(label2.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, TwoCbzFirstBeyondMaxB16Offset) {
- Label label0, label1, label2;
- __ cbz(arm::R0, &label1);
- constexpr size_t kLdrR0R0Count1 = 127;
- for (size_t i = 0; i != kLdrR0R0Count1; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
- __ Bind(&label0);
- __ cbz(arm::R0, &label2);
- __ Bind(&label1);
- constexpr size_t kLdrR0R0Count2 = 65;
- for (size_t i = 0; i != kLdrR0R0Count2; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
- __ Bind(&label2);
-
- std::string expected =
- "cmp r0, #0\n" // cbz r0, label1
- "beq.w 1f\n" +
- RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") +
- "0:\n"
- "cmp r0, #0\n" // cbz r0, label2
- "beq.n 2f\n"
- "1:\n" +
- RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
- "2:\n";
- DriverStr(expected, "TwoCbzFirstBeyondMaxB16Offset");
-
- EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 4u,
- __ GetAdjustedPosition(label0.Position()));
- EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 6u,
- __ GetAdjustedPosition(label1.Position()));
- EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 6u,
- __ GetAdjustedPosition(label2.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, LoadLiteralMax1KiB) {
- arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678);
- __ LoadLiteral(arm::R0, literal);
- Label label;
- __ Bind(&label);
- constexpr size_t kLdrR0R0Count = 511;
- for (size_t i = 0; i != kLdrR0R0Count; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
-
- std::string expected =
- "1:\n"
- "ldr.n r0, [pc, #((2f - 1b - 2) & ~2)]\n" +
- RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
- ".align 2, 0\n"
- "2:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadLiteralMax1KiB");
-
- EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 0u,
- __ GetAdjustedPosition(label.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax1KiB) {
- arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678);
- __ LoadLiteral(arm::R0, literal);
- Label label;
- __ Bind(&label);
- constexpr size_t kLdrR0R0Count = 512;
- for (size_t i = 0; i != kLdrR0R0Count; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
-
- std::string expected =
- "1:\n"
- "ldr.w r0, [pc, #((2f - 1b - 2) & ~2)]\n" +
- RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
- ".align 2, 0\n"
- "2:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadLiteralBeyondMax1KiB");
-
- EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 2u,
- __ GetAdjustedPosition(label.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, LoadLiteralMax4KiB) {
- arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678);
- __ LoadLiteral(arm::R1, literal);
- Label label;
- __ Bind(&label);
- constexpr size_t kLdrR0R0Count = 2046;
- for (size_t i = 0; i != kLdrR0R0Count; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
-
- std::string expected =
- "1:\n"
- "ldr.w r1, [pc, #((2f - 1b - 2) & ~2)]\n" +
- RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
- ".align 2, 0\n"
- "2:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadLiteralMax4KiB");
-
- EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 2u,
- __ GetAdjustedPosition(label.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax4KiB) {
- arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678);
- __ LoadLiteral(arm::R1, literal);
- Label label;
- __ Bind(&label);
- constexpr size_t kLdrR0R0Count = 2047;
- for (size_t i = 0; i != kLdrR0R0Count; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
-
- std::string expected =
- "movw r1, #4096\n" // "as" does not consider (2f - 1f - 4) a constant expression for movw.
- "1:\n"
- "add r1, pc\n"
- "ldr r1, [r1, #0]\n" +
- RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
- ".align 2, 0\n"
- "2:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadLiteralBeyondMax4KiB");
-
- EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u,
- __ GetAdjustedPosition(label.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, LoadLiteralMax64KiB) {
- arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678);
- __ LoadLiteral(arm::R1, literal);
- Label label;
- __ Bind(&label);
- constexpr size_t kLdrR0R0Count = (1u << 15) - 2u;
- for (size_t i = 0; i != kLdrR0R0Count; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
-
- std::string expected =
- "movw r1, #0xfffc\n" // "as" does not consider (2f - 1f - 4) a constant expression for movw.
- "1:\n"
- "add r1, pc\n"
- "ldr r1, [r1, #0]\n" +
- RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
- ".align 2, 0\n"
- "2:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadLiteralMax64KiB");
-
- EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u,
- __ GetAdjustedPosition(label.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax64KiB) {
- arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678);
- __ LoadLiteral(arm::R1, literal);
- Label label;
- __ Bind(&label);
- constexpr size_t kLdrR0R0Count = (1u << 15) - 1u;
- for (size_t i = 0; i != kLdrR0R0Count; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
-
- std::string expected =
- "mov.w r1, #((2f - 1f - 4) & ~0xfff)\n"
- "1:\n"
- "add r1, pc\n"
- "ldr r1, [r1, #((2f - 1b - 4) & 0xfff)]\n" +
- RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
- ".align 2, 0\n"
- "2:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadLiteralBeyondMax64KiB");
-
- EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 8u,
- __ GetAdjustedPosition(label.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, LoadLiteralMax1MiB) {
- arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678);
- __ LoadLiteral(arm::R1, literal);
- Label label;
- __ Bind(&label);
- constexpr size_t kLdrR0R0Count = (1u << 19) - 3u;
- for (size_t i = 0; i != kLdrR0R0Count; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
-
- std::string expected =
- "mov.w r1, #((2f - 1f - 4) & ~0xfff)\n"
- "1:\n"
- "add r1, pc\n"
- "ldr r1, [r1, #((2f - 1b - 4) & 0xfff)]\n" +
- RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
- ".align 2, 0\n"
- "2:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadLiteralMax1MiB");
-
- EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 8u,
- __ GetAdjustedPosition(label.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax1MiB) {
- arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678);
- __ LoadLiteral(arm::R1, literal);
- Label label;
- __ Bind(&label);
- constexpr size_t kLdrR0R0Count = (1u << 19) - 2u;
- for (size_t i = 0; i != kLdrR0R0Count; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
-
- std::string expected =
- // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
- "movw r1, #(0x100000 & 0xffff)\n"
- // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt.
- "movt r1, #(0x100000 >> 16)\n"
- "1:\n"
- "add r1, pc\n"
- "ldr.w r1, [r1, #0]\n" +
- RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
- ".align 2, 0\n"
- "2:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadLiteralBeyondMax1MiB");
-
- EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 12u,
- __ GetAdjustedPosition(label.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, LoadLiteralFar) {
- arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678);
- __ LoadLiteral(arm::R1, literal);
- Label label;
- __ Bind(&label);
- constexpr size_t kLdrR0R0Count = (1u << 19) - 2u + 0x1234;
- for (size_t i = 0; i != kLdrR0R0Count; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
-
- std::string expected =
- // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
- "movw r1, #((0x100000 + 2 * 0x1234) & 0xffff)\n"
- // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt.
- "movt r1, #((0x100000 + 2 * 0x1234) >> 16)\n"
- "1:\n"
- "add r1, pc\n"
- "ldr.w r1, [r1, #0]\n" +
- RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
- ".align 2, 0\n"
- "2:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadLiteralFar");
-
- EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 12u,
- __ GetAdjustedPosition(label.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, LoadLiteralWideMax1KiB) {
- arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321));
- __ LoadLiteral(arm::R1, arm::R3, literal);
- Label label;
- __ Bind(&label);
- constexpr size_t kLdrR0R0Count = 510;
- for (size_t i = 0; i != kLdrR0R0Count; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
-
- std::string expected =
- "1:\n"
- "ldrd r1, r3, [pc, #((2f - 1b - 2) & ~2)]\n" +
- RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
- ".align 2, 0\n"
- "2:\n"
- ".word 0x87654321\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadLiteralWideMax1KiB");
-
- EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 0u,
- __ GetAdjustedPosition(label.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, LoadLiteralWideBeyondMax1KiB) {
- arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321));
- __ LoadLiteral(arm::R1, arm::R3, literal);
- Label label;
- __ Bind(&label);
- constexpr size_t kLdrR0R0Count = 511;
- for (size_t i = 0; i != kLdrR0R0Count; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
-
- std::string expected =
- "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n"
- "1:\n"
- "add ip, pc\n"
- "ldrd r1, r3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" +
- RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
- ".align 2, 0\n"
- "2:\n"
- ".word 0x87654321\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadLiteralWideBeyondMax1KiB");
-
- EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u,
- __ GetAdjustedPosition(label.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax256KiB) {
- // The literal size must match but the type doesn't, so use an int32_t rather than float.
- arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678);
- __ LoadLiteral(arm::S3, literal);
- Label label;
- __ Bind(&label);
- constexpr size_t kLdrR0R0Count = (1 << 17) - 3u;
- for (size_t i = 0; i != kLdrR0R0Count; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
-
- std::string expected =
- "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n"
- "1:\n"
- "add ip, pc\n"
- "vldr s3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" +
- RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
- ".align 2, 0\n"
- "2:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadLiteralSingleMax256KiB");
-
- EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u,
- __ GetAdjustedPosition(label.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, LoadLiteralDoubleBeyondMax256KiB) {
- // The literal size must match but the type doesn't, so use an int64_t rather than double.
- arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321));
- __ LoadLiteral(arm::D3, literal);
- Label label;
- __ Bind(&label);
- constexpr size_t kLdrR0R0Count = (1 << 17) - 2u;
- for (size_t i = 0; i != kLdrR0R0Count; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
-
- std::string expected =
- // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
- "movw ip, #(0x40000 & 0xffff)\n"
- // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt.
- "movt ip, #(0x40000 >> 16)\n"
- "1:\n"
- "add ip, pc\n"
- "vldr d3, [ip, #0]\n" +
- RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
- ".align 2, 0\n"
- "2:\n"
- ".word 0x87654321\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadLiteralDoubleBeyondMax256KiB");
-
- EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 10u,
- __ GetAdjustedPosition(label.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, LoadLiteralDoubleFar) {
- // The literal size must match but the type doesn't, so use an int64_t rather than double.
- arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321));
- __ LoadLiteral(arm::D3, literal);
- Label label;
- __ Bind(&label);
- constexpr size_t kLdrR0R0Count = (1 << 17) - 2u + 0x1234;
- for (size_t i = 0; i != kLdrR0R0Count; ++i) {
- __ ldr(arm::R0, arm::Address(arm::R0));
- }
-
- std::string expected =
- // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
- "movw ip, #((0x40000 + 2 * 0x1234) & 0xffff)\n"
- // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt.
- "movt ip, #((0x40000 + 2 * 0x1234) >> 16)\n"
- "1:\n"
- "add ip, pc\n"
- "vldr d3, [ip, #0]\n" +
- RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
- ".align 2, 0\n"
- "2:\n"
- ".word 0x87654321\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadLiteralDoubleFar");
-
- EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 10u,
- __ GetAdjustedPosition(label.Position()));
-}
-
} // namespace art
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index eb8de06..cc78002 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -31,7 +31,7 @@
#define ___ vixl_masm_->
#endif
-void Arm64Assembler::FinalizeCode() {
+void Arm64Assembler::EmitSlowPaths() {
if (!exception_blocks_.empty()) {
for (size_t i = 0; i < exception_blocks_.size(); i++) {
EmitExceptionPoll(exception_blocks_.at(i));
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index b53c11b..fa9faed 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -73,8 +73,8 @@
delete vixl_masm_;
}
- // Finalize the code.
- void FinalizeCode() OVERRIDE;
+ // Emit slow paths queued during assembly.
+ void EmitSlowPaths();
// Size of generated code.
size_t CodeSize() const;
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index 6d8a989..b016e74 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -80,11 +80,10 @@
}
-void AssemblerBuffer::ExtendCapacity(size_t min_capacity) {
+void AssemblerBuffer::ExtendCapacity() {
size_t old_size = Size();
size_t old_capacity = Capacity();
size_t new_capacity = std::min(old_capacity * 2, old_capacity + 1 * MB);
- new_capacity = std::max(new_capacity, min_capacity);
// Allocate the new data area and copy contents of the old one to it.
uint8_t* new_contents = NewContents(new_capacity);
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 0381af3..672e150 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -199,18 +199,13 @@
*reinterpret_cast<T*>(contents_ + position) = value;
}
- void Resize(size_t new_size) {
- if (new_size > Capacity()) {
- ExtendCapacity(new_size);
- }
- cursor_ = contents_ + new_size;
- }
-
- void Move(size_t newposition, size_t oldposition, size_t size) {
- // Move a chunk of the buffer from oldposition to newposition.
- DCHECK_LE(oldposition + size, Size());
- DCHECK_LE(newposition + size, Size());
- memmove(contents_ + newposition, contents_ + oldposition, size);
+ void Move(size_t newposition, size_t oldposition) {
+ CHECK(HasEnsuredCapacity());
+ // Move the contents of the buffer from oldposition to
+ // newposition by nbytes.
+ size_t nbytes = Size() - oldposition;
+ memmove(contents_ + newposition, contents_ + oldposition, nbytes);
+ cursor_ += newposition - oldposition;
}
// Emit a fixup at the current location.
@@ -355,7 +350,7 @@
return data + capacity - kMinimumGap;
}
- void ExtendCapacity(size_t min_capacity = 0u);
+ void ExtendCapacity();
friend class AssemblerFixup;
};
@@ -381,8 +376,8 @@
public:
static Assembler* Create(InstructionSet instruction_set);
- // Finalize the code; emit slow paths, fixup branches, add literal pool, etc.
- virtual void FinalizeCode() { buffer_.EmitSlowPaths(this); }
+ // Emit slow paths queued during assembly
+ virtual void EmitSlowPaths() { buffer_.EmitSlowPaths(this); }
// Size of generated code
virtual size_t CodeSize() const { return buffer_.Size(); }
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 017402d..a339633 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -544,7 +544,6 @@
}
void DriverWrapper(std::string assembly_text, std::string test_name) {
- assembler_->FinalizeCode();
size_t cs = assembler_->CodeSize();
std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
MemoryRegion code(&(*data)[0], data->size());
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 20f61f9..1a2c9a9 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -65,33 +65,20 @@
return *s1 - *s2;
}
-void InitResults() {
- if (test_results.empty()) {
- setup_results();
- }
-}
-
-std::string GetToolsDir() {
+void dump(std::vector<uint8_t>& code, const char* testname) {
+ // This will only work on the host. There is no as, objcopy or objdump on the
+ // device.
#ifndef HAVE_ANDROID_OS
- // This will only work on the host. There is no as, objcopy or objdump on the device.
+ static bool results_ok = false;
static std::string toolsdir;
- if (toolsdir.empty()) {
+ if (!results_ok) {
setup_results();
toolsdir = CommonRuntimeTest::GetAndroidTargetToolsDir(kThumb2);
SetAndroidData();
+ results_ok = true;
}
- return toolsdir;
-#else
- return std::string();
-#endif
-}
-
-void DumpAndCheck(std::vector<uint8_t>& code, const char* testname, const char* const* results) {
-#ifndef HAVE_ANDROID_OS
- static std::string toolsdir = GetToolsDir();
-
ScratchFile file;
const char* filename = file.GetFilename().c_str();
@@ -143,6 +130,9 @@
FILE *fp = popen(cmd, "r");
ASSERT_TRUE(fp != nullptr);
+ std::map<std::string, const char**>::iterator results = test_results.find(testname);
+ ASSERT_NE(results, test_results.end());
+
uint32_t lineindex = 0;
while (!feof(fp)) {
@@ -151,14 +141,14 @@
if (s == nullptr) {
break;
}
- if (CompareIgnoringSpace(results[lineindex], testline) != 0) {
+ if (CompareIgnoringSpace(results->second[lineindex], testline) != 0) {
LOG(FATAL) << "Output is not as expected at line: " << lineindex
- << results[lineindex] << "/" << testline;
+ << results->second[lineindex] << "/" << testline;
}
++lineindex;
}
// Check that we are at the end.
- ASSERT_TRUE(results[lineindex] == nullptr);
+ ASSERT_TRUE(results->second[lineindex] == nullptr);
fclose(fp);
}
@@ -173,31 +163,8 @@
#define __ assembler->
-void EmitAndCheck(arm::Thumb2Assembler* assembler, const char* testname,
- const char* const* results) {
- __ FinalizeCode();
- size_t cs = __ CodeSize();
- std::vector<uint8_t> managed_code(cs);
- MemoryRegion code(&managed_code[0], managed_code.size());
- __ FinalizeInstructions(code);
-
- DumpAndCheck(managed_code, testname, results);
-}
-
-void EmitAndCheck(arm::Thumb2Assembler* assembler, const char* testname) {
- InitResults();
- std::map<std::string, const char* const*>::iterator results = test_results.find(testname);
- ASSERT_NE(results, test_results.end());
-
- EmitAndCheck(assembler, testname, results->second);
-}
-
-#undef __
-
-#define __ assembler.
-
TEST(Thumb2AssemblerTest, SimpleMov) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ mov(R0, ShifterOperand(R1));
__ mov(R8, ShifterOperand(R9));
@@ -205,31 +172,46 @@
__ mov(R0, ShifterOperand(1));
__ mov(R8, ShifterOperand(9));
- EmitAndCheck(&assembler, "SimpleMov");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "SimpleMov");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, SimpleMov32) {
- arm::Thumb2Assembler assembler;
- __ Force32Bit();
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+ assembler->Force32Bit();
__ mov(R0, ShifterOperand(R1));
__ mov(R8, ShifterOperand(R9));
- EmitAndCheck(&assembler, "SimpleMov32");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "SimpleMov32");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, SimpleMovAdd) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ mov(R0, ShifterOperand(R1));
__ add(R0, R1, ShifterOperand(R2));
__ add(R0, R1, ShifterOperand());
- EmitAndCheck(&assembler, "SimpleMovAdd");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "SimpleMovAdd");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, DataProcessingRegister) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ mov(R0, ShifterOperand(R1));
__ mvn(R0, ShifterOperand(R1));
@@ -267,11 +249,16 @@
// 32 bit variants.
__ add(R12, R1, ShifterOperand(R0));
- EmitAndCheck(&assembler, "DataProcessingRegister");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "DataProcessingRegister");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, DataProcessingImmediate) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ mov(R0, ShifterOperand(0x55));
__ mvn(R0, ShifterOperand(0x55));
@@ -296,11 +283,16 @@
__ movs(R0, ShifterOperand(0x55));
__ mvns(R0, ShifterOperand(0x55));
- EmitAndCheck(&assembler, "DataProcessingImmediate");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "DataProcessingImmediate");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediate) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ mov(R0, ShifterOperand(0x550055));
__ mvn(R0, ShifterOperand(0x550055));
@@ -319,12 +311,17 @@
__ cmp(R0, ShifterOperand(0x550055));
__ cmn(R0, ShifterOperand(0x550055));
- EmitAndCheck(&assembler, "DataProcessingModifiedImmediate");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "DataProcessingModifiedImmediate");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediates) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ mov(R0, ShifterOperand(0x550055));
__ mov(R0, ShifterOperand(0x55005500));
@@ -334,11 +331,16 @@
__ mov(R0, ShifterOperand(0x350)); // rotated to 2nd last position
__ mov(R0, ShifterOperand(0x1a8)); // rotated to last position
- EmitAndCheck(&assembler, "DataProcessingModifiedImmediates");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "DataProcessingModifiedImmediates");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, DataProcessingShiftedRegister) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ mov(R3, ShifterOperand(R4, LSL, 4));
__ mov(R3, ShifterOperand(R4, LSR, 5));
@@ -353,12 +355,17 @@
__ mov(R8, ShifterOperand(R4, ROR, 7));
__ mov(R8, ShifterOperand(R4, RRX));
- EmitAndCheck(&assembler, "DataProcessingShiftedRegister");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "DataProcessingShiftedRegister");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, BasicLoad) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ ldr(R3, Address(R4, 24));
__ ldrb(R3, Address(R4, 24));
@@ -375,12 +382,17 @@
__ ldrsb(R8, Address(R4, 24));
__ ldrsh(R8, Address(R4, 24));
- EmitAndCheck(&assembler, "BasicLoad");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "BasicLoad");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, BasicStore) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ str(R3, Address(R4, 24));
__ strb(R3, Address(R4, 24));
@@ -393,11 +405,16 @@
__ strb(R8, Address(R4, 24));
__ strh(R8, Address(R4, 24));
- EmitAndCheck(&assembler, "BasicStore");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "BasicStore");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, ComplexLoad) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ ldr(R3, Address(R4, 24, Address::Mode::Offset));
__ ldr(R3, Address(R4, 24, Address::Mode::PreIndex));
@@ -434,12 +451,17 @@
__ ldrsh(R3, Address(R4, 24, Address::Mode::NegPreIndex));
__ ldrsh(R3, Address(R4, 24, Address::Mode::NegPostIndex));
- EmitAndCheck(&assembler, "ComplexLoad");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "ComplexLoad");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, ComplexStore) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ str(R3, Address(R4, 24, Address::Mode::Offset));
__ str(R3, Address(R4, 24, Address::Mode::PreIndex));
@@ -462,11 +484,16 @@
__ strh(R3, Address(R4, 24, Address::Mode::NegPreIndex));
__ strh(R3, Address(R4, 24, Address::Mode::NegPostIndex));
- EmitAndCheck(&assembler, "ComplexStore");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "ComplexStore");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, NegativeLoadStore) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ ldr(R3, Address(R4, -24, Address::Mode::Offset));
__ ldr(R3, Address(R4, -24, Address::Mode::PreIndex));
@@ -524,20 +551,30 @@
__ strh(R3, Address(R4, -24, Address::Mode::NegPreIndex));
__ strh(R3, Address(R4, -24, Address::Mode::NegPostIndex));
- EmitAndCheck(&assembler, "NegativeLoadStore");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "NegativeLoadStore");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, SimpleLoadStoreDual) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ strd(R2, Address(R0, 24, Address::Mode::Offset));
__ ldrd(R2, Address(R0, 24, Address::Mode::Offset));
- EmitAndCheck(&assembler, "SimpleLoadStoreDual");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "SimpleLoadStoreDual");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, ComplexLoadStoreDual) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ strd(R2, Address(R0, 24, Address::Mode::Offset));
__ strd(R2, Address(R0, 24, Address::Mode::PreIndex));
@@ -553,11 +590,16 @@
__ ldrd(R2, Address(R0, 24, Address::Mode::NegPreIndex));
__ ldrd(R2, Address(R0, 24, Address::Mode::NegPostIndex));
- EmitAndCheck(&assembler, "ComplexLoadStoreDual");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "ComplexLoadStoreDual");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, NegativeLoadStoreDual) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ strd(R2, Address(R0, -24, Address::Mode::Offset));
__ strd(R2, Address(R0, -24, Address::Mode::PreIndex));
@@ -573,11 +615,16 @@
__ ldrd(R2, Address(R0, -24, Address::Mode::NegPreIndex));
__ ldrd(R2, Address(R0, -24, Address::Mode::NegPostIndex));
- EmitAndCheck(&assembler, "NegativeLoadStoreDual");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "NegativeLoadStoreDual");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, SimpleBranch) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
Label l1;
__ mov(R0, ShifterOperand(2));
@@ -611,12 +658,17 @@
__ Bind(&l5);
__ mov(R0, ShifterOperand(6));
- EmitAndCheck(&assembler, "SimpleBranch");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "SimpleBranch");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, LongBranch) {
- arm::Thumb2Assembler assembler;
- __ Force32Bit();
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+ assembler->Force32Bit();
// 32 bit branches.
Label l1;
__ mov(R0, ShifterOperand(2));
@@ -651,11 +703,16 @@
__ Bind(&l5);
__ mov(R0, ShifterOperand(6));
- EmitAndCheck(&assembler, "LongBranch");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "LongBranch");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, LoadMultiple) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
// 16 bit.
__ ldm(DB_W, R4, (1 << R0 | 1 << R3));
@@ -667,11 +724,16 @@
// Single reg is converted to ldr
__ ldm(DB_W, R4, (1 << R5));
- EmitAndCheck(&assembler, "LoadMultiple");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "LoadMultiple");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, StoreMultiple) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
// 16 bit.
__ stm(IA_W, R4, (1 << R0 | 1 << R3));
@@ -684,11 +746,16 @@
__ stm(IA_W, R4, (1 << R5));
__ stm(IA, R4, (1 << R5));
- EmitAndCheck(&assembler, "StoreMultiple");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "StoreMultiple");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, MovWMovT) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ movw(R4, 0); // 16 bit.
__ movw(R4, 0x34); // 16 bit.
@@ -701,11 +768,16 @@
__ movt(R0, 0x1234);
__ movt(R1, 0xffff);
- EmitAndCheck(&assembler, "MovWMovT");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "MovWMovT");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, SpecialAddSub) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ add(R2, SP, ShifterOperand(0x50)); // 16 bit.
__ add(SP, SP, ShifterOperand(0x50)); // 16 bit.
@@ -720,11 +792,16 @@
__ sub(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size
- EmitAndCheck(&assembler, "SpecialAddSub");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "SpecialAddSub");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, StoreToOffset) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ StoreToOffset(kStoreWord, R2, R4, 12); // Simple
__ StoreToOffset(kStoreWord, R2, R4, 0x2000); // Offset too big.
@@ -732,12 +809,17 @@
__ StoreToOffset(kStoreHalfword, R0, R12, 12);
__ StoreToOffset(kStoreByte, R2, R12, 12);
- EmitAndCheck(&assembler, "StoreToOffset");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "StoreToOffset");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, IfThen) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ it(EQ);
__ mov(R1, ShifterOperand(1), EQ);
@@ -766,11 +848,16 @@
__ mov(R3, ShifterOperand(3), EQ);
__ mov(R4, ShifterOperand(4), NE);
- EmitAndCheck(&assembler, "IfThen");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "IfThen");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, CbzCbnz) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
Label l1;
__ cbz(R2, &l1);
@@ -786,11 +873,16 @@
__ Bind(&l2);
__ mov(R2, ShifterOperand(4));
- EmitAndCheck(&assembler, "CbzCbnz");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "CbzCbnz");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, Multiply) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ mul(R0, R1, R0);
__ mul(R0, R1, R2);
@@ -806,11 +898,16 @@
__ umull(R0, R1, R2, R3);
__ umull(R8, R9, R10, R11);
- EmitAndCheck(&assembler, "Multiply");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "Multiply");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, Divide) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ sdiv(R0, R1, R2);
__ sdiv(R8, R9, R10);
@@ -818,11 +915,16 @@
__ udiv(R0, R1, R2);
__ udiv(R8, R9, R10);
- EmitAndCheck(&assembler, "Divide");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "Divide");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, VMov) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ vmovs(S1, 1.0);
__ vmovd(D1, 1.0);
@@ -830,12 +932,17 @@
__ vmovs(S1, S2);
__ vmovd(D1, D2);
- EmitAndCheck(&assembler, "VMov");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "VMov");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, BasicFloatingPoint) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ vadds(S0, S1, S2);
__ vsubs(S0, S1, S2);
@@ -857,11 +964,16 @@
__ vnegd(D0, D1);
__ vsqrtd(D0, D1);
- EmitAndCheck(&assembler, "BasicFloatingPoint");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "BasicFloatingPoint");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, FloatingPointConversions) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ vcvtsd(S2, D2);
__ vcvtds(D2, S2);
@@ -878,11 +990,16 @@
__ vcvtud(S1, D2);
__ vcvtdu(D1, S2);
- EmitAndCheck(&assembler, "FloatingPointConversions");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "FloatingPointConversions");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, FloatingPointComparisons) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ vcmps(S0, S1);
__ vcmpd(D0, D1);
@@ -890,37 +1007,57 @@
__ vcmpsz(S2);
__ vcmpdz(D2);
- EmitAndCheck(&assembler, "FloatingPointComparisons");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "FloatingPointComparisons");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, Calls) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ blx(LR);
__ bx(LR);
- EmitAndCheck(&assembler, "Calls");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "Calls");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, Breakpoint) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ bkpt(0);
- EmitAndCheck(&assembler, "Breakpoint");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "Breakpoint");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, StrR1) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ str(R1, Address(SP, 68));
__ str(R1, Address(SP, 1068));
- EmitAndCheck(&assembler, "StrR1");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "StrR1");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, VPushPop) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ vpushs(S2, 4);
__ vpushd(D2, 4);
@@ -928,11 +1065,16 @@
__ vpops(S2, 4);
__ vpopd(D2, 4);
- EmitAndCheck(&assembler, "VPushPop");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "VPushPop");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, Max16BitBranch) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
Label l1;
__ b(&l1);
@@ -942,11 +1084,16 @@
__ Bind(&l1);
__ mov(R1, ShifterOperand(R2));
- EmitAndCheck(&assembler, "Max16BitBranch");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "Max16BitBranch");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, Branch32) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
Label l1;
__ b(&l1);
@@ -956,11 +1103,16 @@
__ Bind(&l1);
__ mov(R1, ShifterOperand(R2));
- EmitAndCheck(&assembler, "Branch32");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "Branch32");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, CompareAndBranchMax) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
Label l1;
__ cbz(R4, &l1);
@@ -970,11 +1122,16 @@
__ Bind(&l1);
__ mov(R1, ShifterOperand(R2));
- EmitAndCheck(&assembler, "CompareAndBranchMax");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "CompareAndBranchMax");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, CompareAndBranchRelocation16) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
Label l1;
__ cbz(R4, &l1);
@@ -984,11 +1141,16 @@
__ Bind(&l1);
__ mov(R1, ShifterOperand(R2));
- EmitAndCheck(&assembler, "CompareAndBranchRelocation16");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "CompareAndBranchRelocation16");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, CompareAndBranchRelocation32) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
Label l1;
__ cbz(R4, &l1);
@@ -998,11 +1160,16 @@
__ Bind(&l1);
__ mov(R1, ShifterOperand(R2));
- EmitAndCheck(&assembler, "CompareAndBranchRelocation32");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "CompareAndBranchRelocation32");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, MixedBranch32) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
Label l1;
Label l2;
@@ -1017,11 +1184,16 @@
__ Bind(&l1);
__ mov(R1, ShifterOperand(R2));
- EmitAndCheck(&assembler, "MixedBranch32");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "MixedBranch32");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, Shifts) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
// 16 bit
__ Lsl(R0, R1, 5);
@@ -1068,11 +1240,16 @@
__ Lsr(R0, R8, R2, true);
__ Asr(R0, R1, R8, true);
- EmitAndCheck(&assembler, "Shifts");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "Shifts");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, LoadStoreRegOffset) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
// 16 bit.
__ ldr(R0, Address(R1, R2));
@@ -1095,11 +1272,16 @@
__ ldr(R0, Address(R1, R8));
__ str(R0, Address(R1, R8));
- EmitAndCheck(&assembler, "LoadStoreRegOffset");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "LoadStoreRegOffset");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, LoadStoreLiteral) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ ldr(R0, Address(4));
__ str(R0, Address(4));
@@ -1113,11 +1295,16 @@
__ str(R0, Address(0x3ff)); // 32 bit (no 16 bit str(literal)).
__ str(R0, Address(0x7ff)); // 11 bits (32 bit).
- EmitAndCheck(&assembler, "LoadStoreLiteral");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "LoadStoreLiteral");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, LoadStoreLimits) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
__ ldr(R0, Address(R4, 124)); // 16 bit.
__ ldr(R0, Address(R4, 128)); // 32 bit.
@@ -1143,20 +1330,30 @@
__ strh(R0, Address(R4, 62)); // 16 bit.
__ strh(R0, Address(R4, 64)); // 32 bit.
- EmitAndCheck(&assembler, "LoadStoreLimits");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "LoadStoreLimits");
+ delete assembler;
}
TEST(Thumb2AssemblerTest, CompareAndBranch) {
- arm::Thumb2Assembler assembler;
+ arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
- Label label;
+ arm::NearLabel label;
__ CompareAndBranchIfZero(arm::R0, &label);
__ CompareAndBranchIfZero(arm::R11, &label);
__ CompareAndBranchIfNonZero(arm::R0, &label);
__ CompareAndBranchIfNonZero(arm::R11, &label);
__ Bind(&label);
- EmitAndCheck(&assembler, "CompareAndBranch");
+ size_t cs = __ CodeSize();
+ std::vector<uint8_t> managed_code(cs);
+ MemoryRegion code(&managed_code[0], managed_code.size());
+ __ FinalizeInstructions(code);
+ dump(managed_code, "CompareAndBranch");
+ delete assembler;
}
#undef __
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 280ed77..841d6a0 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -4832,7 +4832,7 @@
nullptr
};
-std::map<std::string, const char* const*> test_results;
+std::map<std::string, const char**> test_results;
void setup_results() {
test_results["SimpleMov"] = SimpleMovResults;
test_results["SimpleMov32"] = SimpleMov32Results;