Compile link-time thunks in codegen.

Prepare for experimenting with Baker read barrier marking
introspection entrypoints for JIT.
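
For reference, a Baker read barrier patch's custom_data packs the thunk
parameters into a uint32_t. A minimal sketch of the ARM64 encoding,
assuming the bit widths from the new BitField typedefs (2 bits of kind,
then two 5-bit register fields); EncodeFieldData below is a hypothetical
stand-in for the real helpers, which the ARM version further extends
with a width bit:

    #include <cstdint>

    enum class BakerReadBarrierKind : uint8_t { kField, kArray, kGcRoot };

    // Register 31 (sp/zr) marks an unused register field.
    uint32_t EncodeFieldData(uint32_t base_reg, uint32_t holder_reg) {
      return static_cast<uint32_t>(BakerReadBarrierKind::kField) |  // Bits [0, 2).
             (base_reg << 2) |                                      // Bits [2, 7).
             (holder_reg << 7);                                     // Bits [7, 12).
    }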

Test: m test-art-host-gtest
Test: Compare compiled boot*.oat before and after (no diff).
Test: Pixel 2 XL boots.
Bug: 36141117
Change-Id: Idb413a31b158db4bf89a8707ea46dd167a06f110
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index c2ae764..231017f 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -449,6 +449,18 @@
   // No linker patches by default.
 }
 
+bool CodeGenerator::NeedsThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED) const {
+  // Code generators that create patches requiring thunk compilation should override this function.
+  return false;
+}
+
+void CodeGenerator::EmitThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED,
+                                  /*out*/ ArenaVector<uint8_t>* code ATTRIBUTE_UNUSED,
+                                  /*out*/ std::string* debug_name ATTRIBUTE_UNUSED) {
+  // Code generators that create patches requiring thunk compilation should override this function.
+  LOG(FATAL) << "Unexpected call to EmitThunkCode().";
+}
+
 void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
                                              size_t maximum_safepoint_spill_size,
                                              size_t number_of_out_slots,
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 3bd5e14..a86b271 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -21,6 +21,7 @@
 #include "arch/instruction_set_features.h"
 #include "base/arena_containers.h"
 #include "base/arena_object.h"
+#include "base/array_ref.h"
 #include "base/bit_field.h"
 #include "base/bit_utils.h"
 #include "base/enums.h"
@@ -74,6 +75,7 @@
   virtual ~CodeAllocator() {}
 
   virtual uint8_t* Allocate(size_t size) = 0;
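+  // Returns a view of the memory written so far, e.g. for verifying the emitted code.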
+  virtual ArrayRef<const uint8_t> GetMemory() const = 0;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
@@ -210,6 +212,10 @@
   virtual void Initialize() = 0;
   virtual void Finalize(CodeAllocator* allocator);
   virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches);
+  virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const;
+  virtual void EmitThunkCode(const linker::LinkerPatch& patch,
+                             /*out*/ ArenaVector<uint8_t>* code,
+                             /*out*/ std::string* debug_name);
   virtual void GenerateFrameEntry() = 0;
   virtual void GenerateFrameExit() = 0;
   virtual void Bind(HBasicBlock* block) = 0;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 273346a..31887d9 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -30,7 +30,6 @@
 #include "heap_poisoning.h"
 #include "intrinsics.h"
 #include "intrinsics_arm64.h"
-#include "linker/arm64/relative_patcher_arm64.h"
 #include "linker/linker_patch.h"
 #include "lock_word.h"
 #include "mirror/array-inl.h"
@@ -1425,6 +1424,62 @@
   __ FinalizeCode();
 
   CodeGenerator::Finalize(allocator);
+
+  // Verify Baker read barrier linker patches.
+  if (kIsDebugBuild) {
+    ArrayRef<const uint8_t> code = allocator->GetMemory();
+    for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
+      DCHECK(info.label.IsBound());
+      uint32_t literal_offset = info.label.GetLocation();
+      DCHECK_ALIGNED(literal_offset, 4u);
+
+      auto GetInsn = [&code](uint32_t offset) {
+        DCHECK_ALIGNED(offset, 4u);
+        return
+            (static_cast<uint32_t>(code[offset + 0]) << 0) +
+            (static_cast<uint32_t>(code[offset + 1]) << 8) +
+            (static_cast<uint32_t>(code[offset + 2]) << 16) +
+            (static_cast<uint32_t>(code[offset + 3]) << 24);
+      };
+
+      const uint32_t encoded_data = info.custom_data;
+      BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+      // Check that the next instruction matches the expected LDR.
+      switch (kind) {
+        case BakerReadBarrierKind::kField: {
+          DCHECK_GE(code.size() - literal_offset, 8u);
+          uint32_t next_insn = GetInsn(literal_offset + 4u);
+          // LDR (immediate) with correct base_reg.
+          CheckValidReg(next_insn & 0x1fu);  // Check destination register.
+          const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+          CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5));
+          break;
+        }
+        case BakerReadBarrierKind::kArray: {
+          DCHECK_GE(code.size() - literal_offset, 8u);
+          uint32_t next_insn = GetInsn(literal_offset + 4u);
+          // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
+          // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
+          CheckValidReg(next_insn & 0x1fu);  // Check destination register.
+          const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+          CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5));
+          CheckValidReg((next_insn >> 16) & 0x1fu);  // Check index register.
+          break;
+        }
+        case BakerReadBarrierKind::kGcRoot: {
+          DCHECK_GE(literal_offset, 4u);
+          uint32_t prev_insn = GetInsn(literal_offset - 4u);
+          // LDR (immediate) with correct root_reg.
+          const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+          CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg);
+          break;
+        }
+        default:
+          LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+          UNREACHABLE();
+      }
+    }
+  }
 }
 
 void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
@@ -4814,6 +4869,44 @@
   DCHECK_EQ(size, linker_patches->size());
 }
 
+bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const {
+  return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
+         patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
+}
+
+void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch,
+                                       /*out*/ ArenaVector<uint8_t>* code,
+                                       /*out*/ std::string* debug_name) {
+  Arm64Assembler assembler(GetGraph()->GetAllocator());
+  switch (patch.GetType()) {
+    case linker::LinkerPatch::Type::kCallRelative: {
+      // The thunk just uses the entry point in the ArtMethod. This works even for calls
+      // to the generic JNI and interpreter trampolines.
+      Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+          kArm64PointerSize).Int32Value());
+      assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
+      if (GetCompilerOptions().GenerateAnyDebugInfo()) {
+        *debug_name = "MethodCallThunk";
+      }
+      break;
+    }
+    case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
+      DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
+      CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected patch type " << patch.GetType();
+      UNREACHABLE();
+  }
+
+  // Ensure we emit the literal pool if any.
+  assembler.FinalizeCode();
+  code->resize(assembler.CodeSize());
+  MemoryRegion code_region(code->data(), code->size());
+  assembler.FinalizeInstructions(code_region);
+}
+
 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) {
   return uint32_literals_.GetOrCreate(
       value,
@@ -4954,12 +5047,12 @@
       DCHECK(!cls->MustGenerateClinitCheck());
       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
       Register current_method = InputRegisterAt(cls, 0);
-      GenerateGcRootFieldLoad(cls,
-                              out_loc,
-                              current_method,
-                              ArtMethod::DeclaringClassOffset().Int32Value(),
-                              /* fixup_label */ nullptr,
-                              read_barrier_option);
+      codegen_->GenerateGcRootFieldLoad(cls,
+                                        out_loc,
+                                        current_method,
+                                        ArtMethod::DeclaringClassOffset().Int32Value(),
+                                        /* fixup_label */ nullptr,
+                                        read_barrier_option);
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
@@ -5006,12 +5099,12 @@
       vixl::aarch64::Label* ldr_label =
           codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label);
       // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
-      GenerateGcRootFieldLoad(cls,
-                              out_loc,
-                              temp,
-                              /* offset placeholder */ 0u,
-                              ldr_label,
-                              read_barrier_option);
+      codegen_->GenerateGcRootFieldLoad(cls,
+                                        out_loc,
+                                        temp,
+                                        /* offset placeholder */ 0u,
+                                        ldr_label,
+                                        read_barrier_option);
       generate_null_check = true;
       break;
     }
@@ -5019,12 +5112,12 @@
       __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
                                                        cls->GetTypeIndex(),
                                                        cls->GetClass()));
-      GenerateGcRootFieldLoad(cls,
-                              out_loc,
-                              out.X(),
-                              /* offset */ 0,
-                              /* fixup_label */ nullptr,
-                              read_barrier_option);
+      codegen_->GenerateGcRootFieldLoad(cls,
+                                        out_loc,
+                                        out.X(),
+                                        /* offset */ 0,
+                                        /* fixup_label */ nullptr,
+                                        read_barrier_option);
       break;
     }
     case HLoadClass::LoadKind::kRuntimeCall:
@@ -5167,12 +5260,12 @@
       vixl::aarch64::Label* ldr_label =
           codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label);
       // /* GcRoot<mirror::String> */ out = *(base_address + offset)  /* PC-relative */
-      GenerateGcRootFieldLoad(load,
-                              out_loc,
-                              temp,
-                              /* offset placeholder */ 0u,
-                              ldr_label,
-                              kCompilerReadBarrierOption);
+      codegen_->GenerateGcRootFieldLoad(load,
+                                        out_loc,
+                                        temp,
+                                        /* offset placeholder */ 0u,
+                                        ldr_label,
+                                        kCompilerReadBarrierOption);
       SlowPathCodeARM64* slow_path =
           new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load);
       codegen_->AddSlowPath(slow_path);
@@ -5185,12 +5278,12 @@
       __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
                                                         load->GetStringIndex(),
                                                         load->GetString()));
-      GenerateGcRootFieldLoad(load,
-                              out_loc,
-                              out.X(),
-                              /* offset */ 0,
-                              /* fixup_label */ nullptr,
-                              kCompilerReadBarrierOption);
+      codegen_->GenerateGcRootFieldLoad(load,
+                                        out_loc,
+                                        out.X(),
+                                        /* offset */ 0,
+                                        /* fixup_label */ nullptr,
+                                        kCompilerReadBarrierOption);
       return;
     }
     default:
@@ -6139,7 +6232,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
+void CodeGeneratorARM64::GenerateGcRootFieldLoad(
     HInstruction* instruction,
     Location root,
     Register obj,
@@ -6173,9 +6266,8 @@
         DCHECK(temps.IsAvailable(ip0));
         DCHECK(temps.IsAvailable(ip1));
         temps.Exclude(ip0, ip1);
-        uint32_t custom_data =
-            linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
-        vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data);
+        uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
+        vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
 
         EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
         vixl::aarch64::Label return_address;
@@ -6204,14 +6296,14 @@
         // Slow path marking the GC root `root`. The entrypoint will
         // be loaded by the slow path code.
         SlowPathCodeARM64* slow_path =
-            new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARM64(instruction, root);
-        codegen_->AddSlowPath(slow_path);
+            new (GetScopedAllocator()) ReadBarrierMarkSlowPathARM64(instruction, root);
+        AddSlowPath(slow_path);
 
         // /* GcRoot<mirror::Object> */ root = *(obj + offset)
         if (fixup_label == nullptr) {
           __ Ldr(root_reg, MemOperand(obj, offset));
         } else {
-          codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
+          EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
         }
         static_assert(
             sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
@@ -6231,10 +6323,10 @@
       if (fixup_label == nullptr) {
         __ Add(root_reg.X(), obj.X(), offset);
       } else {
-        codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
+        EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
       }
       // /* mirror::Object* */ root = root->Read()
-      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+      GenerateReadBarrierForRootSlow(instruction, root, root);
     }
   } else {
     // Plain GC root load with no read barrier.
@@ -6242,12 +6334,12 @@
     if (fixup_label == nullptr) {
       __ Ldr(root_reg, MemOperand(obj, offset));
     } else {
-      codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
+      EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
     }
     // Note that GC roots are not affected by heap poisoning, thus we
     // do not have to unpoison `root_reg` here.
   }
-  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+  MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
 }
 
 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6296,9 +6388,7 @@
     DCHECK(temps.IsAvailable(ip0));
     DCHECK(temps.IsAvailable(ip1));
     temps.Exclude(ip0, ip1);
-    uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(
-        base.GetCode(),
-        obj.GetCode());
+    uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode());
     vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
 
     {
@@ -6383,8 +6473,7 @@
     DCHECK(temps.IsAvailable(ip0));
     DCHECK(temps.IsAvailable(ip1));
     temps.Exclude(ip0, ip1);
-    uint32_t custom_data =
-        linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode());
+    uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode());
     vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
 
     __ Add(temp.X(), obj.X(), Operand(data_offset));
@@ -6744,5 +6833,176 @@
 #undef __
 #undef QUICK_ENTRY_POINT
 
+#define __ assembler.GetVIXLAssembler()->
+
+static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
+                                     vixl::aarch64::Register base_reg,
+                                     vixl::aarch64::MemOperand& lock_word,
+                                     vixl::aarch64::Label* slow_path) {
+  // Load the lock word containing the rb_state.
+  __ Ldr(ip0.W(), lock_word);
+  // Given the numeric representation, it's enough to check the low bit of the rb_state.
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+  __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path);
+  static_assert(
+      BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
+      "Field and array LDR offsets must be the same to reuse the same code.");
+  // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
+  static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                "Field LDR must be 1 instruction (4B) before the return address label; "
+                " 2 instructions (8B) for heap poisoning.");
+  __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
+  // Introduce a dependency on the lock_word including rb_state,
+  // to prevent load-load reordering, and without using
+  // a memory barrier (which would be more expensive).
+  __ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
+  __ Br(lr);          // And return back to the function.
+  // Note: The fake dependency is unnecessary for the slow path.
+}
+
+// Load the read barrier introspection entrypoint in register `entrypoint`.
+static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler,
+                                                       vixl::aarch64::Register entrypoint) {
+  // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
+  DCHECK_EQ(ip0.GetCode(), 16u);
+  const int32_t entry_point_offset =
+      Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
+  __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
+}
+
+void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
+                                                      uint32_t encoded_data,
+                                                      /*out*/ std::string* debug_name) {
+  BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+  switch (kind) {
+    case BakerReadBarrierKind::kField: {
+      // Check if the holder is gray and, if not, add fake dependency to the base register
+      // and return to the LDR instruction to load the reference. Otherwise, use introspection
+      // to load the reference and call the entrypoint (in IP1) that performs further checks
+      // on the reference and marks it if needed.
+      auto base_reg =
+          Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(base_reg.GetCode());
+      auto holder_reg =
+          Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data));
+      CheckValidReg(holder_reg.GetCode());
+      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+      temps.Exclude(ip0, ip1);
+      // If base_reg differs from holder_reg, the offset was too large and we must have
+      // emitted an explicit null check before the load. Otherwise, we need to null-check
+      // the holder as we do not necessarily do that check before going to the thunk.
+      vixl::aarch64::Label throw_npe;
+      if (holder_reg.Is(base_reg)) {
+        __ Cbz(holder_reg.W(), &throw_npe);
+      }
+      vixl::aarch64::Label slow_path;
+      MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
+      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+      __ Bind(&slow_path);
+      MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
+      __ Ldr(ip0.W(), ldr_address);         // Load the LDR (immediate) unsigned offset.
+      LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
+      __ Ubfx(ip0.W(), ip0.W(), 10, 12);    // Extract the imm12 field, i.e. offset / 4.
+      __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2));   // Load the reference.
+      // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
+      __ Br(ip1);                           // Jump to the entrypoint.
+      if (holder_reg.Is(base_reg)) {
+        // Add null check slow path. The stack map is at the address pointed to by LR.
+        __ Bind(&throw_npe);
+        int32_t offset = GetThreadOffset<kArm64PointerSize>(kQuickThrowNullPointer).Int32Value();
+        __ Ldr(ip0, MemOperand(/* Thread* */ vixl::aarch64::x19, offset));
+        __ Br(ip0);
+      }
+      break;
+    }
+    case BakerReadBarrierKind::kArray: {
+      auto base_reg =
+          Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(base_reg.GetCode());
+      DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+                BakerReadBarrierSecondRegField::Decode(encoded_data));
+      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+      temps.Exclude(ip0, ip1);
+      vixl::aarch64::Label slow_path;
+      int32_t data_offset =
+          mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+      MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
+      DCHECK_LT(lock_word.GetOffset(), 0);
+      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+      __ Bind(&slow_path);
+      MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+      __ Ldr(ip0.W(), ldr_address);         // Load the LDR (register) unsigned offset.
+      LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
+      __ Ubfx(ip0, ip0, 16, 6);             // Extract the index register, plus 32 (bit 21 is set).
+      __ Bfi(ip1, ip0, 3, 6);               // Insert ip0 into the entrypoint address to create
+                                            // a switch case target based on the index register.
+      __ Mov(ip0, base_reg);                // Move the base register to ip0.
+      __ Br(ip1);                           // Jump to the entrypoint's array switch case.
+      break;
+    }
+    case BakerReadBarrierKind::kGcRoot: {
+      // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
+      // and it does not have a forwarding address), call the correct introspection entrypoint;
+      // otherwise return the reference (or the extracted forwarding address).
+      // There is no gray bit check for GC roots.
+      auto root_reg =
+          Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(root_reg.GetCode());
+      DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+                BakerReadBarrierSecondRegField::Decode(encoded_data));
+      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+      temps.Exclude(ip0, ip1);
+      vixl::aarch64::Label return_label, not_marked, forwarding_address;
+      __ Cbz(root_reg, &return_label);
+      MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value());
+      __ Ldr(ip0.W(), lock_word);
+      __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, &not_marked);
+      __ Bind(&return_label);
+      __ Br(lr);
+      __ Bind(&not_marked);
+      __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1));
+      __ B(&forwarding_address, mi);
+      LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
+      // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to
+      // art_quick_read_barrier_mark_introspection_gc_roots.
+      __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
+      __ Mov(ip0.W(), root_reg);
+      __ Br(ip1);
+      __ Bind(&forwarding_address);
+      __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift);
+      __ Br(lr);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+      UNREACHABLE();
+  }
+
+  if (GetCompilerOptions().GenerateAnyDebugInfo()) {
+    std::ostringstream oss;
+    oss << "BakerReadBarrierThunk";
+    switch (kind) {
+      case BakerReadBarrierKind::kField:
+        oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
+            << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
+        break;
+      case BakerReadBarrierKind::kArray:
+        oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
+        DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+                  BakerReadBarrierSecondRegField::Decode(encoded_data));
+        break;
+      case BakerReadBarrierKind::kGcRoot:
+        oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
+        DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+                  BakerReadBarrierSecondRegField::Decode(encoded_data));
+        break;
+    }
+    *debug_name = oss.str();
+  }
+}
+
+#undef __
+
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 6a52eec..aa343b1 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
 
 #include "arch/arm64/quick_method_frame_info_arm64.h"
+#include "base/bit_field.h"
 #include "code_generator.h"
 #include "common_arm64.h"
 #include "dex/dex_file_types.h"
@@ -36,6 +37,11 @@
 #pragma GCC diagnostic pop
 
 namespace art {
+
+namespace linker {
+class Arm64RelativePatcherTest;
+}  // namespace linker
+
 namespace arm64 {
 
 class CodeGeneratorARM64;
@@ -309,17 +315,6 @@
                                          uint32_t offset,
                                          Location maybe_temp,
                                          ReadBarrierOption read_barrier_option);
-  // Generate a GC root reference load:
-  //
-  //   root <- *(obj + offset)
-  //
-  // while honoring read barriers based on read_barrier_option.
-  void GenerateGcRootFieldLoad(HInstruction* instruction,
-                               Location root,
-                               vixl::aarch64::Register obj,
-                               uint32_t offset,
-                               vixl::aarch64::Label* fixup_label,
-                               ReadBarrierOption read_barrier_option);
 
   // Generate a floating-point comparison.
   void GenerateFcmp(HInstruction* instruction);
@@ -641,9 +636,24 @@
                                 vixl::aarch64::Register base);
 
   void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;
+  bool NeedsThunkCode(const linker::LinkerPatch& patch) const OVERRIDE;
+  void EmitThunkCode(const linker::LinkerPatch& patch,
+                     /*out*/ ArenaVector<uint8_t>* code,
+                     /*out*/ std::string* debug_name) OVERRIDE;
 
   void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
 
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers based on read_barrier_option.
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               vixl::aarch64::Register obj,
+                               uint32_t offset,
+                               vixl::aarch64::Label* fixup_label,
+                               ReadBarrierOption read_barrier_option);
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -778,6 +788,62 @@
   void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
 
  private:
+  // Encoding of thunk type and data for link-time generated thunks for Baker read barriers.
+
+  enum class BakerReadBarrierKind : uint8_t {
+    kField,   // Field get or array get with constant offset (i.e. constant index).
+    kArray,   // Array get with index in register.
+    kGcRoot,  // GC root load.
+    kLast = kGcRoot
+  };
+
+  static constexpr uint32_t kBakerReadBarrierInvalidEncodedReg = /* sp/zr is invalid */ 31u;
+
+  static constexpr size_t kBitsForBakerReadBarrierKind =
+      MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
+  static constexpr size_t kBakerReadBarrierBitsForRegister =
+      MinimumBitsToStore(kBakerReadBarrierInvalidEncodedReg);
+  using BakerReadBarrierKindField =
+      BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>;
+  using BakerReadBarrierFirstRegField =
+      BitField<uint32_t, kBitsForBakerReadBarrierKind, kBakerReadBarrierBitsForRegister>;
+  using BakerReadBarrierSecondRegField =
+      BitField<uint32_t,
+               kBitsForBakerReadBarrierKind + kBakerReadBarrierBitsForRegister,
+               kBakerReadBarrierBitsForRegister>;
+
+  static void CheckValidReg(uint32_t reg) {
+    DCHECK(reg < vixl::aarch64::lr.GetCode() &&
+           reg != vixl::aarch64::ip0.GetCode() &&
+           reg != vixl::aarch64::ip1.GetCode()) << reg;
+  }
+
+  static inline uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) {
+    CheckValidReg(base_reg);
+    CheckValidReg(holder_reg);
+    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) |
+           BakerReadBarrierFirstRegField::Encode(base_reg) |
+           BakerReadBarrierSecondRegField::Encode(holder_reg);
+  }
+
+  static inline uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
+    CheckValidReg(base_reg);
+    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
+           BakerReadBarrierFirstRegField::Encode(base_reg) |
+           BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg);
+  }
+
+  static inline uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) {
+    CheckValidReg(root_reg);
+    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
+           BakerReadBarrierFirstRegField::Encode(root_reg) |
+           BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg);
+  }
+
+  void CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
+                                    uint32_t encoded_data,
+                                    /*out*/ std::string* debug_name);
+
   using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>;
   using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>;
   using StringToLiteralMap = ArenaSafeMap<StringReference,
@@ -854,6 +920,7 @@
   // Patches for class literals in JIT compiled code.
   TypeToLiteralMap jit_class_patches_;
 
+  friend class linker::Arm64RelativePatcherTest;
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
 };
 
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index b38a006..15d9526 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -94,9 +94,6 @@
 constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
 constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
 
-// The reserved entrypoint register for link-time generated thunks.
-const vixl32::Register kBakerCcEntrypointRegister = r4;
-
 // Using a base helps identify when we hit Marking Register check breakpoints.
 constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
 
@@ -116,8 +113,6 @@
   DCHECK(temps->IsAvailable(ip));
   temps->Exclude(ip);
   DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister));
-  DCHECK_EQ(kBakerCcEntrypointRegister.GetCode(),
-            linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister);
   DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u);
   DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp(
       instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister));
@@ -2422,6 +2417,80 @@
   FixJumpTables();
   GetAssembler()->FinalizeCode();
   CodeGenerator::Finalize(allocator);
+
+  // Verify Baker read barrier linker patches.
+  if (kIsDebugBuild) {
+    ArrayRef<const uint8_t> code = allocator->GetMemory();
+    for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
+      DCHECK(info.label.IsBound());
+      uint32_t literal_offset = info.label.GetLocation();
+      DCHECK_ALIGNED(literal_offset, 2u);
+
+      auto GetInsn16 = [&code](uint32_t offset) {
+        DCHECK_ALIGNED(offset, 2u);
+        return (static_cast<uint32_t>(code[offset + 0]) << 0) +
+               (static_cast<uint32_t>(code[offset + 1]) << 8);
+      };
+      auto GetInsn32 = [=](uint32_t offset) {
+        return (GetInsn16(offset) << 16) + (GetInsn16(offset + 2u) << 0);
+      };
+
+      uint32_t encoded_data = info.custom_data;
+      BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+      // Check that the next instruction matches the expected LDR.
+      switch (kind) {
+        case BakerReadBarrierKind::kField: {
+          BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+          if (width == BakerReadBarrierWidth::kWide) {
+            DCHECK_GE(code.size() - literal_offset, 8u);
+            uint32_t next_insn = GetInsn32(literal_offset + 4u);
+            // LDR (immediate), encoding T3, with correct base_reg.
+            CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
+            const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+            CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
+          } else {
+            DCHECK_GE(code.size() - literal_offset, 6u);
+            uint32_t next_insn = GetInsn16(literal_offset + 4u);
+            // LDR (immediate), encoding T1, with correct base_reg.
+            CheckValidReg(next_insn & 0x7u);  // Check destination register.
+            const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+            CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3));
+          }
+          break;
+        }
+        case BakerReadBarrierKind::kArray: {
+          DCHECK_GE(code.size() - literal_offset, 8u);
+          uint32_t next_insn = GetInsn32(literal_offset + 4u);
+          // LDR (register), encoding T2, with correct base_reg and imm2=2,
+          // i.e. LDR Rt, [Rn, Rm, LSL #2].
+          CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
+          const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+          CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16));
+          CheckValidReg(next_insn & 0xfu);  // Check index register.
+          break;
+        }
+        case BakerReadBarrierKind::kGcRoot: {
+          BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+          if (width == BakerReadBarrierWidth::kWide) {
+            DCHECK_GE(literal_offset, 4u);
+            uint32_t prev_insn = GetInsn32(literal_offset - 4u);
+            // LDR (immediate), encoding T3, with correct root_reg.
+            const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+            CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
+          } else {
+            DCHECK_GE(literal_offset, 2u);
+            uint32_t prev_insn = GetInsn16(literal_offset - 2u);
+            // LDR (immediate), encoding T1, with correct root_reg.
+            const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+            CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg);
+          }
+          break;
+        }
+        default:
+          LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+          UNREACHABLE();
+      }
+    }
+  }
 }
 
 void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
@@ -7413,11 +7482,11 @@
       DCHECK(!cls->MustGenerateClinitCheck());
       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
       vixl32::Register current_method = InputRegisterAt(cls, 0);
-      GenerateGcRootFieldLoad(cls,
-                              out_loc,
-                              current_method,
-                              ArtMethod::DeclaringClassOffset().Int32Value(),
-                              read_barrier_option);
+      codegen_->GenerateGcRootFieldLoad(cls,
+                                        out_loc,
+                                        current_method,
+                                        ArtMethod::DeclaringClassOffset().Int32Value(),
+                                        read_barrier_option);
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
@@ -7448,7 +7517,7 @@
       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
           codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
       codegen_->EmitMovwMovtPlaceholder(labels, out);
-      GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
+      codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
       generate_null_check = true;
       break;
     }
@@ -7457,7 +7526,7 @@
                                                        cls->GetTypeIndex(),
                                                        cls->GetClass()));
       // /* GcRoot<mirror::Class> */ out = *out
-      GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
+      codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
       break;
     }
     case HLoadClass::LoadKind::kRuntimeCall:
@@ -7665,7 +7734,8 @@
       CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
           codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
       codegen_->EmitMovwMovtPlaceholder(labels, out);
-      GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
+      codegen_->GenerateGcRootFieldLoad(
+          load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
       LoadStringSlowPathARMVIXL* slow_path =
           new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load);
       codegen_->AddSlowPath(slow_path);
@@ -7679,7 +7749,8 @@
                                                         load->GetStringIndex(),
                                                         load->GetString()));
       // /* GcRoot<mirror::String> */ out = *out
-      GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
+      codegen_->GenerateGcRootFieldLoad(
+          load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
       return;
     }
     default:
@@ -8730,7 +8801,7 @@
   }
 }
 
-void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
+void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
     HInstruction* instruction,
     Location root,
     vixl32::Register obj,
@@ -8761,9 +8832,8 @@
         UseScratchRegisterScope temps(GetVIXLAssembler());
         ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
         bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
-        uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(
-            root_reg.GetCode(), narrow);
-        vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
+        uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
+        vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
 
         vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes);
         vixl32::Label return_address;
@@ -8774,7 +8844,7 @@
         DCHECK_LT(offset, kReferenceLoadMinFarOffset);
         ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
         __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
-        EmitPlaceholderBne(codegen_, bne_label);
+        EmitPlaceholderBne(this, bne_label);
         __ Bind(&return_address);
         DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
                   narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
@@ -8794,8 +8864,8 @@
         // Slow path marking the GC root `root`. The entrypoint will
         // be loaded by the slow path code.
         SlowPathCodeARMVIXL* slow_path =
-            new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARMVIXL(instruction, root);
-        codegen_->AddSlowPath(slow_path);
+            new (GetScopedAllocator()) ReadBarrierMarkSlowPathARMVIXL(instruction, root);
+        AddSlowPath(slow_path);
 
         // /* GcRoot<mirror::Object> */ root = *(obj + offset)
         GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
@@ -8816,7 +8886,7 @@
       // /* GcRoot<mirror::Object>* */ root = obj + offset
       __ Add(root_reg, obj, offset);
       // /* mirror::Object* */ root = root->Read()
-      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+      GenerateReadBarrierForRootSlow(instruction, root, root);
     }
   } else {
     // Plain GC root load with no read barrier.
@@ -8825,7 +8895,7 @@
     // Note that GC roots are not affected by heap poisoning, thus we
     // do not have to unpoison `root_reg` here.
   }
-  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 18);
+  MaybeGenerateMarkingRegisterCheck(/* code */ 18);
 }
 
 void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
@@ -8886,8 +8956,7 @@
     }
     UseScratchRegisterScope temps(GetVIXLAssembler());
     ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
-    uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
-        base.GetCode(), obj.GetCode(), narrow);
+    uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode(), narrow);
     vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
 
     {
@@ -8973,8 +9042,7 @@
 
     UseScratchRegisterScope temps(GetVIXLAssembler());
     ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
-    uint32_t custom_data =
-        linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode());
+    uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
     vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
 
     __ Add(data_reg, obj, Operand(data_offset));
@@ -9111,7 +9179,7 @@
 
 void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction,
                                                     Location ref,
-                                                    vixl::aarch32::Register obj,
+                                                    vixl32::Register obj,
                                                     uint32_t offset,
                                                     Location index,
                                                     ScaleFactor scale_factor,
@@ -9451,7 +9519,7 @@
   return &patches->back();
 }
 
-vixl::aarch32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) {
+vixl32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) {
   baker_read_barrier_patches_.emplace_back(custom_data);
   return &baker_read_barrier_patches_.back().label;
 }
@@ -9548,6 +9616,45 @@
   DCHECK_EQ(size, linker_patches->size());
 }
 
+bool CodeGeneratorARMVIXL::NeedsThunkCode(const linker::LinkerPatch& patch) const {
+  return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
+         patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
+}
+
+void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch,
+                                         /*out*/ ArenaVector<uint8_t>* code,
+                                         /*out*/ std::string* debug_name) {
+  arm::ArmVIXLAssembler assembler(GetGraph()->GetAllocator());
+  switch (patch.GetType()) {
+    case linker::LinkerPatch::Type::kCallRelative:
+      // The thunk just uses the entry point in the ArtMethod. This works even for calls
+      // to the generic JNI and interpreter trampolines.
+      assembler.LoadFromOffset(
+          arm::kLoadWord,
+          vixl32::pc,
+          vixl32::r0,
+          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
+      assembler.GetVIXLAssembler()->Bkpt(0);
+      if (GetCompilerOptions().GenerateAnyDebugInfo()) {
+        *debug_name = "MethodCallThunk";
+      }
+      break;
+    case linker::LinkerPatch::Type::kBakerReadBarrierBranch:
+      DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
+      CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected patch type " << patch.GetType();
+      UNREACHABLE();
+  }
+
+  // Ensure we emit the literal pool if any.
+  assembler.FinalizeCode();
+  code->resize(assembler.CodeSize());
+  MemoryRegion code_region(code->data(), code->size());
+  assembler.FinalizeInstructions(code_region);
+}
+
 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal(
     uint32_t value,
     Uint32ToLiteralMap* map) {
@@ -9792,5 +9899,210 @@
 #undef QUICK_ENTRY_POINT
 #undef TODO_VIXL32
 
+#define __ assembler.GetVIXLAssembler()->
+
+static void EmitGrayCheckAndFastPath(ArmVIXLAssembler& assembler,
+                                     vixl32::Register base_reg,
+                                     vixl32::MemOperand& lock_word,
+                                     vixl32::Label* slow_path,
+                                     int32_t raw_ldr_offset) {
+  // Load the lock word containing the rb_state.
+  __ Ldr(ip, lock_word);
+  // Given the numeric representation, it's enough to check the low bit of the rb_state.
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+  __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted));
+  __ B(ne, slow_path, /* is_far_target */ false);
+  __ Add(lr, lr, raw_ldr_offset);
+  // Introduce a dependency on the lock_word including rb_state,
+  // to prevent load-load reordering, and without using
+  // a memory barrier (which would be more expensive).
+  __ Add(base_reg, base_reg, Operand(ip, LSR, 32));
+  __ Bx(lr);          // And return back to the function.
+  // Note: The fake dependency is unnecessary for the slow path.
+}
+
+// Load the read barrier introspection entrypoint in register `entrypoint`.
+static void LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler,
+                                                       vixl32::Register entrypoint) {
+  // The register where the read barrier introspection entrypoint is loaded
+  // is fixed: `kBakerCcEntrypointRegister` (R4).
+  DCHECK(entrypoint.Is(kBakerCcEntrypointRegister));
+  // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+  DCHECK_EQ(ip.GetCode(), 12u);
+  const int32_t entry_point_offset =
+      Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+  __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
+}
+
+void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
+                                                        uint32_t encoded_data,
+                                                        /*out*/ std::string* debug_name) {
+  BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+  switch (kind) {
+    case BakerReadBarrierKind::kField: {
+      // Check if the holder is gray and, if not, add fake dependency to the base register
+      // and return to the LDR instruction to load the reference. Otherwise, use introspection
+      // to load the reference and call the entrypoint (in kBakerCcEntrypointRegister)
+      // that performs further checks on the reference and marks it if needed.
+      vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(base_reg.GetCode());
+      vixl32::Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data));
+      CheckValidReg(holder_reg.GetCode());
+      BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+      temps.Exclude(ip);
+      // If base_reg differs from holder_reg, the offset was too large and we must have
+      // emitted an explicit null check before the load. Otherwise, we need to null-check
+      // the holder as we do not necessarily do that check before going to the thunk.
+      vixl32::Label throw_npe;
+      if (holder_reg.Is(base_reg)) {
+        __ CompareAndBranchIfZero(holder_reg, &throw_npe, /* is_far_target */ false);
+      }
+      vixl32::Label slow_path;
+      MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
+      const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide)
+          ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
+          : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET;
+      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
+      __ Bind(&slow_path);
+      const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
+                                 raw_ldr_offset;
+      vixl32::Register ep_reg(kBakerCcEntrypointRegister);
+      LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg);
+      if (width == BakerReadBarrierWidth::kWide) {
+        MemOperand ldr_half_address(lr, ldr_offset + 2);
+        __ Ldrh(ip, ldr_half_address);        // Load the LDR immediate half-word with "Rt | imm12".
+        __ Ubfx(ip, ip, 0, 12);               // Extract the offset imm12.
+        __ Ldr(ip, MemOperand(base_reg, ip));   // Load the reference.
+      } else {
+        MemOperand ldr_address(lr, ldr_offset);
+        __ Ldrh(ip, ldr_address);             // Load the LDR immediate, encoding T1.
+        __ Add(ep_reg,                        // Adjust the entrypoint address to the entrypoint
+               ep_reg,                        // for narrow LDR.
+               Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET));
+        __ Ubfx(ip, ip, 6, 5);                // Extract the imm5, i.e. offset / 4.
+        __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2));   // Load the reference.
+      }
+      // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
+      __ Bx(ep_reg);                          // Jump to the entrypoint.
+      if (holder_reg.Is(base_reg)) {
+        // Add null check slow path. The stack map is at the address pointed to by LR.
+        __ Bind(&throw_npe);
+        int32_t offset = GetThreadOffset<kArmPointerSize>(kQuickThrowNullPointer).Int32Value();
+        __ Ldr(ip, MemOperand(/* Thread* */ vixl32::r9, offset));
+        __ Bx(ip);
+      }
+      break;
+    }
+    case BakerReadBarrierKind::kArray: {
+      vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(base_reg.GetCode());
+      DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+                BakerReadBarrierSecondRegField::Decode(encoded_data));
+      DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
+      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+      temps.Exclude(ip);
+      vixl32::Label slow_path;
+      int32_t data_offset =
+          mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+      MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
+      DCHECK_LT(lock_word.GetOffsetImmediate(), 0);
+      const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
+      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
+      __ Bind(&slow_path);
+      const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
+                                 raw_ldr_offset;
+      MemOperand ldr_address(lr, ldr_offset + 2);
+      __ Ldrb(ip, ldr_address);               // Load the LDR (register) byte with "00 | imm2 | Rm",
+                                              // i.e. Rm+32 because the scale in imm2 is 2.
+      vixl32::Register ep_reg(kBakerCcEntrypointRegister);
+      LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg);
+      __ Bfi(ep_reg, ip, 3, 6);               // Insert ip into the entrypoint address to create
+                                              // a switch case target based on the index register.
+      __ Mov(ip, base_reg);                   // Move the base register to ip.
+      __ Bx(ep_reg);                          // Jump to the entrypoint's array switch case.
+      break;
+    }
+    case BakerReadBarrierKind::kGcRoot: {
+      // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
+      // and it does not have a forwarding address), call the correct introspection entrypoint;
+      // otherwise return the reference (or the extracted forwarding address).
+      // There is no gray bit check for GC roots.
+      vixl32::Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(root_reg.GetCode());
+      DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+                BakerReadBarrierSecondRegField::Decode(encoded_data));
+      BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+      temps.Exclude(ip);
+      vixl32::Label return_label, not_marked, forwarding_address;
+      __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target */ false);
+      MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value());
+      __ Ldr(ip, lock_word);
+      __ Tst(ip, LockWord::kMarkBitStateMaskShifted);
+      __ B(eq, &not_marked);
+      __ Bind(&return_label);
+      __ Bx(lr);
+      __ Bind(&not_marked);
+      static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3,
+                    "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in "
+                    " the highest bits and the 'forwarding address' state to have all bits set");
+      __ Cmp(ip, Operand(0xc0000000));
+      __ B(hs, &forwarding_address);
+      vixl32::Register ep_reg(kBakerCcEntrypointRegister);
+      LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg);
+      // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister
+      // to art_quick_read_barrier_mark_introspection_gc_roots.
+      int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide)
+          ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
+          : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET;
+      __ Add(ep_reg, ep_reg, Operand(entrypoint_offset));
+      __ Mov(ip, root_reg);
+      __ Bx(ep_reg);
+      __ Bind(&forwarding_address);
+      __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift);
+      __ Bx(lr);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+      UNREACHABLE();
+  }
+
+  if (GetCompilerOptions().GenerateAnyDebugInfo()) {
+    std::ostringstream oss;
+    oss << "BakerReadBarrierThunk";
+    switch (kind) {
+      case BakerReadBarrierKind::kField:
+        oss << "Field";
+        if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
+          oss << "Wide";
+        }
+        oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
+            << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
+        break;
+      case BakerReadBarrierKind::kArray:
+        oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
+        DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+                  BakerReadBarrierSecondRegField::Decode(encoded_data));
+        DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
+        break;
+      case BakerReadBarrierKind::kGcRoot:
+        oss << "GcRoot";
+        if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
+          oss << "Wide";
+        }
+        oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
+        DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+                  BakerReadBarrierSecondRegField::Decode(encoded_data));
+        break;
+    }
+    *debug_name = oss.str();
+  }
+}
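Editor's note: for reference, the names produced by the stream insertions above look like the following (examples derived from the code, not taken from the patch):

    // EncodeBakerReadBarrierFieldData(/*base_reg=*/ 1, /*holder_reg=*/ 2, /*narrow=*/ false)
    //   -> "BakerReadBarrierThunkFieldWide_r1_r2"
    // EncodeBakerReadBarrierGcRootData(/*root_reg=*/ 3, /*narrow=*/ true)
    //   -> "BakerReadBarrierThunkGcRoot_r3"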
+
+#undef __
+
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 2114ea1..6b9919a 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -36,6 +36,11 @@
 #pragma GCC diagnostic pop
 
 namespace art {
+
+namespace linker {
+class Thumb2RelativePatcherTest;
+}  // namespace linker
+
 namespace arm {
 
 // This constant is used as an approximate margin when emission of veneer and literal pools
@@ -108,6 +113,9 @@
 static const size_t kRuntimeParameterFpuRegistersLengthVIXL =
     arraysize(kRuntimeParameterFpuRegistersVIXL);
 
+// The reserved entrypoint register for link-time generated thunks.
+const vixl::aarch32::Register kBakerCcEntrypointRegister = vixl32::r4;
+
 class LoadClassSlowPathARMVIXL;
 class CodeGeneratorARMVIXL;
 
@@ -388,16 +396,6 @@
                                          uint32_t offset,
                                          Location maybe_temp,
                                          ReadBarrierOption read_barrier_option);
-  // Generate a GC root reference load:
-  //
-  //   root <- *(obj + offset)
-  //
-  // while honoring read barriers based on read_barrier_option.
-  void GenerateGcRootFieldLoad(HInstruction* instruction,
-                               Location root,
-                               vixl::aarch32::Register obj,
-                               uint32_t offset,
-                               ReadBarrierOption read_barrier_option);
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
                              vixl::aarch32::Label* true_target,
@@ -606,6 +604,10 @@
                                                 Handle<mirror::Class> handle);
 
   void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;
+  bool NeedsThunkCode(const linker::LinkerPatch& patch) const OVERRIDE;
+  void EmitThunkCode(const linker::LinkerPatch& patch,
+                     /*out*/ ArenaVector<uint8_t>* code,
+                     /*out*/ std::string* debug_name) OVERRIDE;
 
   void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
 
@@ -613,6 +615,16 @@
   // is added only for AOT compilation if link-time generated thunks for fields are enabled.
   void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations);
 
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers based on read_barrier_option.
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               vixl::aarch32::Register obj,
+                               uint32_t offset,
+                               ReadBarrierOption read_barrier_option);
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -767,6 +779,83 @@
                                  vixl::aarch32::Register temp = vixl32::Register());
 
  private:
+  // Encoding of thunk type and data for link-time generated thunks for Baker read barriers.
+
+  enum class BakerReadBarrierKind : uint8_t {
+    kField,   // Field get or array get with constant offset (i.e. constant index).
+    kArray,   // Array get with index in register.
+    kGcRoot,  // GC root load.
+    kLast = kGcRoot
+  };
+
+  enum class BakerReadBarrierWidth : uint8_t {
+    kWide,          // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled).
+    kNarrow,        // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled).
+    kLast = kNarrow
+  };
+
+  static constexpr uint32_t kBakerReadBarrierInvalidEncodedReg = /* pc is invalid */ 15u;
+
+  static constexpr size_t kBitsForBakerReadBarrierKind =
+      MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
+  static constexpr size_t kBakerReadBarrierBitsForRegister =
+      MinimumBitsToStore(kBakerReadBarrierInvalidEncodedReg);
+  using BakerReadBarrierKindField =
+      BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>;
+  using BakerReadBarrierFirstRegField =
+      BitField<uint32_t, kBitsForBakerReadBarrierKind, kBakerReadBarrierBitsForRegister>;
+  using BakerReadBarrierSecondRegField =
+      BitField<uint32_t,
+               kBitsForBakerReadBarrierKind + kBakerReadBarrierBitsForRegister,
+               kBakerReadBarrierBitsForRegister>;
+  static constexpr size_t kBitsForBakerReadBarrierWidth =
+      MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast));
+  using BakerReadBarrierWidthField =
+      BitField<BakerReadBarrierWidth,
+               kBitsForBakerReadBarrierKind + 2 * kBakerReadBarrierBitsForRegister,
+               kBitsForBakerReadBarrierWidth>;
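Editor's note: with MinimumBitsToStore(2) == 2 for the kind, MinimumBitsToStore(15) == 4 per register field and MinimumBitsToStore(1) == 1 for the width, the definitions above yield this packed layout (derived here for reference):

    // Derived bit layout of the encoded thunk data:
    //   bits [0,2)  : BakerReadBarrierKind   (kField / kArray / kGcRoot)
    //   bits [2,6)  : first register
    //   bits [6,10) : second register        (15, i.e. pc, when unused)
    //   bit  10     : BakerReadBarrierWidth  (kWide / kNarrow)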
+
+  static void CheckValidReg(uint32_t reg) {
+    DCHECK(reg < vixl::aarch32::ip.GetCode() && reg != kBakerCcEntrypointRegister.GetCode()) << reg;
+  }
+
+  static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg,
+                                                  uint32_t holder_reg,
+                                                  bool narrow) {
+    CheckValidReg(base_reg);
+    CheckValidReg(holder_reg);
+    DCHECK(!narrow || base_reg < 8u) << base_reg;
+    BakerReadBarrierWidth width =
+        narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide;
+    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) |
+           BakerReadBarrierFirstRegField::Encode(base_reg) |
+           BakerReadBarrierSecondRegField::Encode(holder_reg) |
+           BakerReadBarrierWidthField::Encode(width);
+  }
+
+  static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
+    CheckValidReg(base_reg);
+    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
+           BakerReadBarrierFirstRegField::Encode(base_reg) |
+           BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) |
+           BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide);
+  }
+
+  static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) {
+    CheckValidReg(root_reg);
+    DCHECK(!narrow || root_reg < 8u) << root_reg;
+    BakerReadBarrierWidth width =
+        narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide;
+    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
+           BakerReadBarrierFirstRegField::Encode(root_reg) |
+           BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) |
+           BakerReadBarrierWidthField::Encode(width);
+  }
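Editor's note: a hypothetical round trip through the fields above, mirroring the Decode calls used by CompileBakerReadBarrierThunk (the values are arbitrary illustration, not from the patch):

    uint32_t data = EncodeBakerReadBarrierFieldData(/*base_reg=*/ 5u, /*holder_reg=*/ 6u, /*narrow=*/ false);
    DCHECK(BakerReadBarrierKindField::Decode(data) == BakerReadBarrierKind::kField);
    DCHECK_EQ(5u, BakerReadBarrierFirstRegField::Decode(data));
    DCHECK_EQ(6u, BakerReadBarrierSecondRegField::Decode(data));
    DCHECK(BakerReadBarrierWidthField::Decode(data) == BakerReadBarrierWidth::kWide);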
+
+  void CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
+                                    uint32_t encoded_data,
+                                    /*out*/ std::string* debug_name);
+
   vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
                                                                 vixl::aarch32::Register temp);
 
@@ -829,6 +918,7 @@
   // Patches for class literals in JIT compiled code.
   TypeToLiteralMap jit_class_patches_;
 
+  friend class linker::Thumb2RelativePatcherTest;
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARMVIXL);
 };
 
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index c41c290..792cfb5 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -195,7 +195,9 @@
   }
 
   size_t GetSize() const { return size_; }
-  uint8_t* GetMemory() const { return memory_.get(); }
+  ArrayRef<const uint8_t> GetMemory() const OVERRIDE {
+    return ArrayRef<const uint8_t>(memory_.get(), size_);
+  }
 
  private:
   size_t size_;
@@ -269,8 +271,8 @@
   InstructionSet target_isa = codegen.GetInstructionSet();
 
   typedef Expected (*fptr)();
-  CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
-  fptr f = reinterpret_cast<fptr>(allocator.GetMemory());
+  CommonCompilerTest::MakeExecutable(allocator.GetMemory().data(), allocator.GetMemory().size());
+  fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(allocator.GetMemory().data()));
   if (target_isa == InstructionSet::kThumb2) {
     // For thumb we need the bottom bit set.
     f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
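Editor's note: the GetMemory() overrides above replace a raw pointer plus separate GetSize() with a single ArrayRef<const uint8_t>. A simplified stand-in for the view type (assumed shape; not art::ArrayRef itself) shows what these call sites rely on:

    #include <cstddef>

    // Simplified approximation of a non-owning pointer+length view.
    template <typename T>
    class SimpleArrayRef {
     public:
      SimpleArrayRef(const T* data, size_t size) : data_(data), size_(size) {}
      template <typename Container>
      explicit SimpleArrayRef(const Container& c) : data_(c.data()), size_(c.size()) {}
      const T* data() const { return data_; }
      size_t size() const { return size_; }
      bool empty() const { return size_ == 0u; }
     private:
      const T* data_;
      size_t size_;
    };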
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index d20b681..2e189fd 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -105,15 +105,15 @@
              const std::vector<uint8_t>& expected_asm,
              const std::vector<uint8_t>& expected_cfi) {
     // Get the outputs.
-    const std::vector<uint8_t>& actual_asm = code_allocator_.GetMemory();
+    ArrayRef<const uint8_t> actual_asm = code_allocator_.GetMemory();
     Assembler* opt_asm = code_gen_->GetAssembler();
-    const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data());
+    ArrayRef<const uint8_t> actual_cfi(*(opt_asm->cfi().data()));
 
     if (kGenerateExpected) {
       GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi);
     } else {
-      EXPECT_EQ(expected_asm, actual_asm);
-      EXPECT_EQ(expected_cfi, actual_cfi);
+      EXPECT_EQ(ArrayRef<const uint8_t>(expected_asm), actual_asm);
+      EXPECT_EQ(ArrayRef<const uint8_t>(expected_cfi), actual_cfi);
     }
   }
 
@@ -140,7 +140,7 @@
       return memory_.data();
     }
 
-    const std::vector<uint8_t>& GetMemory() { return memory_; }
+    ArrayRef<const uint8_t> GetMemory() const OVERRIDE { return ArrayRef<const uint8_t>(memory_); }
 
    private:
     std::vector<uint8_t> memory_;
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index e42dfc1..7916582 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -75,22 +75,18 @@
 class CodeVectorAllocator FINAL : public CodeAllocator {
  public:
   explicit CodeVectorAllocator(ArenaAllocator* allocator)
-      : memory_(allocator->Adapter(kArenaAllocCodeBuffer)),
-        size_(0) {}
+      : memory_(allocator->Adapter(kArenaAllocCodeBuffer)) {}
 
   virtual uint8_t* Allocate(size_t size) {
-    size_ = size;
     memory_.resize(size);
     return &memory_[0];
   }
 
-  size_t GetSize() const { return size_; }
-  const ArenaVector<uint8_t>& GetMemory() const { return memory_; }
+  ArrayRef<const uint8_t> GetMemory() const OVERRIDE { return ArrayRef<const uint8_t>(memory_); }
   uint8_t* GetData() { return memory_.data(); }
 
  private:
   ArenaVector<uint8_t> memory_;
-  size_t size_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeVectorAllocator);
 };
@@ -719,7 +715,7 @@
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       GetCompilerDriver(),
       codegen->GetInstructionSet(),
-      ArrayRef<const uint8_t>(code_allocator->GetMemory()),
+      code_allocator->GetMemory(),
       // Follow Quick's behavior and set the frame size to zero if it is
       // considered "empty" (see the definition of
       // art::CodeGenerator::HasEmptyFrame).
@@ -731,6 +727,16 @@
       ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
       ArrayRef<const linker::LinkerPatch>(linker_patches));
 
+  CompiledMethodStorage* storage = GetCompilerDriver()->GetCompiledMethodStorage();
+  for (const linker::LinkerPatch& patch : linker_patches) {
+    if (codegen->NeedsThunkCode(patch) && storage->GetThunkCode(patch).empty()) {
+      ArenaVector<uint8_t> code(allocator->Adapter());
+      std::string debug_name;
+      codegen->EmitThunkCode(patch, &code, &debug_name);
+      storage->SetThunkCode(patch, ArrayRef<const uint8_t>(code), debug_name);
+    }
+  }
+
   return compiled_method;
 }
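Editor's note: the loop above compiles each needed thunk at most once per storage; an empty result from GetThunkCode() signals that no thunk has been stored for that patch yet. The same compile-once pattern in isolation (the map and key type are assumed stand-ins; CompiledMethodStorage's real keying and deduplication differ):

    #include <cstdint>
    #include <map>
    #include <vector>

    using ThunkKey = uint64_t;  // assumed stand-in for a patch-derived key
    std::map<ThunkKey, std::vector<uint8_t>> thunk_store;

    void EnsureThunkCompiled(ThunkKey key, std::vector<uint8_t> (*emit)()) {
      if (thunk_store.find(key) == thunk_store.end()) {
        thunk_store.emplace(key, emit());  // compile once; later requests reuse it
      }
    }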
 
@@ -1339,7 +1345,7 @@
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
       code_allocator.GetMemory().data(),
-      code_allocator.GetSize(),
+      code_allocator.GetMemory().size(),
       data_size,
       osr,
       roots,
@@ -1369,7 +1375,7 @@
     info.is_optimized = true;
     info.is_code_address_text_relative = false;
     info.code_address = code_address;
-    info.code_size = code_allocator.GetSize();
+    info.code_size = code_allocator.GetMemory().size();
     info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
     info.code_info = stack_map_size == 0 ? nullptr : stack_map_data;
     info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data());
@@ -1378,7 +1384,7 @@
 
   Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed());
   if (jit_logger != nullptr) {
-    jit_logger->WriteLog(code, code_allocator.GetSize(), method);
+    jit_logger->WriteLog(code, code_allocator.GetMemory().size(), method);
   }
 
   if (kArenaAllocatorCountAllocations) {