Implement method calls using relative BL on ARM.

Store the linker patches with each CompiledMethod instead of
keeping them in CompilerDriver. Reorganize oat file creation
to apply the patches as we're writing the method code. Add
framework for platform-specific relative call patches in the
OatWriter. Implement relative call patches for ARM.

Change-Id: Ie2effb3d92b61ac8f356140eba09dc37d62290f8
diff --git a/compiler/ b/compiler/
index e74d6de..dd64368 100644
--- a/compiler/
+++ b/compiler/
@@ -27,6 +27,7 @@
 #include "dex_file-inl.h"
 #include "dex/verification_results.h"
 #include "gc/space/space.h"
+#include "image_writer.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/array.h"
 #include "mirror/class_loader.h"
@@ -36,10 +37,270 @@
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
+#include "utils/arm/assembler_thumb2.h"
 #include "verifier/method_verifier.h"
 namespace art {
+class OatWriter::RelativeCallPatcher {
+ public:
+  virtual ~RelativeCallPatcher() { }
+  // Reserve space for relative call thunks if needed, return adjusted offset.
+  // After all methods have been processed it is called one last time with compiled_method == nullptr.
+  virtual uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method) = 0;
+  // Write relative call thunks if needed, return adjusted offset (callers treat 0u as failure).
+  virtual uint32_t WriteThunks(OutputStream* out, uint32_t offset) = 0;
+  // Patch method code. The input displacement is relative to the patched location,
+  // the patcher may need to adjust it if the correct base is different.
+  virtual void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
+                     uint32_t target_offset) = 0;
+ protected:
+  RelativeCallPatcher() { }  // Constructible only through subclasses.
+ private:
+  DISALLOW_COPY_AND_ASSIGN(RelativeCallPatcher);
+class OatWriter::NoRelativeCallPatcher FINAL : public RelativeCallPatcher {
+ public:
+  NoRelativeCallPatcher() { }
+  uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method) OVERRIDE {
+    return offset;  // No space reserved; no patches expected.
+  }
+  uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE {
+    return offset;  // No thunks added; no patches expected.
+  }
+  void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
+             uint32_t target_offset) OVERRIDE {
+    LOG(FATAL) << "Unexpected relative patch.";  // Reaching this patcher is a logic error.
+  }
+ private:
+  DISALLOW_COPY_AND_ASSIGN(NoRelativeCallPatcher);
+class OatWriter::X86RelativeCallPatcher FINAL : public RelativeCallPatcher {
+ public:
+  X86RelativeCallPatcher() { }
+  uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method) OVERRIDE {
+    return offset;  // No space reserved; no limit on relative call distance.
+  }
+  uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE {
+    return offset;  // No thunks added; no limit on relative call distance.
+  }
+  void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
+             uint32_t target_offset) OVERRIDE {
+    DCHECK_LE(literal_offset + 4u, code->size());  // The 4-byte literal must lie inside the code.
+    // Unsigned arithmetic with its well-defined overflow behavior is just fine here.
+    uint32_t displacement = target_offset - patch_offset;
+    displacement -= kPcDisplacement;  // The base PC is at the end of the 4-byte patch.
+    typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t;  // Store may be unaligned.
+    reinterpret_cast<unaligned_int32_t*>(&(*code)[literal_offset])[0] = displacement;
+  }
+ private:
+  // PC displacement from patch location; x86 PC for relative calls points to the next
+  // instruction and the patch location is 4 bytes earlier.
+  static constexpr int32_t kPcDisplacement = 4;
+  DISALLOW_COPY_AND_ASSIGN(X86RelativeCallPatcher);
+class OatWriter::Thumb2RelativeCallPatcher FINAL : public RelativeCallPatcher {
+ public:
+  explicit Thumb2RelativeCallPatcher(OatWriter* writer)
+      : writer_(writer), thunk_code_(CompileThunkCode()),  // Thunk code is assembled once here.
+        thunk_locations_(), current_thunk_to_write_(0u), unprocessed_patches_() {
+  }
+  uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method) OVERRIDE {
+    // NOTE: The final thunk can be reserved from InitCodeMethodVisitor::EndClass() while it
+    // may be written early by WriteCodeMethodVisitor::VisitMethod() for a deduplicated chunk
+    // of code. To avoid any alignment discrepancies for the final chunk, we always align the
+    // offset after reserving or writing any chunk.
+    if (UNLIKELY(compiled_method == nullptr)) {  // Final call, after all methods.
+      uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kThumb2);
+      bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset);
+      if (needs_thunk) {
+        thunk_locations_.push_back(aligned_offset);
+        offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), kThumb2);
+      }
+      return offset;  // Unchanged when no thunk was needed.
+    }
+    DCHECK(compiled_method->GetQuickCode() != nullptr);
+    uint32_t quick_code_size = compiled_method->GetQuickCode()->size();
+    uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader);
+    uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size);
+    if (!unprocessed_patches_.empty() &&
+        next_aligned_offset - unprocessed_patches_.front().second > kMaxPositiveDisplacement) {
+      bool needs_thunk = ReserveSpaceProcessPatches(next_aligned_offset);
+      if (needs_thunk) {
+        // A single thunk will cover all pending patches.
+        unprocessed_patches_.clear();
+        uint32_t thunk_location = compiled_method->AlignCode(offset);  // Thunk precedes the method.
+        thunk_locations_.push_back(thunk_location);
+        offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), kThumb2);
+      }
+    }
+    for (const LinkerPatch& patch : compiled_method->GetPatches()) {
+      if (patch.Type() == kLinkerPatchCallRelative) {  // Only relative calls are range-limited.
+        unprocessed_patches_.emplace_back(patch.TargetMethod(),
+                                          quick_code_offset + patch.LiteralOffset());
+      }
+    }
+    return offset;
+  }
+  // Writes the next reserved thunk if `offset`, once aligned for Thumb2 code, matches its
+  // reserved location. Returns the adjusted offset, `offset` unchanged if nothing was
+  // written, or 0u if writing to `out` failed.
+  uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE {
+    if (current_thunk_to_write_ == thunk_locations_.size()) {
+      return offset;  // Every reserved thunk has already been written.
+    }
+    uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kThumb2);
+    if (UNLIKELY(aligned_offset == thunk_locations_[current_thunk_to_write_])) {
+      ++current_thunk_to_write_;
+      // Pad up to the thunk's aligned location first.
+      uint32_t aligned_code_delta = aligned_offset - offset;
+      if (aligned_code_delta != 0u && !writer_->WriteCodeAlignment(out, aligned_code_delta)) {
+        return 0u;
+      }
+      // The buffer argument had been lost here; write the pre-assembled thunk code.
+      if (!out->WriteFully(&thunk_code_[0], thunk_code_.size())) {
+        return 0u;
+      }
+      writer_->size_relative_call_thunks_ += thunk_code_.size();
+      uint32_t thunk_end_offset = aligned_offset + thunk_code_.size();
+      // Align after writing chunk, see the ReserveSpace() above.
+      offset = CompiledMethod::AlignCode(thunk_end_offset, kThumb2);
+      aligned_code_delta = offset - thunk_end_offset;
+      if (aligned_code_delta != 0u && !writer_->WriteCodeAlignment(out, aligned_code_delta)) {
+        return 0u;
+      }
+    }
+    return offset;
+  }
+  // Rewrites the Thumb2 BL at `literal_offset` in `code` so that it branches from
+  // `patch_offset` to `target_offset` (which must carry the Thumb mode bit). If the target
+  // is outside the BL range, the call is redirected to the next unwritten thunk when that
+  // is within range, otherwise to the most recently written thunk.
+  void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
+             uint32_t target_offset) OVERRIDE {
+    DCHECK_LE(literal_offset + 4u, code->size());
+    DCHECK_EQ(literal_offset & 1u, 0u);
+    DCHECK_EQ(patch_offset & 1u, 0u);
+    DCHECK_EQ(target_offset & 1u, 1u);  // Thumb2 mode bit.
+    // Unsigned arithmetic with its well-defined overflow behavior is just fine here.
+    uint32_t displacement = target_offset - 1u - patch_offset;
+    // NOTE: With unsigned arithmetic we do mean to use && rather than || below.
+    if (displacement > kMaxPositiveDisplacement && displacement < -kMaxNegativeDisplacement) {
+      // Unwritten thunks have higher offsets, check if it's within range.
+      DCHECK(current_thunk_to_write_ == thunk_locations_.size() ||
+             thunk_locations_[current_thunk_to_write_] > patch_offset);
+      // The bound is inclusive: kMaxPositiveDisplacement itself is still encodable, which is
+      // why the range check above rejects only displacements strictly greater than it.
+      if (current_thunk_to_write_ != thunk_locations_.size() &&
+          thunk_locations_[current_thunk_to_write_] - patch_offset <= kMaxPositiveDisplacement) {
+        displacement = thunk_locations_[current_thunk_to_write_] - patch_offset;
+      } else {
+        // We must have a previous thunk then.
+        DCHECK_NE(current_thunk_to_write_, 0u);
+        DCHECK_LT(thunk_locations_[current_thunk_to_write_ - 1], patch_offset);
+        displacement = thunk_locations_[current_thunk_to_write_ - 1] - patch_offset;
+        DCHECK(displacement >= -kMaxNegativeDisplacement);
+      }
+    }
+    displacement -= kPcDisplacement;  // The base PC is at the end of the 4-byte patch.
+    DCHECK_EQ(displacement & 1u, 0u);
+    DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u);  // 25-bit signed.
+    // Split the displacement into the BL immediate fields.
+    uint32_t signbit = (displacement >> 31) & 0x1;
+    uint32_t i1 = (displacement >> 23) & 0x1;
+    uint32_t i2 = (displacement >> 22) & 0x1;
+    uint32_t imm10 = (displacement >> 12) & 0x03ff;
+    uint32_t imm11 = (displacement >> 1) & 0x07ff;
+    uint32_t j1 = i1 ^ (signbit ^ 1);
+    uint32_t j2 = i2 ^ (signbit ^ 1);
+    uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11;
+    value |= 0xf000d000;  // BL
+    uint8_t* addr = &(*code)[literal_offset];
+    // Check that we're just overwriting an existing BL.
+    DCHECK_EQ(addr[1] & 0xf8, 0xf0);
+    DCHECK_EQ(addr[3] & 0xd0, 0xd0);
+    // Write the new BL: high halfword first, each halfword stored low byte first.
+    addr[0] = (value >> 16) & 0xff;
+    addr[1] = (value >> 24) & 0xff;
+    addr[2] = (value >> 0) & 0xff;
+    addr[3] = (value >> 8) & 0xff;
+  }
+ private:
+  bool ReserveSpaceProcessPatches(uint32_t next_aligned_offset) {
+    // Process as many patches as possible, stop only on unresolved targets or calls too far back.
+    while (!unprocessed_patches_.empty()) {
+      uint32_t patch_offset = unprocessed_patches_.front().second;
+      auto it = writer_->method_offset_map_.find(unprocessed_patches_.front().first);
+      if (it == writer_->method_offset_map_.end()) {
+        // If still unresolved, check if we have a thunk within range.
+        DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset);
+        if (thunk_locations_.empty() ||
+            patch_offset - thunk_locations_.back() > kMaxNegativeDisplacement) {
+          return next_aligned_offset - patch_offset > kMaxPositiveDisplacement;
+        }
+      } else if (it->second >= patch_offset) {
+        DCHECK_LE(it->second - patch_offset, kMaxPositiveDisplacement);  // Forward call in range.
+      } else {
+        // When calling back, check if we have a thunk that's closer than the actual target.
+        uint32_t target_offset = (thunk_locations_.empty() || it->second > thunk_locations_.back())
+            ? it->second
+            : thunk_locations_.back();
+        DCHECK_GT(patch_offset, target_offset);
+        if (patch_offset - target_offset > kMaxNegativeDisplacement) {
+          return true;  // Backward target and last thunk are both too far: need a new thunk.
+        }
+      }
+      unprocessed_patches_.pop_front();  // This patch needs no new thunk.
+    }
+    return false;  // All pending patches were handled without a new thunk.
+  }
+  // Assembles the thunk code: a load of PC from the quick-code entry point field at an
+  // offset from R0 (presumably the callee ArtMethod* - confirm against the calling
+  // convention), followed by a BKPT. Returns the finalized instruction bytes.
+  static std::vector<uint8_t> CompileThunkCode() {
+    // The thunk just uses the entry point in the ArtMethod. This works even for calls
+    // to the generic JNI and interpreter trampolines.
+    arm::Thumb2Assembler assembler;
+    assembler.LoadFromOffset(
+        arm::kLoadWord, arm::PC, arm::R0,
+        mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
+    assembler.bkpt(0);
+    std::vector<uint8_t> thunk_code(assembler.CodeSize());
+    // The pointer argument had been lost here; the region must cover the output buffer.
+    MemoryRegion code(&thunk_code[0], thunk_code.size());
+    assembler.FinalizeInstructions(code);
+    return thunk_code;
+  }
+  // PC displacement from patch location; Thumb2 PC is always at instruction address + 4.
+  static constexpr int32_t kPcDisplacement = 4;
+  // Maximum positive and negative displacement measured from the patch location.
+  // (Signed 25 bit displacement with the last bit 0 has range [-2^24, 2^24-2] measured from
+  // the Thumb2 PC pointing right after the BL, i.e. 4 bytes later than the patch location.)
+  static constexpr uint32_t kMaxPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement;
+  static constexpr uint32_t kMaxNegativeDisplacement = (1u << 24) - kPcDisplacement;
+  OatWriter* const writer_;
+  const std::vector<uint8_t> thunk_code_;
+  std::vector<uint32_t> thunk_locations_;
+  size_t current_thunk_to_write_;
+  // ReserveSpace() tracks unprocessed patches.
+  typedef std::pair<MethodReference, uint32_t> UnprocessedPatch;
+  std::deque<UnprocessedPatch> unprocessed_patches_;
+  DISALLOW_COPY_AND_ASSIGN(Thumb2RelativeCallPatcher);
 #define DCHECK_OFFSET() \
   DCHECK_EQ(static_cast<off_t>(file_offset + relative_offset), out->Seek(0, kSeekCurrent)) \
     << "file_offset=" << file_offset << " relative_offset=" << relative_offset
@@ -53,10 +314,14 @@
                      uintptr_t image_file_location_oat_begin,
                      int32_t image_patch_delta,
                      const CompilerDriver* compiler,
+                     ImageWriter* image_writer,
                      TimingLogger* timings,
                      SafeMap<std::string, std::string>* key_value_store)
   : compiler_driver_(compiler),
+    image_writer_(image_writer),
+    size_(0u),
+    oat_data_offset_(0u),
@@ -81,6 +346,7 @@
+    size_relative_call_thunks_(0),
@@ -92,9 +358,27 @@
-    size_oat_class_method_offsets_(0) {
+    size_oat_class_method_offsets_(0),
+    method_offset_map_() {
   CHECK(key_value_store != nullptr);
+  switch (compiler_driver_->GetInstructionSet()) {
+    case kX86:
+    case kX86_64:
+      relative_call_patcher_.reset(new X86RelativeCallPatcher);
+      break;
+    case kArm:
+      // Fall through: we generate Thumb2 code for "arm".
+    case kThumb2:
+      relative_call_patcher_.reset(new Thumb2RelativeCallPatcher(this));
+      break;
+    case kArm64:
+      // TODO: Implement relative calls for arm64.
+    default:
+      relative_call_patcher_.reset(new NoRelativeCallPatcher);
+      break;
+  }
   size_t offset;
     TimingLogger::ScopedTiming split("InitOatHeader", timings);
@@ -127,6 +411,7 @@
   size_ = offset;
   CHECK_EQ(dex_files_->size(), oat_dex_files_.size());
+  CHECK_EQ(compiler->IsImage(), image_writer_ != nullptr);
            key_value_store_->find(OatHeader::kImageLocationKey) == key_value_store_->end());
   CHECK_ALIGNED(image_patch_delta_, kPageSize);
@@ -316,6 +601,7 @@
     OatClass* oat_class = new OatClass(offset_, compiled_methods_,
                                        num_non_null_compiled_methods_, status);
+    oat_class->UpdateChecksum(writer_->oat_header_);
     offset_ += oat_class->SizeOf();
     return DexMethodVisitor::EndClass();
@@ -329,6 +615,16 @@
   InitCodeMethodVisitor(OatWriter* writer, size_t offset)
     : OatDexMethodVisitor(writer, offset) {
+    writer_->absolute_patch_locations_.reserve(
+        writer_->compiler_driver_->GetNonRelativeLinkerPatchCount());
+  }
+  bool EndClass() {
+    OatDexMethodVisitor::EndClass();
+    if (oat_class_index_ == writer_->oat_classes_.size()) {
+      offset_ = writer_->relative_call_patcher_->ReserveSpace(offset_, nullptr);
+    }
+    return true;
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
@@ -350,6 +646,7 @@
             oat_method_offsets_offset + OFFSETOF_MEMBER(OatMethodOffsets, code_offset_));
       } else {
         CHECK(quick_code != nullptr);
+        offset_ = writer_->relative_call_patcher_->ReserveSpace(offset_, compiled_method);
         offset_ = compiled_method->AlignCode(offset_);
@@ -369,6 +666,18 @@
           dedupe_map_.PutBefore(lb, compiled_method, quick_code_offset);
+        MethodReference method_ref(dex_file_, it.GetMemberIndex());
+        auto method_lb = writer_->method_offset_map_.lower_bound(method_ref);
+        if (method_lb != writer_->method_offset_map_.end() &&
+            !writer_->method_offset_map_.key_comp()(method_ref, method_lb->first)) {
+          // TODO: Should this be a hard failure?
+          LOG(WARNING) << "Multiple definitions of "
+              << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file)
+              << ((method_lb->second != quick_code_offset) ? "; OFFSET MISMATCH" : "");
+        } else {
+          writer_->method_offset_map_.PutBefore(method_lb, method_ref, quick_code_offset);
+        }
         // Update quick method header.
         DCHECK_LT(method_offsets_index_, oat_class->method_headers_.size());
         OatQuickMethodHeader* method_header = &oat_class->method_headers_[method_offsets_index_];
@@ -392,12 +701,19 @@
                                               frame_size_in_bytes, core_spill_mask, fp_spill_mask,
-        // Update checksum if this wasn't a duplicate.
         if (!deduped) {
-          writer_->oat_header_->UpdateChecksum(method_header, sizeof(*method_header));
+          // Update offsets. (Checksum is updated when writing.)
           offset_ += sizeof(*method_header);  // Method header is prepended before code.
-          writer_->oat_header_->UpdateChecksum(&(*quick_code)[0], code_size);
           offset_ += code_size;
+          // Record absolute patch locations.
+          if (!compiled_method->GetPatches().empty()) {
+            uintptr_t base_loc = offset_ - code_size - writer_->oat_header_->GetExecutableOffset();
+            for (const LinkerPatch& patch : compiled_method->GetPatches()) {
+              if (patch.Type() != kLinkerPatchCallRelative) {
+                writer_->absolute_patch_locations_.push_back(base_loc + patch.LiteralOffset());
+              }
+            }
+          }
         if (writer_->compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) {
@@ -548,13 +864,51 @@
 class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
   WriteCodeMethodVisitor(OatWriter* writer, OutputStream* out, const size_t file_offset,
-                             size_t relative_offset)
+                         size_t relative_offset) SHARED_LOCK_FUNCTION(Locks::mutator_lock_)
     : OatDexMethodVisitor(writer, relative_offset),
-      file_offset_(file_offset) {
+      file_offset_(file_offset),
+      self_(Thread::Current()),
+      old_no_thread_suspension_cause_(self_->StartAssertNoThreadSuspension("OatWriter patching")),
+      class_linker_(Runtime::Current()->GetClassLinker()),
+      dex_cache_(nullptr) {
+    if (writer_->image_writer_ != nullptr) {
+      // If we're creating the image, the address space must be ready so that we can apply patches.
+      CHECK(writer_->image_writer_->IsImageAddressSpaceReady());
+      patched_code_.reserve(16 * KB);
+    }
+    self_->TransitionFromSuspendedToRunnable();
-  bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) {
+  ~WriteCodeMethodVisitor() UNLOCK_FUNCTION(Locks::mutator_lock_) {
+    self_->EndAssertNoThreadSuspension(old_no_thread_suspension_cause_);
+    self_->TransitionFromRunnableToSuspended(kNative);  // Pairs with the constructor's transition.
+  }
+  bool StartClass(const DexFile* dex_file, size_t class_def_index)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    OatDexMethodVisitor::StartClass(dex_file, class_def_index);
+    if (dex_cache_ == nullptr || dex_cache_->GetDexFile() != dex_file) {  // Dex file changed.
+      dex_cache_ = class_linker_->FindDexCache(*dex_file);  // Cached for later patch lookups.
+    }
+    return true;
+  }
+  bool EndClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    bool result = OatDexMethodVisitor::EndClass();
+    if (oat_class_index_ == writer_->oat_classes_.size()) {  // Presumably: last class done.
+      DCHECK(result);  // OatDexMethodVisitor::EndClass() never fails.
+      offset_ = writer_->relative_call_patcher_->WriteThunks(out_, offset_);
+      if (UNLIKELY(offset_ == 0u)) {  // The Thumb2 patcher returns 0u when a write fails.
+        PLOG(ERROR) << "Failed to write final relative call thunks";
+        result = false;
+      }
+    }
+    return result;
+  }
+  bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     OatClass* oat_class = writer_->oat_classes_[oat_class_index_];
     const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
@@ -565,18 +919,18 @@
       const std::vector<uint8_t>* quick_code = compiled_method->GetQuickCode();
       if (quick_code != nullptr) {
         CHECK(compiled_method->GetPortableCode() == nullptr);
+        offset_ = writer_->relative_call_patcher_->WriteThunks(out, offset_);
+        if (offset_ == 0u) {
+          ReportWriteFailure("relative call thunk", it);
+          return false;
+        }
         uint32_t aligned_offset = compiled_method->AlignCode(offset_);
         uint32_t aligned_code_delta = aligned_offset - offset_;
         if (aligned_code_delta != 0) {
-          static const uint8_t kPadding[] = {
-              0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u
-          };
-          DCHECK_LE(aligned_code_delta, sizeof(kPadding));
-          if (UNLIKELY(!out->WriteFully(kPadding, aligned_code_delta))) {
+          if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) {
             ReportWriteFailure("code alignment padding", it);
             return false;
-          writer_->size_code_alignment_ += aligned_code_delta;
           offset_ += aligned_code_delta;
@@ -591,7 +945,9 @@
                    offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta())
             << PrettyMethod(it.GetMemberIndex(), *dex_file_);
         if (method_offsets.code_offset_ >= offset_) {
-          const OatQuickMethodHeader& method_header = oat_class->method_headers_[method_offsets_index_];
+          const OatQuickMethodHeader& method_header =
+              oat_class->method_headers_[method_offsets_index_];
+          writer_->oat_header_->UpdateChecksum(&method_header, sizeof(method_header));
           if (!out->WriteFully(&method_header, sizeof(method_header))) {
             ReportWriteFailure("method header", it);
             return false;
@@ -599,6 +955,31 @@
           writer_->size_method_header_ += sizeof(method_header);
           offset_ += sizeof(method_header);
+          if (!compiled_method->GetPatches().empty()) {
+            patched_code_ =  *quick_code;
+            quick_code = &patched_code_;
+            for (const LinkerPatch& patch : compiled_method->GetPatches()) {
+              if (patch.Type() == kLinkerPatchCallRelative) {
+                // NOTE: Relative calls across oat files are not supported.
+                uint32_t target_offset = GetTargetOffset(patch);
+                uint32_t literal_offset = patch.LiteralOffset();
+                writer_->relative_call_patcher_->Patch(&patched_code_, literal_offset,
+                                                       offset_ + literal_offset, target_offset);
+              } else if (patch.Type() == kLinkerPatchCall) {
+                uint32_t target_offset = GetTargetOffset(patch);
+                PatchCodeAddress(&patched_code_, patch.LiteralOffset(), target_offset);
+              } else if (patch.Type() == kLinkerPatchMethod) {
+                mirror::ArtMethod* method = GetTargetMethod(patch);
+                PatchObjectAddress(&patched_code_, patch.LiteralOffset(), method);
+              } else if (patch.Type() == kLinkerPatchType) {
+                mirror::Class* type = GetTargetType(patch);
+                PatchObjectAddress(&patched_code_, patch.LiteralOffset(), type);
+              }
+            }
+          }
+          writer_->oat_header_->UpdateChecksum(&(*quick_code)[0], code_size);
           if (!out->WriteFully(&(*quick_code)[0], code_size)) {
             ReportWriteFailure("method code", it);
             return false;
@@ -617,11 +998,81 @@
   OutputStream* const out_;
   size_t const file_offset_;
+  Thread* const self_;
+  const char* const old_no_thread_suspension_cause_;  // TODO: Use ScopedAssertNoThreadSuspension.
+  ClassLinker* const class_linker_;
+  mirror::DexCache* dex_cache_;
+  std::vector<uint8_t> patched_code_;
   void ReportWriteFailure(const char* what, const ClassDataItemIterator& it) {
     PLOG(ERROR) << "Failed to write " << what << " for "
         << PrettyMethod(it.GetMemberIndex(), *dex_file_) << " to " << out_->GetLocation();
+  mirror::ArtMethod* GetTargetMethod(const LinkerPatch& patch)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    MethodReference ref = patch.TargetMethod();
+    mirror::DexCache* dex_cache =  // Reuse the cached dex cache for the current dex file.
+        (dex_file_ == ref.dex_file) ? dex_cache_ : class_linker_->FindDexCache(*ref.dex_file);
+    mirror::ArtMethod* method = dex_cache->GetResolvedMethod(ref.dex_method_index);
+    CHECK(method != nullptr);  // The patch target must already be resolved.
+    return method;
+  }
+  uint32_t GetTargetOffset(const LinkerPatch& patch) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    auto target_it = writer_->method_offset_map_.find(patch.TargetMethod());
+    uint32_t target_offset =  // 0u stands for "no entry in method_offset_map_".
+        (target_it != writer_->method_offset_map_.end()) ? target_it->second : 0u;
+    // If there's no compiled code, point to the correct trampoline.
+    if (UNLIKELY(target_offset == 0)) {
+      mirror::ArtMethod* target = GetTargetMethod(patch);
+      DCHECK(target != nullptr);
+      DCHECK_EQ(target->GetQuickOatCodeOffset(), 0u);
+      target_offset = target->IsNative()  // Native methods use the generic JNI trampoline.
+          ? writer_->oat_header_->GetQuickGenericJniTrampolineOffset()
+          : writer_->oat_header_->GetQuickToInterpreterBridgeOffset();
+    }
+    return target_offset;
+  }
+  mirror::Class* GetTargetType(const LinkerPatch& patch)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::DexCache* dex_cache = (dex_file_ == patch.TargetTypeDexFile())  // Same dex file?
+        ? dex_cache_ : class_linker_->FindDexCache(*patch.TargetTypeDexFile());
+    mirror::Class* type = dex_cache->GetResolvedType(patch.TargetTypeIndex());
+    CHECK(type != nullptr);  // The patch target must already be resolved.
+    return type;
+  }
+  void PatchObjectAddress(std::vector<uint8_t>* code, uint32_t offset, mirror::Object* object)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // NOTE: Direct method pointers across oat files don't use linker patches. However, direct
+    // type pointers across oat files do. (TODO: Investigate why.)
+    if (writer_->image_writer_ != nullptr) {
+      object = writer_->image_writer_->GetImageAddress(object);  // Address from the image writer.
+    }
+    uint32_t address = PointerToLowMemUInt32(object);
+    DCHECK_LE(offset + 4, code->size());
+    uint8_t* data = &(*code)[offset];
+    data[0] = address & 0xffu;  // Store the 32-bit address least significant byte first.
+    data[1] = (address >> 8) & 0xffu;
+    data[2] = (address >> 16) & 0xffu;
+    data[3] = (address >> 24) & 0xffu;
+  }
+  void PatchCodeAddress(std::vector<uint8_t>* code, uint32_t offset, uint32_t target_offset)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // NOTE: Direct calls across oat files don't use linker patches.
+    DCHECK(writer_->image_writer_ != nullptr);
+    uint32_t address = PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() +
+                                             writer_->oat_data_offset_ + target_offset);
+    DCHECK_LE(offset + 4, code->size());
+    uint8_t* data = &(*code)[offset];
+    data[0] = address & 0xffu;  // Store the 32-bit address least significant byte first.
+    data[1] = (address >> 8) & 0xffu;
+    data[2] = (address >> 16) & 0xffu;
+    data[3] = (address >> 24) & 0xffu;
+  }
 template <typename DataAccess>
@@ -863,11 +1314,17 @@
 bool OatWriter::Write(OutputStream* out) {
-  const size_t file_offset = out->Seek(0, kSeekCurrent);
+  const off_t raw_file_offset = out->Seek(0, kSeekCurrent);
+  if (raw_file_offset == (off_t) -1) {
+    LOG(ERROR) << "Failed to get file offset in " << out->GetLocation();
+    return false;
+  }
+  const size_t file_offset = static_cast<size_t>(raw_file_offset);
+  // Reserve space for header. It will be written last - after updating the checksum.
   size_t header_size = oat_header_->GetHeaderSize();
-  if (!out->WriteFully(oat_header_, header_size)) {
-    PLOG(ERROR) << "Failed to write oat header to " << out->GetLocation();
+  if (out->Seek(header_size, kSeekCurrent) == (off_t) -1) {
+    PLOG(ERROR) << "Failed to reserve space for oat header in " << out->GetLocation();
     return false;
   size_oat_header_ += sizeof(OatHeader);
@@ -878,7 +1335,12 @@
     return false;
-  size_t relative_offset = out->Seek(0, kSeekCurrent) - file_offset;
+  off_t tables_end_offset = out->Seek(0, kSeekCurrent);
+  if (tables_end_offset == (off_t) -1) {
+    LOG(ERROR) << "Failed to seek to oat code position in " << out->GetLocation();
+    return false;
+  }
+  size_t relative_offset = static_cast<size_t>(tables_end_offset) - file_offset;
   relative_offset = WriteMaps(out, file_offset, relative_offset);
   if (relative_offset == 0) {
     LOG(ERROR) << "Failed to write oat code to " << out->GetLocation();
@@ -897,6 +1359,12 @@
     return false;
+  const off_t oat_end_file_offset = out->Seek(0, kSeekCurrent);
+  if (oat_end_file_offset == (off_t) -1) {
+    LOG(ERROR) << "Failed to get oat end file offset in " << out->GetLocation();
+    return false;
+  }
   if (kIsDebugBuild) {
     uint32_t size_total = 0;
     #define DO_STAT(x) \
@@ -922,6 +1390,7 @@
+    DO_STAT(size_relative_call_thunks_);
@@ -937,13 +1406,29 @@
     #undef DO_STAT
     VLOG(compiler) << "size_total=" << PrettySize(size_total) << " (" << size_total << "B)"; \
-    CHECK_EQ(file_offset + size_total, static_cast<uint32_t>(out->Seek(0, kSeekCurrent)));
+    CHECK_EQ(file_offset + size_total, static_cast<size_t>(oat_end_file_offset));
     CHECK_EQ(size_, size_total);
-  CHECK_EQ(file_offset + size_, static_cast<uint32_t>(out->Seek(0, kSeekCurrent)));
+  CHECK_EQ(file_offset + size_, static_cast<size_t>(oat_end_file_offset));
   CHECK_EQ(size_, relative_offset);
+  // Write the header now that the checksum is final.
+  if (out->Seek(file_offset, kSeekSet) == (off_t) -1) {
+    PLOG(ERROR) << "Failed to seek to oat header position in " << out->GetLocation();
+    return false;
+  }
+  DCHECK_EQ(raw_file_offset, out->Seek(0, kSeekCurrent));
+  if (!out->WriteFully(oat_header_, header_size)) {
+    PLOG(ERROR) << "Failed to write oat header to " << out->GetLocation();
+    return false;
+  }
+  if (out->Seek(oat_end_file_offset, kSeekSet) == (off_t) -1) {
+    PLOG(ERROR) << "Failed to seek to end after writing oat header to " << out->GetLocation();
+    return false;
+  }
+  DCHECK_EQ(oat_end_file_offset, out->Seek(0, kSeekCurrent));
   return true;
@@ -1070,6 +1555,18 @@
   return relative_offset;
+bool OatWriter::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) {
+  static const uint8_t kPadding[] = {
+      0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u
+  };
+  DCHECK_LE(aligned_code_delta, sizeof(kPadding));  // At most 16 zero bytes of padding.
+  if (UNLIKELY(!out->WriteFully(kPadding, aligned_code_delta))) {
+    return false;
+  }
+  size_code_alignment_ += aligned_code_delta;  // Count the padding bytes written.
+  return true;
 OatWriter::OatDexFile::OatDexFile(size_t offset, const DexFile& dex_file) {
   offset_ = offset;
   const std::string& location(dex_file.GetLocation());