Implement method calls using relative BL on ARM64.

Extract the thunk-reservation logic of Thumb2RelativeCallPatcher into a
shared ArmBaseRelativeCallPatcher base class (parameterized by
instruction set, thunk code, and max positive/negative displacement),
and add an Arm64RelativeCallPatcher that patches 28-bit-signed BL
(0x94000000) call sites, wiring it into the kArm64 case. Also replace
the manual thread-state transitions in WriteCodeMethodVisitor with
ScopedObjectAccess and ScopedAssertNoThreadSuspension.
Change-Id: I9e5d0b6c100b6cddd6bbb7ab07cff77ab104ea31
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index dd64368..e64d2ab 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -38,6 +38,7 @@
#include "scoped_thread_state_change.h"
#include "handle_scope-inl.h"
#include "utils/arm/assembler_thumb2.h"
+#include "utils/arm64/assembler_arm64.h"
#include "verifier/method_verifier.h"
namespace art {
@@ -117,10 +118,14 @@
DISALLOW_COPY_AND_ASSIGN(X86RelativeCallPatcher);
};
-class OatWriter::Thumb2RelativeCallPatcher FINAL : public RelativeCallPatcher {
+class OatWriter::ArmBaseRelativeCallPatcher : public RelativeCallPatcher {
public:
- explicit Thumb2RelativeCallPatcher(OatWriter* writer)
- : writer_(writer), thunk_code_(CompileThunkCode()),
+ ArmBaseRelativeCallPatcher(OatWriter* writer,
+ InstructionSet instruction_set, std::vector<uint8_t> thunk_code,
+ uint32_t max_positive_displacement, uint32_t max_negative_displacement)
+ : writer_(writer), instruction_set_(instruction_set), thunk_code_(thunk_code),
+ max_positive_displacement_(max_positive_displacement),
+ max_negative_displacement_(max_negative_displacement),
thunk_locations_(), current_thunk_to_write_(0u), unprocessed_patches_() {
}
@@ -130,11 +135,11 @@
// of code. To avoid any alignment discrepancies for the final chunk, we always align the
// offset after reserving of writing any chunk.
if (UNLIKELY(compiled_method == nullptr)) {
- uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kThumb2);
+ uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset);
if (needs_thunk) {
thunk_locations_.push_back(aligned_offset);
- offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), kThumb2);
+ offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), instruction_set_);
}
return offset;
}
@@ -143,14 +148,14 @@
uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader);
uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size);
if (!unprocessed_patches_.empty() &&
- next_aligned_offset - unprocessed_patches_.front().second > kMaxPositiveDisplacement) {
+ next_aligned_offset - unprocessed_patches_.front().second > max_positive_displacement_) {
bool needs_thunk = ReserveSpaceProcessPatches(next_aligned_offset);
if (needs_thunk) {
// A single thunk will cover all pending patches.
unprocessed_patches_.clear();
uint32_t thunk_location = compiled_method->AlignCode(offset);
thunk_locations_.push_back(thunk_location);
- offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), kThumb2);
+ offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), instruction_set_);
}
}
for (const LinkerPatch& patch : compiled_method->GetPatches()) {
@@ -166,7 +171,7 @@
if (current_thunk_to_write_ == thunk_locations_.size()) {
return offset;
}
- uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kThumb2);
+ uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
if (UNLIKELY(aligned_offset == thunk_locations_[current_thunk_to_write_])) {
++current_thunk_to_write_;
uint32_t aligned_code_delta = aligned_offset - offset;
@@ -179,7 +184,7 @@
writer_->size_relative_call_thunks_ += thunk_code_.size();
uint32_t thunk_end_offset = aligned_offset + thunk_code_.size();
// Align after writing chunk, see the ReserveSpace() above.
- offset = CompiledMethod::AlignCode(thunk_end_offset, kThumb2);
+ offset = CompiledMethod::AlignCode(thunk_end_offset, instruction_set_);
aligned_code_delta = offset - thunk_end_offset;
if (aligned_code_delta != 0u && !writer_->WriteCodeAlignment(out, aligned_code_delta)) {
return 0u;
@@ -188,30 +193,88 @@
return offset;
}
- void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
- uint32_t target_offset) OVERRIDE {
- DCHECK_LE(literal_offset + 4u, code->size());
- DCHECK_EQ(literal_offset & 1u, 0u);
- DCHECK_EQ(patch_offset & 1u, 0u);
- DCHECK_EQ(target_offset & 1u, 1u); // Thumb2 mode bit.
+ protected:
+ uint32_t CalculateDisplacement(uint32_t patch_offset, uint32_t target_offset) {
// Unsigned arithmetic with its well-defined overflow behavior is just fine here.
- uint32_t displacement = target_offset - 1u - patch_offset;
+ uint32_t displacement = target_offset - patch_offset;
// NOTE: With unsigned arithmetic we do mean to use && rather than || below.
- if (displacement > kMaxPositiveDisplacement && displacement < -kMaxNegativeDisplacement) {
+ if (displacement > max_positive_displacement_ && displacement < -max_negative_displacement_) {
// Unwritten thunks have higher offsets, check if it's within range.
DCHECK(current_thunk_to_write_ == thunk_locations_.size() ||
thunk_locations_[current_thunk_to_write_] > patch_offset);
if (current_thunk_to_write_ != thunk_locations_.size() &&
- thunk_locations_[current_thunk_to_write_] - patch_offset < kMaxPositiveDisplacement) {
+ thunk_locations_[current_thunk_to_write_] - patch_offset < max_positive_displacement_) {
displacement = thunk_locations_[current_thunk_to_write_] - patch_offset;
} else {
// We must have a previous thunk then.
DCHECK_NE(current_thunk_to_write_, 0u);
DCHECK_LT(thunk_locations_[current_thunk_to_write_ - 1], patch_offset);
displacement = thunk_locations_[current_thunk_to_write_ - 1] - patch_offset;
- DCHECK(displacement >= -kMaxNegativeDisplacement);
+ DCHECK(displacement >= -max_negative_displacement_);
}
}
+ return displacement;
+ }
+
+ private:
+ bool ReserveSpaceProcessPatches(uint32_t next_aligned_offset) {
+ // Process as many patches as possible, stop only on unresolved targets or calls too far back.
+ while (!unprocessed_patches_.empty()) {
+ uint32_t patch_offset = unprocessed_patches_.front().second;
+ auto it = writer_->method_offset_map_.find(unprocessed_patches_.front().first);
+ if (it == writer_->method_offset_map_.end()) {
+ // If still unresolved, check if we have a thunk within range.
+ DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset);
+ if (thunk_locations_.empty() ||
+ patch_offset - thunk_locations_.back() > max_negative_displacement_) {
+ return next_aligned_offset - patch_offset > max_positive_displacement_;
+ }
+ } else if (it->second >= patch_offset) {
+ DCHECK_LE(it->second - patch_offset, max_positive_displacement_);
+ } else {
+ // When calling back, check if we have a thunk that's closer than the actual target.
+ uint32_t target_offset = (thunk_locations_.empty() || it->second > thunk_locations_.back())
+ ? it->second
+ : thunk_locations_.back();
+ DCHECK_GT(patch_offset, target_offset);
+ if (patch_offset - target_offset > max_negative_displacement_) {
+ return true;
+ }
+ }
+ unprocessed_patches_.pop_front();
+ }
+ return false;
+ }
+
+ OatWriter* const writer_;
+ const InstructionSet instruction_set_;
+ const std::vector<uint8_t> thunk_code_;
+ const uint32_t max_positive_displacement_;
+ const uint32_t max_negative_displacement_;
+ std::vector<uint32_t> thunk_locations_;
+ size_t current_thunk_to_write_;
+
+ // ReserveSpace() tracks unprocessed patches.
+ typedef std::pair<MethodReference, uint32_t> UnprocessedPatch;
+ std::deque<UnprocessedPatch> unprocessed_patches_;
+
+ DISALLOW_COPY_AND_ASSIGN(ArmBaseRelativeCallPatcher);
+};
+
+class OatWriter::Thumb2RelativeCallPatcher FINAL : public ArmBaseRelativeCallPatcher {
+ public:
+ explicit Thumb2RelativeCallPatcher(OatWriter* writer)
+ : ArmBaseRelativeCallPatcher(writer, kThumb2, CompileThunkCode(),
+ kMaxPositiveDisplacement, kMaxNegativeDisplacement) {
+ }
+
+ void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
+ uint32_t target_offset) OVERRIDE {
+ DCHECK_LE(literal_offset + 4u, code->size());
+ DCHECK_EQ(literal_offset & 1u, 0u);
+ DCHECK_EQ(patch_offset & 1u, 0u);
+ DCHECK_EQ(target_offset & 1u, 1u); // Thumb2 mode bit.
+ uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u);
displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch.
DCHECK_EQ(displacement & 1u, 0u);
DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u); // 25-bit signed.
@@ -237,35 +300,6 @@
}
private:
- bool ReserveSpaceProcessPatches(uint32_t next_aligned_offset) {
- // Process as many patches as possible, stop only on unresolved targets or calls too far back.
- while (!unprocessed_patches_.empty()) {
- uint32_t patch_offset = unprocessed_patches_.front().second;
- auto it = writer_->method_offset_map_.find(unprocessed_patches_.front().first);
- if (it == writer_->method_offset_map_.end()) {
- // If still unresolved, check if we have a thunk within range.
- DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset);
- if (thunk_locations_.empty() ||
- patch_offset - thunk_locations_.back() > kMaxNegativeDisplacement) {
- return next_aligned_offset - patch_offset > kMaxPositiveDisplacement;
- }
- } else if (it->second >= patch_offset) {
- DCHECK_LE(it->second - patch_offset, kMaxPositiveDisplacement);
- } else {
- // When calling back, check if we have a thunk that's closer than the actual target.
- uint32_t target_offset = (thunk_locations_.empty() || it->second > thunk_locations_.back())
- ? it->second
- : thunk_locations_.back();
- DCHECK_GT(patch_offset, target_offset);
- if (patch_offset - target_offset > kMaxNegativeDisplacement) {
- return true;
- }
- }
- unprocessed_patches_.pop_front();
- }
- return false;
- }
-
static std::vector<uint8_t> CompileThunkCode() {
// The thunk just uses the entry point in the ArtMethod. This works even for calls
// to the generic JNI and interpreter trampolines.
@@ -289,18 +323,60 @@
static constexpr uint32_t kMaxPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement;
static constexpr uint32_t kMaxNegativeDisplacement = (1u << 24) - kPcDisplacement;
- OatWriter* const writer_;
- const std::vector<uint8_t> thunk_code_;
- std::vector<uint32_t> thunk_locations_;
- size_t current_thunk_to_write_;
-
- // ReserveSpace() tracks unprocessed patches.
- typedef std::pair<MethodReference, uint32_t> UnprocessedPatch;
- std::deque<UnprocessedPatch> unprocessed_patches_;
-
DISALLOW_COPY_AND_ASSIGN(Thumb2RelativeCallPatcher);
};
+class OatWriter::Arm64RelativeCallPatcher FINAL : public ArmBaseRelativeCallPatcher {
+ public:
+ explicit Arm64RelativeCallPatcher(OatWriter* writer)
+ : ArmBaseRelativeCallPatcher(writer, kArm64, CompileThunkCode(),
+ kMaxPositiveDisplacement, kMaxNegativeDisplacement) {
+ }
+
+ void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
+ uint32_t target_offset) OVERRIDE {
+ DCHECK_LE(literal_offset + 4u, code->size());
+ DCHECK_EQ(literal_offset & 3u, 0u);
+ DCHECK_EQ(patch_offset & 3u, 0u);
+ DCHECK_EQ(target_offset & 3u, 0u);
+ uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u);
+ DCHECK_EQ(displacement & 3u, 0u);
+ DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u); // 28-bit signed.
+ uint32_t value = (displacement & 0x0fffffffu) >> 2;
+ value |= 0x94000000; // BL
+
+ uint8_t* addr = &(*code)[literal_offset];
+ // Check that we're just overwriting an existing BL.
+ DCHECK_EQ(addr[3] & 0xfc, 0x94);
+ // Write the new BL.
+ addr[0] = (value >> 0) & 0xff;
+ addr[1] = (value >> 8) & 0xff;
+ addr[2] = (value >> 16) & 0xff;
+ addr[3] = (value >> 24) & 0xff;
+ }
+
+ private:
+ static std::vector<uint8_t> CompileThunkCode() {
+ // The thunk just uses the entry point in the ArtMethod. This works even for calls
+ // to the generic JNI and interpreter trampolines.
+ arm64::Arm64Assembler assembler;
+ Offset offset(mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
+ assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
+ std::vector<uint8_t> thunk_code(assembler.CodeSize());
+ MemoryRegion code(thunk_code.data(), thunk_code.size());
+ assembler.FinalizeInstructions(code);
+ return thunk_code;
+ }
+
+ // Maximum positive and negative displacement measured from the patch location.
+ // (Signed 28 bit displacement with the last bit 0 has range [-2^27, 2^27-4] measured from
+ // the ARM64 PC pointing to the BL.)
+ static constexpr uint32_t kMaxPositiveDisplacement = (1u << 27) - 4u;
+ static constexpr uint32_t kMaxNegativeDisplacement = (1u << 27);
+
+ DISALLOW_COPY_AND_ASSIGN(Arm64RelativeCallPatcher);
+};
+
#define DCHECK_OFFSET() \
DCHECK_EQ(static_cast<off_t>(file_offset + relative_offset), out->Seek(0, kSeekCurrent)) \
<< "file_offset=" << file_offset << " relative_offset=" << relative_offset
@@ -373,7 +449,8 @@
relative_call_patcher_.reset(new Thumb2RelativeCallPatcher(this));
break;
case kArm64:
- // TODO: Implement relative calls for arm64.
+ relative_call_patcher_.reset(new Arm64RelativeCallPatcher(this));
+ break;
default:
relative_call_patcher_.reset(new NoRelativeCallPatcher);
break;
@@ -868,8 +945,8 @@
: OatDexMethodVisitor(writer, relative_offset),
out_(out),
file_offset_(file_offset),
- self_(Thread::Current()),
- old_no_thread_suspension_cause_(self_->StartAssertNoThreadSuspension("OatWriter patching")),
+ soa_(Thread::Current()),
+ no_thread_suspension_(soa_.Self(), "OatWriter patching"),
class_linker_(Runtime::Current()->GetClassLinker()),
dex_cache_(nullptr) {
if (writer_->image_writer_ != nullptr) {
@@ -877,12 +954,9 @@
CHECK(writer_->image_writer_->IsImageAddressSpaceReady());
patched_code_.reserve(16 * KB);
}
- self_->TransitionFromSuspendedToRunnable();
}
~WriteCodeMethodVisitor() UNLOCK_FUNCTION(Locks::mutator_lock_) {
- self_->EndAssertNoThreadSuspension(old_no_thread_suspension_cause_);
- self_->TransitionFromRunnableToSuspended(kNative);
}
bool StartClass(const DexFile* dex_file, size_t class_def_index)
@@ -997,9 +1071,9 @@
private:
OutputStream* const out_;
- size_t const file_offset_;
- Thread* const self_;
- const char* const old_no_thread_suspension_cause_; // TODO: Use ScopedAssertNoThreadSuspension.
+ const size_t file_offset_;
+ const ScopedObjectAccess soa_;
+ const ScopedAssertNoThreadSuspension no_thread_suspension_;
ClassLinker* const class_linker_;
mirror::DexCache* dex_cache_;
std::vector<uint8_t> patched_code_;