Implement method calls using relative BL on ARM64.
Change-Id: I9e5d0b6c100b6cddd6bbb7ab07cff77ab104ea31
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index ab71921..a87b06a 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -117,6 +117,7 @@
#define IS_SIGNED_IMM14(value) IS_SIGNED_IMM(14, value)
#define IS_SIGNED_IMM19(value) IS_SIGNED_IMM(19, value)
#define IS_SIGNED_IMM21(value) IS_SIGNED_IMM(21, value)
+#define IS_SIGNED_IMM26(value) IS_SIGNED_IMM(26, value)
// Quick macro used to define the registers.
#define A64_REGISTER_CODE_LIST(R) \
@@ -240,6 +241,7 @@
kA64B2ct, // b.cond [01010100] imm_19[23-5] [0] cond[3-0].
kA64Blr1x, // blr [1101011000111111000000] rn[9-5] [00000].
kA64Br1x, // br [1101011000011111000000] rn[9-5] [00000].
+ kA64Bl1t, // bl [100101] imm26[25-0].
kA64Brk1d, // brk [11010100001] imm_16[20-5] [00000].
kA64B1t, // b [00010100] offset_26[25-0].
kA64Cbnz2rt, // cbnz[00110101] imm_19[23-5] rt[4-0].
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index b1cf279..7c663a9 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -155,6 +155,10 @@
kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | IS_BRANCH,
"br", "!0x", kFixupNone),
+ ENCODING_MAP(kA64Bl1t, NO_VARIANTS(0x94000000),
+ kFmtBitBlt, 25, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
+ "bl", "!0T", kFixupLabel),
ENCODING_MAP(kA64Brk1d, NO_VARIANTS(0xd4200000),
kFmtBitBlt, 20, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
@@ -873,7 +877,7 @@
((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
int32_t delta = target - pc;
DCHECK_EQ(delta & 0x3, 0);
- if (!IS_SIGNED_IMM19(delta >> 2)) {
+ if (!IS_SIGNED_IMM26(delta >> 2)) {
LOG(FATAL) << "Invalid jump range in kFixupT1Branch";
}
lir->operands[0] = delta >> 2;
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 6081f28..e8de876 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -21,6 +21,8 @@
#include "dex/quick/mir_to_lir-inl.h"
#include "gc/accounting/card_table.h"
#include "entrypoints/quick/quick_entrypoints.h"
+#include "mirror/art_method.h"
+#include "mirror/object_array-inl.h"
namespace art {
@@ -433,4 +435,117 @@
NewLIR0(kA64Ret);
}
+static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) {
+ // Always emit relative calls.
+ return true;
+}
+
+/*
+ * Bit of a hack here - in the absence of a real scheduling pass,
+ * emit the next instruction in static & direct invoke sequences.
+ */
+static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
+ int state, const MethodReference& target_method,
+ uint32_t unused,
+ uintptr_t direct_code, uintptr_t direct_method,
+ InvokeType type) {
+ Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
+ if (direct_code != 0 && direct_method != 0) {
+ switch (state) {
+ case 0: // Get the current Method* [sets kArg0]
+ if (direct_code != static_cast<uintptr_t>(-1)) {
+ cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
+ } else if (Arm64UseRelativeCall(cu, target_method)) {
+ // Defer to linker patch.
+ } else {
+ cg->LoadCodeAddress(target_method, type, kInvokeTgt);
+ }
+ if (direct_method != static_cast<uintptr_t>(-1)) {
+ cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method);
+ } else {
+ cg->LoadMethodAddress(target_method, type, kArg0);
+ }
+ break;
+ default:
+ return -1;
+ }
+ } else {
+ RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
+ switch (state) {
+ case 0: // Get the current Method* [sets kArg0]
+ // TUNING: we can save a reg copy if Method* has been promoted.
+ cg->LoadCurrMethodDirect(arg0_ref);
+ break;
+ case 1: // Get method->dex_cache_resolved_methods_
+ cg->LoadRefDisp(arg0_ref,
+ mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
+ arg0_ref,
+ kNotVolatile);
+ // Set up direct code if known.
+ if (direct_code != 0) {
+ if (direct_code != static_cast<uintptr_t>(-1)) {
+ cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
+ } else if (Arm64UseRelativeCall(cu, target_method)) {
+ // Defer to linker patch.
+ } else {
+ CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
+ cg->LoadCodeAddress(target_method, type, kInvokeTgt);
+ }
+ }
+ break;
+ case 2: // Grab target method*
+ CHECK_EQ(cu->dex_file, target_method.dex_file);
+ cg->LoadRefDisp(arg0_ref,
+ mirror::ObjectArray<mirror::Object>::OffsetOfElement(
+ target_method.dex_method_index).Int32Value(),
+ arg0_ref,
+ kNotVolatile);
+ break;
+ case 3: // Grab the code from the method*
+ if (direct_code == 0) {
+ // kInvokeTgt := arg0_ref->entrypoint
+ cg->LoadWordDisp(arg0_ref,
+ mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
+ cg->TargetPtrReg(kInvokeTgt));
+ }
+ break;
+ default:
+ return -1;
+ }
+ }
+ return state + 1;
+}
+
+NextCallInsn Arm64Mir2Lir::GetNextSDCallInsn() {
+ return Arm64NextSDCallInsn;
+}
+
+LIR* Arm64Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
+ // For ARM64, just generate a relative BL instruction that will be filled in at 'link time'.
+ // If the target turns out to be too far, the linker will generate a thunk for dispatch.
+ int target_method_idx = target_method.dex_method_index;
+ const DexFile* target_dex_file = target_method.dex_file;
+
+ // Generate the call instruction and save index, dex_file, and type.
+ // NOTE: Method deduplication takes linker patches into account, so we can just pass 0
+ // as a placeholder for the offset.
+ LIR* call = RawLIR(current_dalvik_offset_, kA64Bl1t, 0,
+ target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
+ AppendLIR(call);
+ call_method_insns_.push_back(call);
+ return call;
+}
+
+LIR* Arm64Mir2Lir::GenCallInsn(const MirMethodLoweringInfo& method_info) {
+ LIR* call_insn;
+ if (method_info.FastPath() && Arm64UseRelativeCall(cu_, method_info.GetTargetMethod()) &&
+ (method_info.GetSharpType() == kDirect || method_info.GetSharpType() == kStatic) &&
+ method_info.DirectCode() == static_cast<uintptr_t>(-1)) {
+ call_insn = CallWithLinkerFixup(method_info.GetTargetMethod(), method_info.GetSharpType());
+ } else {
+ call_insn = OpReg(kOpBlx, TargetPtrReg(kInvokeTgt));
+ }
+ return call_insn;
+}
+
} // namespace art
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 55cc938..93d9b34 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -259,6 +259,28 @@
size_t GetInstructionOffset(LIR* lir) OVERRIDE;
+ NextCallInsn GetNextSDCallInsn() OVERRIDE;
+
+ /*
+ * @brief Generate a relative call to the method that will be patched at link time.
+ * @param target_method The MethodReference of the method to be invoked.
+ * @param type How the method will be invoked.
+ * @returns Call instruction
+ */
+ LIR* CallWithLinkerFixup(const MethodReference& target_method, InvokeType type);
+
+ /*
+ * @brief Generate the actual call insn based on the method info.
+ * @param method_info the lowering info for the method call.
+ * @returns Call instruction
+ */
+ virtual LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE;
+
+ /*
+ * @brief Handle ARM specific literals.
+ */
+ void InstallLiteralPools() OVERRIDE;
+
LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
private:
@@ -396,6 +418,8 @@
InToRegStorageMapping in_to_reg_storage_mapping_;
static const A64EncodingMap EncodingMap[kA64Last];
+
+ ArenaVector<LIR*> call_method_insns_;
};
} // namespace art
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 0462530..ba47883 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -585,7 +585,8 @@
}
Arm64Mir2Lir::Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
- : Mir2Lir(cu, mir_graph, arena) {
+ : Mir2Lir(cu, mir_graph, arena),
+ call_method_insns_(arena->Adapter()) {
// Sanity check - make sure encoding map lines up.
for (int i = 0; i < kA64Last; i++) {
if (UNWIDE(Arm64Mir2Lir::EncodingMap[i].opcode) != i) {
@@ -1201,4 +1202,21 @@
return call_state;
}
+void Arm64Mir2Lir::InstallLiteralPools() {
+ // PC-relative calls to methods.
+ patches_.reserve(call_method_insns_.size());
+ for (LIR* p : call_method_insns_) {
+ DCHECK_EQ(p->opcode, kA64Bl1t);
+ uint32_t target_method_idx = p->operands[1];
+ const DexFile* target_dex_file =
+ reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));
+
+ patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset,
+ target_dex_file, target_method_idx));
+ }
+
+ // And do the normal processing.
+ Mir2Lir::InstallLiteralPools();
+}
+
} // namespace art
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index c308932..174e4e0 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -495,7 +495,8 @@
uintptr_t direct_code, uintptr_t direct_method,
InvokeType type) {
DCHECK(cu->instruction_set != kX86 && cu->instruction_set != kX86_64 &&
- cu->instruction_set != kThumb2 && cu->instruction_set != kArm);
+ cu->instruction_set != kThumb2 && cu->instruction_set != kArm &&
+ cu->instruction_set != kArm64);
Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
if (direct_code != 0 && direct_method != 0) {
switch (state) {
@@ -1751,7 +1752,8 @@
LIR* Mir2Lir::GenCallInsn(const MirMethodLoweringInfo& method_info) {
DCHECK(cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64 &&
- cu_->instruction_set != kThumb2 && cu_->instruction_set != kArm);
+ cu_->instruction_set != kThumb2 && cu_->instruction_set != kArm &&
+ cu_->instruction_set != kArm64);
return OpReg(kOpBlx, TargetPtrReg(kInvokeTgt));
}
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index dd64368..e64d2ab 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -38,6 +38,7 @@
#include "scoped_thread_state_change.h"
#include "handle_scope-inl.h"
#include "utils/arm/assembler_thumb2.h"
+#include "utils/arm64/assembler_arm64.h"
#include "verifier/method_verifier.h"
namespace art {
@@ -117,10 +118,14 @@
DISALLOW_COPY_AND_ASSIGN(X86RelativeCallPatcher);
};
-class OatWriter::Thumb2RelativeCallPatcher FINAL : public RelativeCallPatcher {
+class OatWriter::ArmBaseRelativeCallPatcher : public RelativeCallPatcher {
public:
- explicit Thumb2RelativeCallPatcher(OatWriter* writer)
- : writer_(writer), thunk_code_(CompileThunkCode()),
+ ArmBaseRelativeCallPatcher(OatWriter* writer,
+ InstructionSet instruction_set, std::vector<uint8_t> thunk_code,
+ uint32_t max_positive_displacement, uint32_t max_negative_displacement)
+ : writer_(writer), instruction_set_(instruction_set), thunk_code_(thunk_code),
+ max_positive_displacement_(max_positive_displacement),
+ max_negative_displacement_(max_negative_displacement),
thunk_locations_(), current_thunk_to_write_(0u), unprocessed_patches_() {
}
@@ -130,11 +135,11 @@
// of code. To avoid any alignment discrepancies for the final chunk, we always align the
// offset after reserving of writing any chunk.
if (UNLIKELY(compiled_method == nullptr)) {
- uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kThumb2);
+ uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset);
if (needs_thunk) {
thunk_locations_.push_back(aligned_offset);
- offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), kThumb2);
+ offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), instruction_set_);
}
return offset;
}
@@ -143,14 +148,14 @@
uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader);
uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size);
if (!unprocessed_patches_.empty() &&
- next_aligned_offset - unprocessed_patches_.front().second > kMaxPositiveDisplacement) {
+ next_aligned_offset - unprocessed_patches_.front().second > max_positive_displacement_) {
bool needs_thunk = ReserveSpaceProcessPatches(next_aligned_offset);
if (needs_thunk) {
// A single thunk will cover all pending patches.
unprocessed_patches_.clear();
uint32_t thunk_location = compiled_method->AlignCode(offset);
thunk_locations_.push_back(thunk_location);
- offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), kThumb2);
+ offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), instruction_set_);
}
}
for (const LinkerPatch& patch : compiled_method->GetPatches()) {
@@ -166,7 +171,7 @@
if (current_thunk_to_write_ == thunk_locations_.size()) {
return offset;
}
- uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kThumb2);
+ uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
if (UNLIKELY(aligned_offset == thunk_locations_[current_thunk_to_write_])) {
++current_thunk_to_write_;
uint32_t aligned_code_delta = aligned_offset - offset;
@@ -179,7 +184,7 @@
writer_->size_relative_call_thunks_ += thunk_code_.size();
uint32_t thunk_end_offset = aligned_offset + thunk_code_.size();
// Align after writing chunk, see the ReserveSpace() above.
- offset = CompiledMethod::AlignCode(thunk_end_offset, kThumb2);
+ offset = CompiledMethod::AlignCode(thunk_end_offset, instruction_set_);
aligned_code_delta = offset - thunk_end_offset;
if (aligned_code_delta != 0u && !writer_->WriteCodeAlignment(out, aligned_code_delta)) {
return 0u;
@@ -188,30 +193,88 @@
return offset;
}
- void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
- uint32_t target_offset) OVERRIDE {
- DCHECK_LE(literal_offset + 4u, code->size());
- DCHECK_EQ(literal_offset & 1u, 0u);
- DCHECK_EQ(patch_offset & 1u, 0u);
- DCHECK_EQ(target_offset & 1u, 1u); // Thumb2 mode bit.
+ protected:
+ uint32_t CalculateDisplacement(uint32_t patch_offset, uint32_t target_offset) {
// Unsigned arithmetic with its well-defined overflow behavior is just fine here.
- uint32_t displacement = target_offset - 1u - patch_offset;
+ uint32_t displacement = target_offset - patch_offset;
// NOTE: With unsigned arithmetic we do mean to use && rather than || below.
- if (displacement > kMaxPositiveDisplacement && displacement < -kMaxNegativeDisplacement) {
+ if (displacement > max_positive_displacement_ && displacement < -max_negative_displacement_) {
// Unwritten thunks have higher offsets, check if it's within range.
DCHECK(current_thunk_to_write_ == thunk_locations_.size() ||
thunk_locations_[current_thunk_to_write_] > patch_offset);
if (current_thunk_to_write_ != thunk_locations_.size() &&
- thunk_locations_[current_thunk_to_write_] - patch_offset < kMaxPositiveDisplacement) {
+ thunk_locations_[current_thunk_to_write_] - patch_offset < max_positive_displacement_) {
displacement = thunk_locations_[current_thunk_to_write_] - patch_offset;
} else {
// We must have a previous thunk then.
DCHECK_NE(current_thunk_to_write_, 0u);
DCHECK_LT(thunk_locations_[current_thunk_to_write_ - 1], patch_offset);
displacement = thunk_locations_[current_thunk_to_write_ - 1] - patch_offset;
- DCHECK(displacement >= -kMaxNegativeDisplacement);
+ DCHECK(displacement >= -max_negative_displacement_);
}
}
+ return displacement;
+ }
+
+ private:
+ bool ReserveSpaceProcessPatches(uint32_t next_aligned_offset) {
+ // Process as many patches as possible, stop only on unresolved targets or calls too far back.
+ while (!unprocessed_patches_.empty()) {
+ uint32_t patch_offset = unprocessed_patches_.front().second;
+ auto it = writer_->method_offset_map_.find(unprocessed_patches_.front().first);
+ if (it == writer_->method_offset_map_.end()) {
+ // If still unresolved, check if we have a thunk within range.
+ DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset);
+ if (thunk_locations_.empty() ||
+ patch_offset - thunk_locations_.back() > max_negative_displacement_) {
+ return next_aligned_offset - patch_offset > max_positive_displacement_;
+ }
+ } else if (it->second >= patch_offset) {
+ DCHECK_LE(it->second - patch_offset, max_positive_displacement_);
+ } else {
+ // When calling back, check if we have a thunk that's closer than the actual target.
+ uint32_t target_offset = (thunk_locations_.empty() || it->second > thunk_locations_.back())
+ ? it->second
+ : thunk_locations_.back();
+ DCHECK_GT(patch_offset, target_offset);
+ if (patch_offset - target_offset > max_negative_displacement_) {
+ return true;
+ }
+ }
+ unprocessed_patches_.pop_front();
+ }
+ return false;
+ }
+
+ OatWriter* const writer_;
+ const InstructionSet instruction_set_;
+ const std::vector<uint8_t> thunk_code_;
+ const uint32_t max_positive_displacement_;
+ const uint32_t max_negative_displacement_;
+ std::vector<uint32_t> thunk_locations_;
+ size_t current_thunk_to_write_;
+
+ // ReserveSpace() tracks unprocessed patches.
+ typedef std::pair<MethodReference, uint32_t> UnprocessedPatch;
+ std::deque<UnprocessedPatch> unprocessed_patches_;
+
+ DISALLOW_COPY_AND_ASSIGN(ArmBaseRelativeCallPatcher);
+};
+
+class OatWriter::Thumb2RelativeCallPatcher FINAL : public ArmBaseRelativeCallPatcher {
+ public:
+ explicit Thumb2RelativeCallPatcher(OatWriter* writer)
+ : ArmBaseRelativeCallPatcher(writer, kThumb2, CompileThunkCode(),
+ kMaxPositiveDisplacement, kMaxNegativeDisplacement) {
+ }
+
+ void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
+ uint32_t target_offset) OVERRIDE {
+ DCHECK_LE(literal_offset + 4u, code->size());
+ DCHECK_EQ(literal_offset & 1u, 0u);
+ DCHECK_EQ(patch_offset & 1u, 0u);
+ DCHECK_EQ(target_offset & 1u, 1u); // Thumb2 mode bit.
+ uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u);
displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch.
DCHECK_EQ(displacement & 1u, 0u);
DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u); // 25-bit signed.
@@ -237,35 +300,6 @@
}
private:
- bool ReserveSpaceProcessPatches(uint32_t next_aligned_offset) {
- // Process as many patches as possible, stop only on unresolved targets or calls too far back.
- while (!unprocessed_patches_.empty()) {
- uint32_t patch_offset = unprocessed_patches_.front().second;
- auto it = writer_->method_offset_map_.find(unprocessed_patches_.front().first);
- if (it == writer_->method_offset_map_.end()) {
- // If still unresolved, check if we have a thunk within range.
- DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset);
- if (thunk_locations_.empty() ||
- patch_offset - thunk_locations_.back() > kMaxNegativeDisplacement) {
- return next_aligned_offset - patch_offset > kMaxPositiveDisplacement;
- }
- } else if (it->second >= patch_offset) {
- DCHECK_LE(it->second - patch_offset, kMaxPositiveDisplacement);
- } else {
- // When calling back, check if we have a thunk that's closer than the actual target.
- uint32_t target_offset = (thunk_locations_.empty() || it->second > thunk_locations_.back())
- ? it->second
- : thunk_locations_.back();
- DCHECK_GT(patch_offset, target_offset);
- if (patch_offset - target_offset > kMaxNegativeDisplacement) {
- return true;
- }
- }
- unprocessed_patches_.pop_front();
- }
- return false;
- }
-
static std::vector<uint8_t> CompileThunkCode() {
// The thunk just uses the entry point in the ArtMethod. This works even for calls
// to the generic JNI and interpreter trampolines.
@@ -289,18 +323,60 @@
static constexpr uint32_t kMaxPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement;
static constexpr uint32_t kMaxNegativeDisplacement = (1u << 24) - kPcDisplacement;
- OatWriter* const writer_;
- const std::vector<uint8_t> thunk_code_;
- std::vector<uint32_t> thunk_locations_;
- size_t current_thunk_to_write_;
-
- // ReserveSpace() tracks unprocessed patches.
- typedef std::pair<MethodReference, uint32_t> UnprocessedPatch;
- std::deque<UnprocessedPatch> unprocessed_patches_;
-
DISALLOW_COPY_AND_ASSIGN(Thumb2RelativeCallPatcher);
};
+class OatWriter::Arm64RelativeCallPatcher FINAL : public ArmBaseRelativeCallPatcher {
+ public:
+ explicit Arm64RelativeCallPatcher(OatWriter* writer)
+ : ArmBaseRelativeCallPatcher(writer, kArm64, CompileThunkCode(),
+ kMaxPositiveDisplacement, kMaxNegativeDisplacement) {
+ }
+
+ void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
+ uint32_t target_offset) OVERRIDE {
+ DCHECK_LE(literal_offset + 4u, code->size());
+ DCHECK_EQ(literal_offset & 3u, 0u);
+ DCHECK_EQ(patch_offset & 3u, 0u);
+ DCHECK_EQ(target_offset & 3u, 0u);
+ uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u);
+ DCHECK_EQ(displacement & 3u, 0u);
+ DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u); // 28-bit signed.
+ uint32_t value = (displacement & 0x0fffffffu) >> 2;
+ value |= 0x94000000; // BL
+
+ uint8_t* addr = &(*code)[literal_offset];
+ // Check that we're just overwriting an existing BL.
+ DCHECK_EQ(addr[3] & 0xfc, 0x94);
+ // Write the new BL.
+ addr[0] = (value >> 0) & 0xff;
+ addr[1] = (value >> 8) & 0xff;
+ addr[2] = (value >> 16) & 0xff;
+ addr[3] = (value >> 24) & 0xff;
+ }
+
+ private:
+ static std::vector<uint8_t> CompileThunkCode() {
+ // The thunk just uses the entry point in the ArtMethod. This works even for calls
+ // to the generic JNI and interpreter trampolines.
+ arm64::Arm64Assembler assembler;
+ Offset offset(mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
+ assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
+ std::vector<uint8_t> thunk_code(assembler.CodeSize());
+ MemoryRegion code(thunk_code.data(), thunk_code.size());
+ assembler.FinalizeInstructions(code);
+ return thunk_code;
+ }
+
+ // Maximum positive and negative displacement measured from the patch location.
+ // (Signed 28 bit displacement with the last bit 0 has range [-2^27, 2^27-4] measured from
+ // the ARM64 PC pointing to the BL.)
+ static constexpr uint32_t kMaxPositiveDisplacement = (1u << 27) - 4u;
+ static constexpr uint32_t kMaxNegativeDisplacement = (1u << 27);
+
+ DISALLOW_COPY_AND_ASSIGN(Arm64RelativeCallPatcher);
+};
+
#define DCHECK_OFFSET() \
DCHECK_EQ(static_cast<off_t>(file_offset + relative_offset), out->Seek(0, kSeekCurrent)) \
<< "file_offset=" << file_offset << " relative_offset=" << relative_offset
@@ -373,7 +449,8 @@
relative_call_patcher_.reset(new Thumb2RelativeCallPatcher(this));
break;
case kArm64:
- // TODO: Implement relative calls for arm64.
+ relative_call_patcher_.reset(new Arm64RelativeCallPatcher(this));
+ break;
default:
relative_call_patcher_.reset(new NoRelativeCallPatcher);
break;
@@ -868,8 +945,8 @@
: OatDexMethodVisitor(writer, relative_offset),
out_(out),
file_offset_(file_offset),
- self_(Thread::Current()),
- old_no_thread_suspension_cause_(self_->StartAssertNoThreadSuspension("OatWriter patching")),
+ soa_(Thread::Current()),
+ no_thread_suspension_(soa_.Self(), "OatWriter patching"),
class_linker_(Runtime::Current()->GetClassLinker()),
dex_cache_(nullptr) {
if (writer_->image_writer_ != nullptr) {
@@ -877,12 +954,9 @@
CHECK(writer_->image_writer_->IsImageAddressSpaceReady());
patched_code_.reserve(16 * KB);
}
- self_->TransitionFromSuspendedToRunnable();
}
~WriteCodeMethodVisitor() UNLOCK_FUNCTION(Locks::mutator_lock_) {
- self_->EndAssertNoThreadSuspension(old_no_thread_suspension_cause_);
- self_->TransitionFromRunnableToSuspended(kNative);
}
bool StartClass(const DexFile* dex_file, size_t class_def_index)
@@ -997,9 +1071,9 @@
private:
OutputStream* const out_;
- size_t const file_offset_;
- Thread* const self_;
- const char* const old_no_thread_suspension_cause_; // TODO: Use ScopedAssertNoThreadSuspension.
+ const size_t file_offset_;
+ const ScopedObjectAccess soa_;
+ const ScopedAssertNoThreadSuspension no_thread_suspension_;
ClassLinker* const class_linker_;
mirror::DexCache* dex_cache_;
std::vector<uint8_t> patched_code_;
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 5545ba8..a1e61b9 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -330,7 +330,9 @@
class RelativeCallPatcher;
class NoRelativeCallPatcher;
class X86RelativeCallPatcher;
+ class ArmBaseRelativeCallPatcher;
class Thumb2RelativeCallPatcher;
+ class Arm64RelativeCallPatcher;
std::unique_ptr<RelativeCallPatcher> relative_call_patcher_;