AArch64: Fix quick compiler monitor implementation.
Along with some small fixes:
1. Enable a few more dex bytecodes for compilation.
2. Copy the register definitions from runtime.cc.
3. A quick fix for "cmp Wn, Wm" in the assembler.
4. Optimise GenMoveException a bit by using xzr.
5. Fix improper use of StoreValueWide() on a 32-bit value in FlushIns().
6. Fix one debug assert in the assembler.
All cases in run-all-test pass, except 044, which also fails with
the interpreter.
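
For context, a rough C++ sketch of the thin-lock fast path that the new
GenMonitorEnter/GenMonitorExit sequences emit via ldxr/stxr. This is
illustrative only: the function names and the zero-means-unlocked lock
word layout are simplified assumptions, not ART's real LockWord encoding.

    #include <atomic>
    #include <cstdint>

    // Monitor enter fast path: only an unlocked (zero) lock word is taken
    // inline; anything else falls back to artLockObjectFromCode().
    bool TryThinLockEnter(std::atomic<uint32_t>* lock_word, uint32_t thread_id) {
      uint32_t expected = 0;
      // On AArch64 this lowers to a load/store-exclusive retry loop, roughly
      // the kA64Ldxr2rX / kA64Stxr3wrX pair emitted in the patch below.
      return lock_word->compare_exchange_strong(expected, thread_id,
                                                std::memory_order_acquire);
    }

    // Monitor exit fast path: no ldxr/stxr needed, because only the owning
    // thread can release the lock (see the comment before GenMonitorExit).
    bool TryThinLockExit(std::atomic<uint32_t>* lock_word, uint32_t thread_id) {
      if (lock_word->load(std::memory_order_relaxed) != thread_id) {
        return false;  // Recursive or inflated lock: take the slow path.
      }
      lock_word->store(0u, std::memory_order_release);
      return true;
    }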
Change-Id: I9cc0253f1039c78d5100640235ac33e884b02560
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 144594e..3bc060b 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -147,8 +147,8 @@
// Instruction::MOVE_RESULT,
// Instruction::MOVE_RESULT_WIDE,
// Instruction::MOVE_RESULT_OBJECT,
- // Instruction::MOVE_EXCEPTION,
- // Instruction::RETURN_VOID,
+ Instruction::MOVE_EXCEPTION,
+ Instruction::RETURN_VOID,
// Instruction::RETURN,
// Instruction::RETURN_WIDE,
// Instruction::RETURN_OBJECT,
@@ -163,8 +163,8 @@
// Instruction::CONST_STRING,
// Instruction::CONST_STRING_JUMBO,
// Instruction::CONST_CLASS,
- // Instruction::MONITOR_ENTER,
- // Instruction::MONITOR_EXIT,
+ Instruction::MONITOR_ENTER,
+ Instruction::MONITOR_EXIT,
// Instruction::CHECK_CAST,
// Instruction::INSTANCE_OF,
// Instruction::ARRAY_LENGTH,
@@ -173,7 +173,7 @@
// Instruction::FILLED_NEW_ARRAY,
// Instruction::FILLED_NEW_ARRAY_RANGE,
// Instruction::FILL_ARRAY_DATA,
- // Instruction::THROW,
+ Instruction::THROW,
// Instruction::GOTO,
// Instruction::GOTO_16,
// Instruction::GOTO_32,
@@ -230,14 +230,14 @@
// Instruction::IPUT_BYTE,
// Instruction::IPUT_CHAR,
// Instruction::IPUT_SHORT,
- // Instruction::SGET,
+ Instruction::SGET,
// Instruction::SGET_WIDE,
- // Instruction::SGET_OBJECT,
+ Instruction::SGET_OBJECT,
// Instruction::SGET_BOOLEAN,
// Instruction::SGET_BYTE,
// Instruction::SGET_CHAR,
// Instruction::SGET_SHORT,
- // Instruction::SPUT,
+ Instruction::SPUT,
// Instruction::SPUT_WIDE,
// Instruction::SPUT_OBJECT,
// Instruction::SPUT_BOOLEAN,
@@ -350,7 +350,7 @@
// Instruction::AND_INT_LIT16,
// Instruction::OR_INT_LIT16,
// Instruction::XOR_INT_LIT16,
- // Instruction::ADD_INT_LIT8,
+ Instruction::ADD_INT_LIT8,
// Instruction::RSUB_INT_LIT8,
// Instruction::MUL_INT_LIT8,
// Instruction::DIV_INT_LIT8,
@@ -403,7 +403,7 @@
// kMirOpNullCheck,
// kMirOpRangeCheck,
// kMirOpDivZeroCheck,
- // kMirOpCheck,
+ kMirOpCheck,
// kMirOpCheckPart2,
// kMirOpSelect,
// kMirOpLast,
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index 7ae4b02..f98e366 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -151,6 +151,9 @@
rxzr = rx31,
rwsp = rw31,
rsp = rx31,
+ // TODO: rx4 is an argument register in the C ABI, which is not ideal. We still need to
+ // decide whether to use a caller-save or a callee-save register (in C ABI terms),
+ // since the choice leads to different trampoline implementations.
rA64_SUSPEND = rx4,
rA64_SELF = rx18,
rA64_SP = rx31,
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 8accd0a..93caf89 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -176,7 +176,7 @@
kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
"cmn", "!0R, #!1d!2T", kFixupNone),
- ENCODING_MAP(WIDE(kA64Cmp3Rro), SF_VARIANTS(0x6b20001f),
+ ENCODING_MAP(WIDE(kA64Cmp3Rro), SF_VARIANTS(0x6b00001f),
kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
"cmp", "!0R, !1r!2o", kFixupNone),
@@ -637,7 +637,7 @@
}
// Now check that the requirements are satisfied.
- RegStorage reg(operand);
+ RegStorage reg(operand | RegStorage::kValid);
const char *expected = nullptr;
if (want_float) {
if (!reg.IsFloat()) {
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 1bcf19b..136a04f 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -194,137 +194,101 @@
* details see monitor.cc.
*/
void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
+ // x0/w0 = object
+ // w1 = thin lock thread id
+ // x2 = address of lock word
+ // w3 = lock word / store failure
+ // TUNING: How much performance do we gain by inlining this,
+ // given that we have already flushed all registers?
FlushAllRegs();
- // FIXME: need separate LoadValues for object references.
- LoadValueDirectFixed(rl_src, rs_x0); // Get obj
+ LoadValueDirectFixed(rl_src, rs_w0);
LockCallTemps(); // Prepare for explicit register usage
- constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15.
- if (kArchVariantHasGoodBranchPredictor) {
- LIR* null_check_branch = nullptr;
- if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
- null_check_branch = nullptr; // No null check.
- } else {
- // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
- if (Runtime::Current()->ExplicitNullChecks()) {
- null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
- }
- }
- Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_x2);
- NewLIR3(kA64Ldxr2rX, rx1, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
- MarkPossibleNullPointerException(opt_flags);
- LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_x1, 0, NULL);
- NewLIR4(kA64Stxr3wrX, rx1, rx2, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
- LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_x1, 0, NULL);
-
-
- LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
- not_unlocked_branch->target = slow_path_target;
- if (null_check_branch != nullptr) {
- null_check_branch->target = slow_path_target;
- }
- // TODO: move to a slow path.
- // Go expensive route - artLockObjectFromCode(obj);
- LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pLockObject).Int32Value(), rs_rA64_LR);
- ClobberCallerSave();
- LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
- MarkSafepointPC(call_inst);
-
- LIR* success_target = NewLIR0(kPseudoTargetLabel);
- lock_success_branch->target = success_target;
- GenMemBarrier(kLoadLoad);
+ LIR* null_check_branch = nullptr;
+ if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
+ null_check_branch = nullptr; // No null check.
} else {
- // Explicit null-check as slow-path is entered using an IT.
- GenNullCheck(rs_x0, opt_flags);
- Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_x2);
- MarkPossibleNullPointerException(opt_flags);
- NewLIR3(kA64Ldxr2rX, rx1, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
- OpRegImm(kOpCmp, rs_x1, 0);
- OpIT(kCondEq, "");
- NewLIR4(kA64Stxr3wrX/*eq*/, rx1, rx2, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
- OpRegImm(kOpCmp, rs_x1, 0);
- OpIT(kCondNe, "T");
- // Go expensive route - artLockObjectFromCode(self, obj);
- LoadWordDisp/*ne*/(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pLockObject).Int32Value(),
- rs_rA64_LR);
- ClobberCallerSave();
- LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rA64_LR);
- MarkSafepointPC(call_inst);
- GenMemBarrier(kLoadLoad);
+ // If the null-check fails, it's handled by the slow-path to reduce exception-related meta-data.
+ if (Runtime::Current()->ExplicitNullChecks()) {
+ null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
+ }
}
+ Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
+ OpRegRegImm(kOpAdd, rs_x2, rs_x0, mirror::Object::MonitorOffset().Int32Value());
+ NewLIR2(kA64Ldxr2rX, rw3, rx2);
+ MarkPossibleNullPointerException(opt_flags);
+ LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_w3, 0, NULL);
+ NewLIR3(kA64Stxr3wrX, rw3, rw1, rx2);
+ LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_w3, 0, NULL);
+
+ LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
+ not_unlocked_branch->target = slow_path_target;
+ if (null_check_branch != nullptr) {
+ null_check_branch->target = slow_path_target;
+ }
+ // TODO: move to a slow path.
+ // Go expensive route - artLockObjectFromCode(obj);
+ LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pLockObject).Int32Value(), rs_rA64_LR);
+ ClobberCallerSave();
+ LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
+ MarkSafepointPC(call_inst);
+
+ LIR* success_target = NewLIR0(kPseudoTargetLabel);
+ lock_success_branch->target = success_target;
+ GenMemBarrier(kLoadLoad);
}
/*
* Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
- * details see monitor.cc. Note the code below doesn't use ldrex/strex as the code holds the lock
+ * details see monitor.cc. Note the code below doesn't use ldxr/stxr as the code holds the lock
* and can only give away ownership if its suspended.
*/
void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
+ // x0/w0 = object
+ // w1 = thin lock thread id
+ // w2 = lock word
+ // TUNING: How much performance do we gain by inlining this,
+ // given that we have already flushed all registers?
FlushAllRegs();
- LoadValueDirectFixed(rl_src, rs_x0); // Get obj
+ LoadValueDirectFixed(rl_src, rs_w0); // Get obj
LockCallTemps(); // Prepare for explicit register usage
LIR* null_check_branch = nullptr;
- Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_x2);
- constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15.
- if (kArchVariantHasGoodBranchPredictor) {
- if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
- null_check_branch = nullptr; // No null check.
- } else {
- // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
- if (Runtime::Current()->ExplicitNullChecks()) {
- null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
- }
- }
- Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x1);
- MarkPossibleNullPointerException(opt_flags);
- LoadConstantNoClobber(rs_x3, 0);
- LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_x1, rs_x2, NULL);
- GenMemBarrier(kStoreLoad);
- Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x3);
- LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
-
- LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
- slow_unlock_branch->target = slow_path_target;
- if (null_check_branch != nullptr) {
- null_check_branch->target = slow_path_target;
- }
- // TODO: move to a slow path.
- // Go expensive route - artUnlockObjectFromCode(obj);
- LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pUnlockObject).Int32Value(), rs_rA64_LR);
- ClobberCallerSave();
- LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
- MarkSafepointPC(call_inst);
-
- LIR* success_target = NewLIR0(kPseudoTargetLabel);
- unlock_success_branch->target = success_target;
+ if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
+ null_check_branch = nullptr; // No null check.
} else {
- // Explicit null-check as slow-path is entered using an IT.
- GenNullCheck(rs_x0, opt_flags);
- Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x1); // Get lock
- MarkPossibleNullPointerException(opt_flags);
- Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_x2);
- LoadConstantNoClobber(rs_x3, 0);
- // Is lock unheld on lock or held by us (==thread_id) on unlock?
- OpRegReg(kOpCmp, rs_x1, rs_x2);
- OpIT(kCondEq, "EE");
- Store32Disp/*eq*/(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x3);
- // Go expensive route - UnlockObjectFromCode(obj);
- LoadWordDisp/*ne*/(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pUnlockObject).Int32Value(),
- rs_rA64_LR);
- ClobberCallerSave();
- LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rA64_LR);
- MarkSafepointPC(call_inst);
- GenMemBarrier(kStoreLoad);
+ // If the null-check fails, it's handled by the slow-path to reduce exception-related meta-data.
+ if (Runtime::Current()->ExplicitNullChecks()) {
+ null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
+ }
}
+ Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
+ Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2);
+ MarkPossibleNullPointerException(opt_flags);
+ LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_w1, rs_w2, NULL);
+ GenMemBarrier(kStoreLoad);
+ Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_xzr);
+ LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
+
+ LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
+ slow_unlock_branch->target = slow_path_target;
+ if (null_check_branch != nullptr) {
+ null_check_branch->target = slow_path_target;
+ }
+ // TODO: move to a slow path.
+ // Go expensive route - artUnlockObjectFromCode(obj);
+ LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pUnlockObject).Int32Value(), rs_rA64_LR);
+ ClobberCallerSave();
+ LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
+ MarkSafepointPC(call_inst);
+
+ LIR* success_target = NewLIR0(kPseudoTargetLabel);
+ unlock_success_branch->target = success_target;
}
void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) {
int ex_offset = Thread::ExceptionOffset<8>().Int32Value();
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
- RegStorage reset_reg = AllocTemp();
Load32Disp(rs_rA64_SELF, ex_offset, rl_result.reg);
- LoadConstant(reset_reg, 0);
- Store32Disp(rs_rA64_SELF, ex_offset, reset_reg);
- FreeTemp(reset_reg);
+ Store32Disp(rs_rA64_SELF, ex_offset, rs_xzr);
StoreValue(rl_dest, rl_result);
}
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 6caacc8..10be0d6 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -38,16 +38,27 @@
rs_f24, rs_f25, rs_f26, rs_f27, rs_f28, rs_f29, rs_f30, rs_f31};
static const RegStorage dp_regs_arr[] =
{rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7,
- rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15};
+ rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15,
+ rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23,
+ rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31};
static const RegStorage reserved_regs_arr[] =
{rs_rA64_SUSPEND, rs_rA64_SELF, rs_rA64_SP, rs_rA64_LR};
+// TUNING: Are there too many temp registers and too few promotion targets?
+// This definition needs to match runtime.cc, the quick entry assembly and the JNI compiler.
+// Note: we cannot call C functions directly if this does not match the C ABI.
+// Currently, rs_rA64_SELF is not a callee-save register, which does not match the C ABI.
static const RegStorage core_temps_arr[] =
- {rs_x0, rs_x1, rs_x2, rs_x3, rs_x12};
+ {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7,
+ rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15, rs_x16,
+ rs_x17};
static const RegStorage sp_temps_arr[] =
{rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7,
- rs_f8, rs_f9, rs_f10, rs_f11, rs_f12, rs_f13, rs_f14, rs_f15};
+ rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23,
+ rs_f24, rs_f25, rs_f26, rs_f27, rs_f28, rs_f29, rs_f30, rs_f31};
static const RegStorage dp_temps_arr[] =
- {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7};
+ {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7,
+ rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23,
+ rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31};
static const std::vector<RegStorage> core_regs(core_regs_arr,
core_regs_arr + arraysize(core_regs_arr));
@@ -877,12 +888,13 @@
rl_src.home = false;
MarkLive(rl_src);
- // TODO(Arm64): compress the Method pointer?
- StoreValueWide(rl_method, rl_src);
+ // rl_method might be 32-bit, but the ArtMethod* on the stack is 64-bit, so always flush it.
+ StoreWordDisp(TargetReg(kSp), 0, TargetReg(kArg0));
- // If Method* has been promoted, explicitly flush
+ // If Method* has been promoted, load it;
+ // otherwise, rl_method is the 32-bit value at [sp] and has already been loaded.
if (rl_method.location == kLocPhysReg) {
- StoreWordDisp(TargetReg(kSp), 0, TargetReg(kArg0));
+ StoreValue(rl_method, rl_src);
}
if (cu_->num_ins == 0) {