Merge "ART: Fix ARM64 long-jump context caller-save smashes"
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index fae44af..09e8b59 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -99,14 +99,33 @@
gprs_[X13] = NULL;
gprs_[X14] = NULL;
gprs_[X15] = NULL;
- fprs_[D8] = NULL;
- fprs_[D9] = NULL;
- fprs_[D10] = NULL;
- fprs_[D11] = NULL;
- fprs_[D12] = NULL;
- fprs_[D13] = NULL;
- fprs_[D14] = NULL;
- fprs_[D15] = NULL;
+ // d0-d7, d16-d31 are caller-saved; d8-d15 are callee-saved.
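+ // Only caller-saved registers may be smashed here: d8-d15 must survive the long jump.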
+
+ fprs_[D0] = NULL;
+ fprs_[D1] = NULL;
+ fprs_[D2] = NULL;
+ fprs_[D3] = NULL;
+ fprs_[D4] = NULL;
+ fprs_[D5] = NULL;
+ fprs_[D6] = NULL;
+ fprs_[D7] = NULL;
+
+ fprs_[D16] = NULL;
+ fprs_[D17] = NULL;
+ fprs_[D18] = NULL;
+ fprs_[D19] = NULL;
+ fprs_[D20] = NULL;
+ fprs_[D21] = NULL;
+ fprs_[D22] = NULL;
+ fprs_[D23] = NULL;
+ fprs_[D24] = NULL;
+ fprs_[D25] = NULL;
+ fprs_[D26] = NULL;
+ fprs_[D27] = NULL;
+ fprs_[D28] = NULL;
+ fprs_[D29] = NULL;
+ fprs_[D30] = NULL;
+ fprs_[D31] = NULL;
}

extern "C" void art_quick_do_long_jump(uint64_t*, uint64_t*);
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index f2050b3..346b08c 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -42,7 +42,7 @@
#endif
// FP args
- stp d1, d2, [sp, #8]
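+ // The FP args d0-d7 are spilled pairwise, starting at d0.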
+ stp d0, d1, [sp, #8]
stp d2, d3, [sp, #24]
stp d4, d5, [sp, #40]
stp d6, d7, [sp, #56]
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index d9bc105..aa18c97 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -59,116 +59,7 @@
public:
size_t Invoke3(size_t arg0, size_t arg1, size_t arg2, uintptr_t code, Thread* self) {
- // Push a transition back into managed code onto the linked list in thread.
- ManagedStack fragment;
- self->PushManagedStackFragment(&fragment);
-
- size_t result;
-#if defined(__i386__)
- // TODO: Set the thread?
- __asm__ __volatile__(
- "pushl $0\n\t" // Push nullptr to terminate quick stack
- "call *%%edi\n\t" // Call the stub
- "addl $4, %%esp" // Pop nullptr
- : "=a" (result)
- // Use the result from eax
- : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code)
- // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
- : ); // clobber.
- // TODO: Should we clobber the other registers? EBX gets clobbered by some of the stubs,
- // but compilation fails when declaring that.
-#elif defined(__arm__)
- __asm__ __volatile__(
- "push {r1-r12, lr}\n\t" // Save state, 13*4B = 52B
- ".cfi_adjust_cfa_offset 52\n\t"
- "push {r9}\n\t"
- ".cfi_adjust_cfa_offset 4\n\t"
- "mov r9, #0\n\n"
- "str r9, [sp, #-8]!\n\t" // Push nullptr to terminate stack, +8B padding so 16B aligned
- ".cfi_adjust_cfa_offset 8\n\t"
- "ldr r9, [sp, #8]\n\t"
-
- // Push everything on the stack, so we don't rely on the order. What a mess. :-(
- "sub sp, sp, #20\n\t"
- "str %[arg0], [sp]\n\t"
- "str %[arg1], [sp, #4]\n\t"
- "str %[arg2], [sp, #8]\n\t"
- "str %[code], [sp, #12]\n\t"
- "str %[self], [sp, #16]\n\t"
- "ldr r0, [sp]\n\t"
- "ldr r1, [sp, #4]\n\t"
- "ldr r2, [sp, #8]\n\t"
- "ldr r3, [sp, #12]\n\t"
- "ldr r9, [sp, #16]\n\t"
- "add sp, sp, #20\n\t"
-
- "blx r3\n\t" // Call the stub
- "add sp, sp, #12\n\t" // Pop nullptr and padding
- ".cfi_adjust_cfa_offset -12\n\t"
- "pop {r1-r12, lr}\n\t" // Restore state
- ".cfi_adjust_cfa_offset -52\n\t"
- "mov %[result], r0\n\t" // Save the result
- : [result] "=r" (result)
- // Use the result from r0
- : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self)
- : ); // clobber.
-#elif defined(__aarch64__)
- __asm__ __volatile__(
- "sub sp, sp, #48\n\t" // Reserve stack space, 16B aligned
- ".cfi_adjust_cfa_offset 48\n\t"
- "stp xzr, x1, [sp]\n\t" // nullptr(end of quick stack), x1
- "stp x2, x3, [sp, #16]\n\t" // Save x2, x3
- "stp x18, x30, [sp, #32]\n\t" // Save x18(xSELF), xLR
-
- // Push everything on the stack, so we don't rely on the order. What a mess. :-(
- "sub sp, sp, #48\n\t"
- "str %[arg0], [sp]\n\t"
- "str %[arg1], [sp, #8]\n\t"
- "str %[arg2], [sp, #16]\n\t"
- "str %[code], [sp, #24]\n\t"
- "str %[self], [sp, #32]\n\t"
- "ldr x0, [sp]\n\t"
- "ldr x1, [sp, #8]\n\t"
- "ldr x2, [sp, #16]\n\t"
- "ldr x3, [sp, #24]\n\t"
- "ldr x18, [sp, #32]\n\t"
- "add sp, sp, #48\n\t"
-
- "blr x3\n\t" // Call the stub
- "ldp x1, x2, [sp, #8]\n\t" // Restore x1, x2
- "ldp x3, x18, [sp, #24]\n\t" // Restore x3, xSELF
- "ldr x30, [sp, #40]\n\t" // Restore xLR
- "add sp, sp, #48\n\t" // Free stack space
- ".cfi_adjust_cfa_offset -48\n\t"
-
- "mov %[result], x0\n\t" // Save the result
- : [result] "=r" (result)
- // Use the result from r0
- : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self)
- : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17"); // clobber.
-#elif defined(__x86_64__)
- // Note: Uses the native convention
- // TODO: Set the thread?
- __asm__ __volatile__(
- "pushq $0\n\t" // Push nullptr to terminate quick stack
- "pushq $0\n\t" // 16B alignment padding
- ".cfi_adjust_cfa_offset 16\n\t"
- "call *%%rax\n\t" // Call the stub
- "addq $16, %%rsp\n\t" // Pop nullptr and padding
- ".cfi_adjust_cfa_offset -16\n\t"
- : "=a" (result)
- // Use the result from rax
- : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code)
- // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
- : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); // clobber all
- // TODO: Should we clobber the other registers?
-#else
- LOG(WARNING) << "Was asked to invoke for an architecture I do not understand.";
- result = 0;
-#endif
- // Pop transition.
- self->PopManagedStackFragment(fragment);
- return result;
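+ // A null referrer makes Invoke3WithReferrer push the plain nullptr stack terminator.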
+ return Invoke3WithReferrer(arg0, arg1, arg2, code, self, nullptr);
}

// TODO: Set up a frame according to referrer's specs.
@@ -179,6 +70,7 @@
self->PushManagedStackFragment(&fragment);
size_t result;
+ size_t fpr_result = 0;
#if defined(__i386__)
// TODO: Set the thread?
__asm__ __volatile__(
@@ -230,6 +122,14 @@
: ); // clobber.
#elif defined(__aarch64__)
__asm__ __volatile__(
+ // Spill space for d8 - d15
+ "sub sp, sp, #64\n\t"
+ ".cfi_adjust_cfa_offset 64\n\t"
+ "stp d8, d9, [sp]\n\t"
+ "stp d10, d11, [sp, #16]\n\t"
+ "stp d12, d13, [sp, #32]\n\t"
+ "stp d14, d15, [sp, #48]\n\t"
+
"sub sp, sp, #48\n\t" // Reserve stack space, 16B aligned
".cfi_adjust_cfa_offset 48\n\t"
"stp %[referrer], x1, [sp]\n\t"// referrer, x1
@@ -238,27 +138,118 @@
// Push everything on the stack, so we don't rely on the order. What a mess. :-(
"sub sp, sp, #48\n\t"
+ ".cfi_adjust_cfa_offset 48\n\t"
"str %[arg0], [sp]\n\t"
"str %[arg1], [sp, #8]\n\t"
"str %[arg2], [sp, #16]\n\t"
"str %[code], [sp, #24]\n\t"
"str %[self], [sp, #32]\n\t"
+
+ // Now we definitely have x0-x3 free, so use them to garble d8 - d15
+ "movk x0, #0xfad0\n\t"
+ "movk x0, #0xebad, lsl #16\n\t"
+ "movk x0, #0xfad0, lsl #32\n\t"
+ "movk x0, #0xebad, lsl #48\n\t"
+ "fmov d8, x0\n\t"
+ "add x0, x0, 1\n\t"
+ "fmov d9, x0\n\t"
+ "add x0, x0, 1\n\t"
+ "fmov d10, x0\n\t"
+ "add x0, x0, 1\n\t"
+ "fmov d11, x0\n\t"
+ "add x0, x0, 1\n\t"
+ "fmov d12, x0\n\t"
+ "add x0, x0, 1\n\t"
+ "fmov d13, x0\n\t"
+ "add x0, x0, 1\n\t"
+ "fmov d14, x0\n\t"
+ "add x0, x0, 1\n\t"
+ "fmov d15, x0\n\t"
+
+ // Load call params
"ldr x0, [sp]\n\t"
"ldr x1, [sp, #8]\n\t"
"ldr x2, [sp, #16]\n\t"
"ldr x3, [sp, #24]\n\t"
"ldr x18, [sp, #32]\n\t"
"add sp, sp, #48\n\t"
-
- "blr x3\n\t" // Call the stub
- "ldp x1, x2, [sp, #8]\n\t" // Restore x1, x2
- "ldp x3, x18, [sp, #24]\n\t" // Restore x3, xSELF
- "ldr x30, [sp, #40]\n\t" // Restore xLR
- "add sp, sp, #48\n\t" // Free stack space
".cfi_adjust_cfa_offset -48\n\t"
+
+ "blr x3\n\t" // Call the stub
+
+ // Test d8 - d15. We can use x1 and x2.
+ "movk x1, #0xfad0\n\t"
+ "movk x1, #0xebad, lsl #16\n\t"
+ "movk x1, #0xfad0, lsl #32\n\t"
+ "movk x1, #0xebad, lsl #48\n\t"
+ "fmov x2, d8\n\t"
+ "cmp x1, x2\n\t"
+ "b.ne 1f\n\t"
+ "add x1, x1, 1\n\t"
+
+ "fmov x2, d9\n\t"
+ "cmp x1, x2\n\t"
+ "b.ne 1f\n\t"
+ "add x1, x1, 1\n\t"
+
+ "fmov x2, d10\n\t"
+ "cmp x1, x2\n\t"
+ "b.ne 1f\n\t"
+ "add x1, x1, 1\n\t"
+
+ "fmov x2, d11\n\t"
+ "cmp x1, x2\n\t"
+ "b.ne 1f\n\t"
+ "add x1, x1, 1\n\t"
+
+ "fmov x2, d12\n\t"
+ "cmp x1, x2\n\t"
+ "b.ne 1f\n\t"
+ "add x1, x1, 1\n\t"
+
+ "fmov x2, d13\n\t"
+ "cmp x1, x2\n\t"
+ "b.ne 1f\n\t"
+ "add x1, x1, 1\n\t"
+
+ "fmov x2, d14\n\t"
+ "cmp x1, x2\n\t"
+ "b.ne 1f\n\t"
+ "add x1, x1, 1\n\t"
+
+ "fmov x2, d15\n\t"
+ "cmp x1, x2\n\t"
+ "b.ne 1f\n\t"
+
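+ // All eight registers matched: flag success and fall through to the common epilogue at 2.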
+ "mov %[fpr_result], #0\n\t"
+
+ // Finish up.
+ "2:\n\t"
+ "ldp x1, x2, [sp, #8]\n\t" // Restore x1, x2
+ "ldp x3, x18, [sp, #24]\n\t" // Restore x3, xSELF
+ "ldr x30, [sp, #40]\n\t" // Restore xLR
+ "add sp, sp, #48\n\t" // Free stack space
+ ".cfi_adjust_cfa_offset -48\n\t"
"mov %[result], x0\n\t" // Save the result
- : [result] "=r" (result)
+
+ "ldp d8, d9, [sp]\n\t" // Restore d8 - d15
+ "ldp d10, d11, [sp, #16]\n\t"
+ "ldp d12, d13, [sp, #32]\n\t"
+ "ldp d14, d15, [sp, #48]\n\t"
+ "add sp, sp, #64\n\t"
+ ".cfi_adjust_cfa_offset -64\n\t"
+
+ "b 3f\n\t" // Goto end
+
+ // Failed fpr verification.
+ "1:\n\t"
+ "mov %[fpr_result], #1\n\t"
+ "b 2b\n\t" // Goto finish-up
+
+ // End
+ "3:\n\t"
+ : [result] "=r" (result), [fpr_result] "=r" (fpr_result)
// Use the result from x0
: [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
[referrer] "r"(referrer)
@@ -285,6 +276,10 @@
#endif
// Pop transition.
self->PopManagedStackFragment(fragment);
+
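+ // EXPECT rather than ASSERT, so a clobbered FP register fails the test while still returning the stub's result.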
+ fp_result = fpr_result;
+ EXPECT_EQ(0U, fp_result);
+
return result;
}
@@ -314,6 +309,9 @@
return 0;
#endif
}
+
+ protected:
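+ // Result of the d8-d15 preservation check, for the stub tests to inspect.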
+ size_t fp_result;
};