Support callee save floating point registers on x64.
- Share the computation of core_spill_mask and fpu_spill_mask
  between backends (see the sketch below).
- Remove explicit stack overflow check support: the checks would need
  adjusting, and since they are untested they would easily bitrot.
Change-Id: I0b619b8de4e1bdb169ea1ae7c6ede8df0d65837a
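
For context, the shared helper folds each callee-save register number
into a bit mask; the core spill size is then just the popcount of the
allocated subset times the word size. A minimal standalone sketch of the
idea, assuming the hardware register encodings (RBX = 3, RBP = 5, ...);
the CoreSpillSize helper is illustrative, not the exact ART API:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Fold a list of callee-save register numbers into a bit mask,
    // mirroring ComputeCalleeSaveMask() in the diff below.
    static uint32_t ComputeCalleeSaveMask(const int* registers, size_t length) {
      uint32_t mask = 0;
      for (size_t i = 0; i < length; ++i) {
        mask |= (1u << registers[i]);
      }
      return mask;
    }

    // Spill size = |allocated ∩ callee-save| * word size; roughly what
    // GetCoreSpillSize() computes once both masks are shared.
    static size_t CoreSpillSize(uint32_t allocated_mask,
                                uint32_t callee_save_mask,
                                size_t word_size) {
      return __builtin_popcount(allocated_mask & callee_save_mask) * word_size;
    }

    int main() {
      // x86-64 core callee saves plus the fake return register (bit 16),
      // matching kCoreCalleeSaves in the diff.
      const int core_callee_saves[] = { 3 /*RBX*/, 5 /*RBP*/, 12, 13, 14, 15, 16 };
      uint32_t mask = ComputeCalleeSaveMask(
          core_callee_saves, sizeof(core_callee_saves) / sizeof(core_callee_saves[0]));
      std::printf("core_spill_mask = 0x%x\n", mask);                    // 0x1f028
      std::printf("spill size = %zu\n", CoreSpillSize(mask, mask, 8));  // 7 * 8 = 56
      return 0;
    }
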
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 3d99695..e60f8a5 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -37,7 +37,6 @@
// Some x86_64 instructions require a register to be available as temp.
static constexpr Register TMP = R11;
-static constexpr int kNumberOfPushedRegistersAtEntry = 1;
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX };
@@ -46,7 +45,10 @@
static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
arraysize(kRuntimeParameterFpuRegisters);
-static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
+static constexpr Register kFakeReturnRegister = Register(16);
+static constexpr Register kCoreCalleeSaves[] =
+ { RBX, RBP, R12, R13, R14, R15, kFakeReturnRegister };
+static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
static constexpr int kC2ConditionMask = 0x400;
@@ -128,22 +130,6 @@
DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
};
-class StackOverflowCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
- public:
- StackOverflowCheckSlowPathX86_64() {}
-
- virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- __ Bind(GetEntryLabel());
- __ addq(CpuRegister(RSP),
- Immediate(codegen->GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
- __ gs()->jmp(
- Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowStackOverflow), true));
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathX86_64);
-};
-
class SuspendCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
public:
explicit SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
@@ -417,26 +403,32 @@
return kX86_64WordSize;
}
-static uint32_t ComputeCoreCalleeSaveMask() {
+static uint32_t ComputeCalleeSaveMask(const int* registers, size_t length) {
uint32_t mask = 0;
- for (size_t i = 0, e = arraysize(kCoreCalleeSaves); i < e; ++i) {
- mask |= (1 << kCoreCalleeSaves[i]);
+ for (size_t i = 0, e = length; i < e; ++i) {
+ mask |= (1 << registers[i]);
}
return mask;
}
+static constexpr int kNumberOfCpuRegisterPairs = 0;
CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options)
: CodeGenerator(graph,
kNumberOfCpuRegisters,
kNumberOfFloatRegisters,
- 0,
- ComputeCoreCalleeSaveMask(),
- 0,
+ kNumberOfCpuRegisterPairs,
+ ComputeCalleeSaveMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
+ arraysize(kCoreCalleeSaves)),
+ ComputeCalleeSaveMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
+ arraysize(kFpuCalleeSaves)),
compiler_options),
block_labels_(graph->GetArena(), 0),
location_builder_(graph, this),
instruction_visitor_(graph, this),
- move_resolver_(graph->GetArena(), this) {}
+ move_resolver_(graph->GetArena(), this) {
+ // Use a fake return address register to mimic Quick.
+ AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
+}
InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
CodeGeneratorX86_64* codegen)
@@ -470,12 +462,6 @@
return Location();
}
-size_t CodeGeneratorX86_64::FrameEntrySpillSize() const {
- uint32_t mask = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
- return kNumberOfPushedRegistersAtEntry * kX86_64WordSize
- + __builtin_popcount(mask) * kX86_64WordSize;
-}
-
void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const {
// Stack register is always reserved.
blocked_core_registers_[RSP] = true;
@@ -487,57 +473,60 @@
for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
blocked_core_registers_[kCoreCalleeSaves[i]] = true;
}
+ for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+ blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
+ }
}
-
- // TODO: We currently don't use Quick's FP callee saved registers.
- blocked_fpu_registers_[XMM12] = true;
- blocked_fpu_registers_[XMM13] = true;
- blocked_fpu_registers_[XMM14] = true;
- blocked_fpu_registers_[XMM15] = true;
}
void CodeGeneratorX86_64::GenerateFrameEntry() {
- // Create a fake register to mimic Quick.
- static const int kFakeReturnRegister = 16;
- core_spill_mask_ |= (1 << kFakeReturnRegister);
- core_spill_mask_ |= (allocated_registers_.GetCoreRegisters() & core_callee_save_mask_);
-
bool skip_overflow_check = IsLeafMethod()
&& !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
- bool implicitStackOverflowChecks = GetCompilerOptions().GetImplicitStackOverflowChecks();
+ DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
- if (!skip_overflow_check && implicitStackOverflowChecks) {
+ if (!skip_overflow_check) {
__ testq(CpuRegister(RAX), Address(
CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
RecordPcInfo(nullptr, 0);
}
for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
- if (allocated_registers_.ContainsCoreRegister(kCoreCalleeSaves[i])) {
- __ pushq(CpuRegister(kCoreCalleeSaves[i]));
+ Register reg = kCoreCalleeSaves[i];
+ if (allocated_registers_.ContainsCoreRegister(reg) && reg != kFakeReturnRegister) {
+ __ pushq(CpuRegister(reg));
}
}
- __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize()));
+ uint32_t xmm_spill_location = GetFpuSpillStart();
+ size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
- if (!skip_overflow_check && !implicitStackOverflowChecks) {
- SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86_64();
- AddSlowPath(slow_path);
-
- __ gs()->cmpq(CpuRegister(RSP),
- Address::Absolute(Thread::StackEndOffset<kX86_64WordSize>(), true));
- __ j(kLess, slow_path->GetEntryLabel());
+ for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
+ if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
+ __ movsd(Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)),
+ XmmRegister(kFpuCalleeSaves[i]));
+ }
}
__ movl(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
}
void CodeGeneratorX86_64::GenerateFrameExit() {
- __ addq(CpuRegister(RSP), Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ uint32_t xmm_spill_location = GetFpuSpillStart();
+ size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
+ for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+ if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
+ __ movsd(XmmRegister(kFpuCalleeSaves[i]),
+ Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)));
+ }
+ }
+
+ __ addq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize()));
for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
- if (allocated_registers_.ContainsCoreRegister(kCoreCalleeSaves[i])) {
- __ popq(CpuRegister(kCoreCalleeSaves[i]));
+ Register reg = kCoreCalleeSaves[i];
+ if (allocated_registers_.ContainsCoreRegister(reg) && reg != kFakeReturnRegister) {
+ __ popq(CpuRegister(reg));
}
}
}
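
To make the new prologue/epilogue arithmetic concrete, here is a sketch
of the frame shape it implies. All numbers are illustrative placeholders,
not values read out of the compiler; the real ones come from
GetFrameSize(), GetCoreSpillSize(), GetFpuSpillStart() and
GetFloatingPointSpillSlotSize():

    #include <cstdint>
    #include <cstdio>

    // Assumed frame shape after GenerateFrameEntry(), high to low addresses:
    //   [return address]            counted via kFakeReturnRegister (bit 16)
    //   [pushed core callee saves]  one pushq per allocated callee save
    //   [xmm spill slots]           movsd to GetFpuSpillStart() + slot_size * i
    //   [locals / spill area]
    //   [current method]            kCurrentMethodStackOffset == 0
    int main() {
      // Hypothetical frame: return address + 3 pushed registers = 32 bytes.
      const uint32_t frame_size = 96;       // stand-in for GetFrameSize()
      const uint32_t core_spill_size = 32;  // stand-in for GetCoreSpillSize()
      const uint32_t fpu_spill_start = 48;  // stand-in for GetFpuSpillStart()
      const uint32_t fpu_slot_size = 8;     // stand-in for GetFloatingPointSpillSlotSize()

      // The prologue's subq covers whatever the pushes did not already allocate.
      std::printf("subq $%u, %%rsp\n", frame_size - core_spill_size);

      // Each allocated callee-save xmm register gets a fixed slot.
      for (uint32_t i = 0; i < 2; ++i) {
        std::printf("movsd %%xmm%u, %u(%%rsp)\n", 12 + i, fpu_spill_start + fpu_slot_size * i);
      }
      return 0;
    }

The fake return-address register is what keeps this arithmetic uniform:
the call instruction has already pushed 8 bytes, so treating the return
address as one more callee save makes GetFrameSize() - GetCoreSpillSize()
exactly the remaining subq amount.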