Enable core callee-save register allocation on x86-64.
Support for other architectures and for FP callee-save registers will be added in follow-up CLs.
Change-Id: I8cef0343eedc7202d206f5217fdf0349035f0e4d
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index da83b76..285003d 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -46,6 +46,7 @@
static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
arraysize(kRuntimeParameterFpuRegisters);
+static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
static constexpr int kC2ConditionMask = 0x400;
@@ -416,17 +417,27 @@
return kX86_64WordSize;
}
+static uint32_t ComputeCoreCalleeSaveMask() {
+ uint32_t mask = 0;
+ for (size_t i = 0, e = arraysize(kCoreCalleeSaves); i < e; ++i) {
+ mask |= (1 << kCoreCalleeSaves[i]);
+ }
+ return mask;
+}
+
CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options)
- : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfFloatRegisters, 0, compiler_options),
+ : CodeGenerator(graph,
+ kNumberOfCpuRegisters,
+ kNumberOfFloatRegisters,
+ 0,
+ ComputeCoreCalleeSaveMask(),
+ 0,
+ compiler_options),
block_labels_(graph->GetArena(), 0),
location_builder_(graph, this),
instruction_visitor_(graph, this),
move_resolver_(graph->GetArena(), this) {}
-size_t CodeGeneratorX86_64::FrameEntrySpillSize() const {
- return kNumberOfPushedRegistersAtEntry * kX86_64WordSize;
-}
-
InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
CodeGeneratorX86_64* codegen)
: HGraphVisitor(graph),
@@ -459,21 +470,26 @@
return Location();
}
-void CodeGeneratorX86_64::SetupBlockedRegisters() const {
+size_t CodeGeneratorX86_64::FrameEntrySpillSize() const {
+ uint32_t mask = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
+ return kNumberOfPushedRegistersAtEntry * kX86_64WordSize
+ + __builtin_popcount(mask) * kX86_64WordSize;
+}
+
+void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const {
// Stack register is always reserved.
blocked_core_registers_[RSP] = true;
// Block the register used as TMP.
blocked_core_registers_[TMP] = true;
- // TODO: We currently don't use Quick's callee saved registers.
- blocked_core_registers_[RBX] = true;
- blocked_core_registers_[RBP] = true;
- blocked_core_registers_[R12] = true;
- blocked_core_registers_[R13] = true;
- blocked_core_registers_[R14] = true;
- blocked_core_registers_[R15] = true;
+ if (is_baseline) {
+ for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
+ blocked_core_registers_[kCoreCalleeSaves[i]] = true;
+ }
+ }
+ // TODO: We currently don't use Quick's FP callee saved registers.
blocked_fpu_registers_[XMM12] = true;
blocked_fpu_registers_[XMM13] = true;
blocked_fpu_registers_[XMM14] = true;
@@ -484,6 +500,7 @@
// Create a fake register to mimic Quick.
static const int kFakeReturnRegister = 16;
core_spill_mask_ |= (1 << kFakeReturnRegister);
+ core_spill_mask_ |= (allocated_registers_.GetCoreRegisters() & core_callee_save_mask_);
bool skip_overflow_check = IsLeafMethod()
&& !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
@@ -494,10 +511,14 @@
CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
RecordPcInfo(nullptr, 0);
}
+
+ for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
+ if (allocated_registers_.ContainsCoreRegister(kCoreCalleeSaves[i])) {
+ __ pushq(CpuRegister(kCoreCalleeSaves[i]));
+ }
+ }
- // The return PC has already been pushed on the stack.
- __ subq(CpuRegister(RSP),
- Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
+ __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - FrameEntrySpillSize()));
if (!skip_overflow_check && !implicitStackOverflowChecks) {
SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86_64();
@@ -512,8 +533,13 @@
}
void CodeGeneratorX86_64::GenerateFrameExit() {
- __ addq(CpuRegister(RSP),
- Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
+ __ addq(CpuRegister(RSP), Immediate(GetFrameSize() - FrameEntrySpillSize()));
+
+ for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
+ if (allocated_registers_.ContainsCoreRegister(kCoreCalleeSaves[i])) {
+ __ popq(CpuRegister(kCoreCalleeSaves[i]));
+ }
+ }
}
void CodeGeneratorX86_64::Bind(HBasicBlock* block) {