Merge "Support callee save floating point registers on x64."
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 0af70f9..43fd8bb 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -140,6 +140,9 @@
size_t maximum_number_of_live_core_registers,
size_t maximum_number_of_live_fp_registers,
size_t number_of_out_slots) {
+ core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
+ DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
+ fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
SetFrameSize(RoundUp(
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 16080a4..85d18c0 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -117,13 +117,13 @@
size_t maximum_number_of_live_core_registers,
size_t maximum_number_of_live_fp_registers,
size_t number_of_out_slots);
- virtual size_t FrameEntrySpillSize() const = 0;
int32_t GetStackSlot(HLocal* local) const;
Location GetTemporaryLocation(HTemporary* temp) const;
uint32_t GetFrameSize() const { return frame_size_; }
void SetFrameSize(uint32_t size) { frame_size_ = size; }
uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
+ uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }
size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
@@ -225,6 +225,7 @@
const CompilerOptions& compiler_options)
: frame_size_(kUninitializedFrameSize),
core_spill_mask_(0),
+ fpu_spill_mask_(0),
first_register_slot_in_slow_path_(0),
blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)),
blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)),
@@ -254,9 +255,29 @@
virtual ParallelMoveResolver* GetMoveResolver() = 0;
+ // Returns the location of the first spilled entry for floating point registers,
+ // relative to the stack pointer.
+ uint32_t GetFpuSpillStart() const {
+ DCHECK_NE(frame_size_, kUninitializedFrameSize);
+ return GetFrameSize() - FrameEntrySpillSize();
+ }
+
+ uint32_t GetFpuSpillSize() const {
+ return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
+ }
+
+ uint32_t GetCoreSpillSize() const {
+ return POPCOUNT(core_spill_mask_) * GetWordSize();
+ }
+
+ uint32_t FrameEntrySpillSize() const {
+ return GetFpuSpillSize() + GetCoreSpillSize();
+ }
+
// Frame size required for this method.
uint32_t frame_size_;
uint32_t core_spill_mask_;
+ uint32_t fpu_spill_mask_;
uint32_t first_register_slot_in_slow_path_;
// Registers that were allocated during linear scan.
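For illustration only (not part of the change): a standalone sketch of how the helpers added above compose. All concrete values are assumed (an x86-64-style setup with 8-byte words, 8-byte FP spill slots, and made-up spill masks); the arithmetic mirrors GetCoreSpillSize(), GetFpuSpillSize(), FrameEntrySpillSize() and GetFpuSpillStart(). Real masks come from the allocated registers intersected with the callee-save masks, as in the code_generator.cc hunk above.

#include <cstdint>
#include <cstdio>

int main() {
  // Assumed masks: RBX, R12 and the fake return-address register in the core mask,
  // XMM12 and XMM13 in the FPU mask.
  const uint32_t core_spill_mask = (1u << 3) | (1u << 12) | (1u << 16);
  const uint32_t fpu_spill_mask = (1u << 12) | (1u << 13);
  const uint32_t kWordSize = 8;         // GetWordSize() on x86-64
  const uint32_t kFpSpillSlotSize = 8;  // GetFloatingPointSpillSlotSize(), assumed

  const uint32_t core_spill_size = __builtin_popcount(core_spill_mask) * kWordSize;       // 3 * 8 = 24
  const uint32_t fpu_spill_size = __builtin_popcount(fpu_spill_mask) * kFpSpillSlotSize;  // 2 * 8 = 16
  const uint32_t frame_entry_spill_size = core_spill_size + fpu_spill_size;               // 40

  const uint32_t frame_size = 96;  // assumed, already rounded up to the stack alignment
  const uint32_t fpu_spill_start = frame_size - frame_entry_spill_size;                   // 56

  std::printf("core=%u fpu=%u entry=%u fpu_start=%u\n",
              core_spill_size, fpu_spill_size, frame_entry_spill_size, fpu_spill_start);
  return 0;
}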
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index bc8858b..f4e4f5a 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -42,7 +42,6 @@
return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
}
-static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2; // LR, R6, R7
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2, R3 };
@@ -113,20 +112,6 @@
DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM);
};
-class StackOverflowCheckSlowPathARM : public SlowPathCodeARM {
- public:
- StackOverflowCheckSlowPathARM() {}
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- __ Bind(GetEntryLabel());
- __ LoadFromOffset(kLoadWord, PC, TR,
- QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pThrowStackOverflow).Int32Value());
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathARM);
-};
-
class SuspendCheckSlowPathARM : public SlowPathCodeARM {
public:
SuspendCheckSlowPathARM(HSuspendCheck* instruction, HBasicBlock* successor)
@@ -390,16 +375,19 @@
const ArmInstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options)
: CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters,
- kNumberOfRegisterPairs, 0, 0, compiler_options),
+ kNumberOfRegisterPairs, (1 << R6) | (1 << R7) | (1 << LR), 0, compiler_options),
block_labels_(graph->GetArena(), 0),
location_builder_(graph, this),
instruction_visitor_(graph, this),
move_resolver_(graph->GetArena(), this),
assembler_(true),
- isa_features_(isa_features) {}
-
-size_t CodeGeneratorARM::FrameEntrySpillSize() const {
- return kNumberOfPushedRegistersAtEntry * kArmWordSize;
+ isa_features_(isa_features) {
+ // We unconditionally allocate R6 and R7 to ensure we can do long operations
+ // with baseline.
+ AddAllocatedRegister(Location::RegisterLocation(R6));
+ AddAllocatedRegister(Location::RegisterLocation(R7));
+ // Save the link register to mimic Quick.
+ AddAllocatedRegister(Location::RegisterLocation(LR));
}
Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const {
@@ -516,32 +504,21 @@
void CodeGeneratorARM::GenerateFrameEntry() {
bool skip_overflow_check =
IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
+ DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
if (!skip_overflow_check) {
- if (GetCompilerOptions().GetImplicitStackOverflowChecks()) {
- __ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm)));
- __ LoadFromOffset(kLoadWord, IP, IP, 0);
- RecordPcInfo(nullptr, 0);
- } else {
- SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM();
- AddSlowPath(slow_path);
-
- __ LoadFromOffset(kLoadWord, IP, TR, Thread::StackEndOffset<kArmWordSize>().Int32Value());
- __ cmp(SP, ShifterOperand(IP));
- __ b(slow_path->GetEntryLabel(), CC);
- }
+ __ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm)));
+ __ LoadFromOffset(kLoadWord, IP, IP, 0);
+ RecordPcInfo(nullptr, 0);
}
- core_spill_mask_ |= (1 << LR | 1 << R6 | 1 << R7);
- __ PushList(1 << LR | 1 << R6 | 1 << R7);
-
- // The return PC has already been pushed on the stack.
- __ AddConstant(SP, -(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize));
+ __ PushList(core_spill_mask_);
+ __ AddConstant(SP, -(GetFrameSize() - FrameEntrySpillSize()));
__ StoreToOffset(kStoreWord, R0, SP, 0);
}
void CodeGeneratorARM::GenerateFrameExit() {
- __ AddConstant(SP, GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize);
- __ PopList(1 << PC | 1 << R6 | 1 << R7);
+ __ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize());
+ __ PopList((core_spill_mask_ & (~(1 << LR))) | 1 << PC);
}
void CodeGeneratorARM::Bind(HBasicBlock* block) {
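A minimal sketch (assuming the usual ARM encodings LR = 14, PC = 15) of the mask handling in the ARM frame entry/exit above: the entry pushes core_spill_mask_ as-is, while the exit pops the same registers with PC substituted for LR, so the pop itself performs the return.

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t kR6 = 6, kR7 = 7, kLR = 14, kPC = 15;  // ARM register encodings, assumed
  const uint32_t core_spill_mask = (1u << kR6) | (1u << kR7) | (1u << kLR);

  const uint32_t entry_push_mask = core_spill_mask;                               // PushList argument
  const uint32_t exit_pop_mask = (core_spill_mask & ~(1u << kLR)) | (1u << kPC);  // PopList argument

  std::printf("push=0x%x pop=0x%x\n", entry_push_mask, exit_pop_mask);  // push=0x40c0 pop=0x80c0
  return 0;
}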
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index f3b1ff5..46accfd 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -179,8 +179,6 @@
return kArmWordSize;
}
- size_t FrameEntrySpillSize() const OVERRIDE;
-
HGraphVisitor* GetLocationBuilder() OVERRIDE {
return &location_builder_;
}
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 21c1e9c..1f561b7 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -434,21 +434,6 @@
DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
};
-class StackOverflowCheckSlowPathARM64 : public SlowPathCodeARM64 {
- public:
- StackOverflowCheckSlowPathARM64() {}
-
- virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- __ Bind(GetEntryLabel());
- arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowStackOverflow), nullptr, 0);
- CheckEntrypointTypes<kQuickThrowStackOverflow, void, void*>();
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathARM64);
-};
-
class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
public:
explicit SuspendCheckSlowPathARM64(HSuspendCheck* instruction,
@@ -567,13 +552,16 @@
kNumberOfAllocatableRegisters,
kNumberOfAllocatableFPRegisters,
kNumberOfAllocatableRegisterPairs,
- 0,
+ (1 << LR),
0,
compiler_options),
block_labels_(nullptr),
location_builder_(graph, this),
instruction_visitor_(graph, this),
- move_resolver_(graph->GetArena(), this) {}
+ move_resolver_(graph->GetArena(), this) {
+ // Save the link register (containing the return address) to mimic Quick.
+ AddAllocatedRegister(Location::RegisterLocation(LR));
+}
#undef __
#define __ GetVIXLAssembler()->
@@ -607,26 +595,15 @@
if (do_overflow_check) {
UseScratchRegisterScope temps(GetVIXLAssembler());
Register temp = temps.AcquireX();
- if (GetCompilerOptions().GetImplicitStackOverflowChecks()) {
- __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
- __ Ldr(wzr, MemOperand(temp, 0));
- RecordPcInfo(nullptr, 0);
- } else {
- SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM64();
- AddSlowPath(slow_path);
-
- __ Ldr(temp, MemOperand(tr, Thread::StackEndOffset<kArm64WordSize>().Int32Value()));
- __ Cmp(sp, temp);
- __ B(lo, slow_path->GetEntryLabel());
- }
+ DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
+ __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
+ __ Ldr(wzr, MemOperand(temp, 0));
+ RecordPcInfo(nullptr, 0);
}
- CPURegList preserved_regs = GetFramePreservedRegisters();
int frame_size = GetFrameSize();
- core_spill_mask_ |= preserved_regs.list();
-
__ Str(w0, MemOperand(sp, -frame_size, PreIndex));
- __ PokeCPURegList(preserved_regs, frame_size - preserved_regs.TotalSizeInBytes());
+ __ PokeCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize());
// Stack layout:
// sp[frame_size - 8] : lr.
@@ -638,8 +615,7 @@
void CodeGeneratorARM64::GenerateFrameExit() {
int frame_size = GetFrameSize();
- CPURegList preserved_regs = GetFramePreservedRegisters();
- __ PeekCPURegList(preserved_regs, frame_size - preserved_regs.TotalSizeInBytes());
+ __ PeekCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize());
__ Drop(frame_size);
}
@@ -690,10 +666,6 @@
}
}
-size_t CodeGeneratorARM64::FrameEntrySpillSize() const {
- return GetFramePreservedRegistersSize();
-}
-
Location CodeGeneratorARM64::GetStackLocation(HLoadLocal* load) const {
Primitive::Type type = load->GetType();
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index d81e481..96013e5 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -178,9 +178,6 @@
vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, vixl::lr.Bit());
return frame_preserved_regs;
}
- static int GetFramePreservedRegistersSize() {
- return GetFramePreservedRegisters().TotalSizeInBytes();
- }
void Bind(HBasicBlock* block) OVERRIDE;
@@ -205,8 +202,6 @@
return block_entry_label->location();
}
- size_t FrameEntrySpillSize() const OVERRIDE;
-
HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
Arm64Assembler* GetAssembler() OVERRIDE { return &assembler_; }
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 9e26ddd..c0fdcaa 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -31,7 +31,6 @@
namespace x86 {
-static constexpr int kNumberOfPushedRegistersAtEntry = 1;
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX };
@@ -44,6 +43,7 @@
// Marker for places that can be updated once we don't follow the quick ABI.
static constexpr bool kFollowsQuickABI = true;
+static constexpr int kFakeReturnRegister = Register(8);
class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> {
public:
@@ -123,21 +123,6 @@
DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
};
-class StackOverflowCheckSlowPathX86 : public SlowPathCodeX86 {
- public:
- StackOverflowCheckSlowPathX86() {}
-
- virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- __ Bind(GetEntryLabel());
- __ addl(ESP,
- Immediate(codegen->GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize));
- __ fs()->jmp(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowStackOverflow)));
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathX86);
-};
-
class BoundsCheckSlowPathX86 : public SlowPathCodeX86 {
public:
BoundsCheckSlowPathX86(HBoundsCheck* instruction,
@@ -375,14 +360,13 @@
CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options)
: CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters,
- kNumberOfRegisterPairs, 0, 0, compiler_options),
+ kNumberOfRegisterPairs, (1 << kFakeReturnRegister), 0, compiler_options),
block_labels_(graph->GetArena(), 0),
location_builder_(graph, this),
instruction_visitor_(graph, this),
- move_resolver_(graph->GetArena(), this) {}
-
-size_t CodeGeneratorX86::FrameEntrySpillSize() const {
- return kNumberOfPushedRegistersAtEntry * kX86WordSize;
+ move_resolver_(graph->GetArena(), this) {
+ // Use a fake return address register to mimic Quick.
+ AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}
Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const {
@@ -464,35 +448,21 @@
codegen_(codegen) {}
void CodeGeneratorX86::GenerateFrameEntry() {
- // Create a fake register to mimic Quick.
- static const int kFakeReturnRegister = 8;
- core_spill_mask_ |= (1 << kFakeReturnRegister);
-
bool skip_overflow_check =
IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
- bool implicitStackOverflowChecks = GetCompilerOptions().GetImplicitStackOverflowChecks();
+ DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
- if (!skip_overflow_check && implicitStackOverflowChecks) {
+ if (!skip_overflow_check) {
__ testl(EAX, Address(ESP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86))));
RecordPcInfo(nullptr, 0);
}
- // The return PC has already been pushed on the stack.
- __ subl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize));
-
- if (!skip_overflow_check && !implicitStackOverflowChecks) {
- SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86();
- AddSlowPath(slow_path);
-
- __ fs()->cmpl(ESP, Address::Absolute(Thread::StackEndOffset<kX86WordSize>()));
- __ j(kLess, slow_path->GetEntryLabel());
- }
-
+ __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
__ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
}
void CodeGeneratorX86::GenerateFrameExit() {
- __ addl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize));
+ __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
}
void CodeGeneratorX86::Bind(HBasicBlock* block) {
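A hypothetical sketch of the x86 frame arithmetic above (the fake register index 8 comes from the constant added earlier; the frame size is made up): because the fake return-address register is part of core_spill_mask_, FrameEntrySpillSize() already accounts for the return PC pushed by the caller's call, so the subl only allocates the remainder of the frame.

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t kFakeReturnRegister = 8;  // matches the constant added above
  const uint32_t core_spill_mask = 1u << kFakeReturnRegister;
  const uint32_t kX86WordSize = 4;         // 32-bit words on x86

  const uint32_t frame_entry_spill_size =
      __builtin_popcount(core_spill_mask) * kX86WordSize;  // 4 bytes: the pushed return PC
  const uint32_t frame_size = 64;                          // assumed result of frame size computation

  // Amount subtracted from ESP in GenerateFrameEntry (and added back on exit).
  std::printf("subl ESP, %u\n", frame_size - frame_entry_spill_size);  // 60
  return 0;
}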
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index dcfeb2f..73b647c 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -178,8 +178,6 @@
return 2 * kX86WordSize;
}
- size_t FrameEntrySpillSize() const OVERRIDE;
-
HGraphVisitor* GetLocationBuilder() OVERRIDE {
return &location_builder_;
}
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 3d99695..e60f8a5 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -37,7 +37,6 @@
// Some x86_64 instructions require a register to be available as temp.
static constexpr Register TMP = R11;
-static constexpr int kNumberOfPushedRegistersAtEntry = 1;
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX };
@@ -46,7 +45,10 @@
static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
arraysize(kRuntimeParameterFpuRegisters);
-static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
+static constexpr Register kFakeReturnRegister = Register(16);
+static constexpr Register kCoreCalleeSaves[] =
+ { RBX, RBP, R12, R13, R14, R15, kFakeReturnRegister };
+static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
static constexpr int kC2ConditionMask = 0x400;
@@ -128,22 +130,6 @@
DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
};
-class StackOverflowCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
- public:
- StackOverflowCheckSlowPathX86_64() {}
-
- virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- __ Bind(GetEntryLabel());
- __ addq(CpuRegister(RSP),
- Immediate(codegen->GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
- __ gs()->jmp(
- Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowStackOverflow), true));
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathX86_64);
-};
-
class SuspendCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
public:
explicit SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
@@ -417,26 +403,32 @@
return kX86_64WordSize;
}
-static uint32_t ComputeCoreCalleeSaveMask() {
+static uint32_t ComputeCalleeSaveMask(const int* registers, size_t length) {
uint32_t mask = 0;
- for (size_t i = 0, e = arraysize(kCoreCalleeSaves); i < e; ++i) {
- mask |= (1 << kCoreCalleeSaves[i]);
+ for (size_t i = 0, e = length; i < e; ++i) {
+ mask |= (1 << registers[i]);
}
return mask;
}
+static constexpr int kNumberOfCpuRegisterPairs = 0;
CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options)
: CodeGenerator(graph,
kNumberOfCpuRegisters,
kNumberOfFloatRegisters,
- 0,
- ComputeCoreCalleeSaveMask(),
- 0,
+ kNumberOfCpuRegisterPairs,
+ ComputeCalleeSaveMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
+ arraysize(kCoreCalleeSaves)),
+ ComputeCalleeSaveMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
+ arraysize(kFpuCalleeSaves)),
compiler_options),
block_labels_(graph->GetArena(), 0),
location_builder_(graph, this),
instruction_visitor_(graph, this),
- move_resolver_(graph->GetArena(), this) {}
+ move_resolver_(graph->GetArena(), this) {
+ // Use a fake return address register to mimic Quick.
+ AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
+}
InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
CodeGeneratorX86_64* codegen)
@@ -470,12 +462,6 @@
return Location();
}
-size_t CodeGeneratorX86_64::FrameEntrySpillSize() const {
- uint32_t mask = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
- return kNumberOfPushedRegistersAtEntry * kX86_64WordSize
- + __builtin_popcount(mask) * kX86_64WordSize;
-}
-
void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const {
// Stack register is always reserved.
blocked_core_registers_[RSP] = true;
@@ -487,57 +473,60 @@
for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
blocked_core_registers_[kCoreCalleeSaves[i]] = true;
}
+ for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+ blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
+ }
}
-
- // TODO: We currently don't use Quick's FP callee saved registers.
- blocked_fpu_registers_[XMM12] = true;
- blocked_fpu_registers_[XMM13] = true;
- blocked_fpu_registers_[XMM14] = true;
- blocked_fpu_registers_[XMM15] = true;
}
void CodeGeneratorX86_64::GenerateFrameEntry() {
- // Create a fake register to mimic Quick.
- static const int kFakeReturnRegister = 16;
- core_spill_mask_ |= (1 << kFakeReturnRegister);
- core_spill_mask_ |= (allocated_registers_.GetCoreRegisters() & core_callee_save_mask_);
-
bool skip_overflow_check = IsLeafMethod()
&& !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
- bool implicitStackOverflowChecks = GetCompilerOptions().GetImplicitStackOverflowChecks();
+ DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
- if (!skip_overflow_check && implicitStackOverflowChecks) {
+ if (!skip_overflow_check) {
__ testq(CpuRegister(RAX), Address(
CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
RecordPcInfo(nullptr, 0);
}
for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
- if (allocated_registers_.ContainsCoreRegister(kCoreCalleeSaves[i])) {
- __ pushq(CpuRegister(kCoreCalleeSaves[i]));
+ Register reg = kCoreCalleeSaves[i];
+ if (allocated_registers_.ContainsCoreRegister(reg) && reg != kFakeReturnRegister) {
+ __ pushq(CpuRegister(reg));
}
}
- __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize()));
+ uint32_t xmm_spill_location = GetFpuSpillStart();
+ size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
- if (!skip_overflow_check && !implicitStackOverflowChecks) {
- SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86_64();
- AddSlowPath(slow_path);
-
- __ gs()->cmpq(CpuRegister(RSP),
- Address::Absolute(Thread::StackEndOffset<kX86_64WordSize>(), true));
- __ j(kLess, slow_path->GetEntryLabel());
+ for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
+ if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
+ __ movsd(Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)),
+ XmmRegister(kFpuCalleeSaves[i]));
+ }
}
__ movl(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
}
void CodeGeneratorX86_64::GenerateFrameExit() {
- __ addq(CpuRegister(RSP), Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ uint32_t xmm_spill_location = GetFpuSpillStart();
+ size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
+ for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+ if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
+ __ movsd(XmmRegister(kFpuCalleeSaves[i]),
+ Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)));
+ }
+ }
+
+ __ addq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize()));
for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
- if (allocated_registers_.ContainsCoreRegister(kCoreCalleeSaves[i])) {
- __ popq(CpuRegister(kCoreCalleeSaves[i]));
+ Register reg = kCoreCalleeSaves[i];
+ if (allocated_registers_.ContainsCoreRegister(reg) && reg != kFakeReturnRegister) {
+ __ popq(CpuRegister(reg));
}
}
}
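For illustration (all concrete numbers assumed): the addressing used by the movsd spills and restores above. Each allocated callee-save XMM register goes to xmm_spill_location + xmm_spill_slot_size * i, where i is its index in kFpuCalleeSaves.

#include <cstdint>
#include <cstdio>

int main() {
  const int kFpuCalleeSaves[] = {12, 13, 14, 15};              // XMM12..XMM15, as above
  const uint32_t xmm_spill_location = 56;                      // GetFpuSpillStart(), assumed
  const uint32_t xmm_spill_slot_size = 8;                      // GetFloatingPointSpillSlotSize(), assumed
  const uint32_t allocated_fp_mask = (1u << 12) | (1u << 13);  // assume XMM12 and XMM13 were allocated

  for (int i = 0; i < 4; ++i) {
    if ((allocated_fp_mask & (1u << kFpuCalleeSaves[i])) != 0) {
      // Mirrors: movsd [RSP + xmm_spill_location + xmm_spill_slot_size * i], XMMn
      std::printf("XMM%d <-> [RSP + %u]\n",
                  kFpuCalleeSaves[i], xmm_spill_location + xmm_spill_slot_size * i);
    }
  }
  return 0;
}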
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 645fb17..1ac2ab7 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -194,8 +194,6 @@
return kX86_64WordSize;
}
- size_t FrameEntrySpillSize() const OVERRIDE;
-
HGraphVisitor* GetLocationBuilder() OVERRIDE {
return &location_builder_;
}
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 1e0d65a..5bca730 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -286,7 +286,7 @@
ArrayRef<const uint8_t>(allocator.GetMemory()),
codegen->GetFrameSize(),
codegen->GetCoreSpillMask(),
- 0, /* FPR spill mask, unused */
+ codegen->GetFpuSpillMask(),
ArrayRef<const uint8_t>(stack_map));
}
@@ -313,7 +313,7 @@
ArrayRef<const uint8_t>(allocator.GetMemory()),
codegen->GetFrameSize(),
codegen->GetCoreSpillMask(),
- 0, /* FPR spill mask, unused */
+ codegen->GetFpuSpillMask(),
&src_mapping_table,
AlignVectorSize(mapping_table),
AlignVectorSize(vmap_table),