Opt compiler: ARM64: Use ldp/stp on arm64 for slow paths.
It should be a bit faster than load/store single registers and reduce
the code size.
Change-Id: I67b8302adf6174b7bb728f7c2afd2c237e34ffde
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 1c6debd..7e9cdac 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -64,6 +64,7 @@
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
using helpers::ARM64EncodableConstantOrRegister;
+using helpers::ArtVixlRegCodeCoherentForRegSet;
static constexpr size_t kHeapRefSize = sizeof(mirror::HeapReference<mirror::Object>);
static constexpr int kCurrentMethodStackOffset = 0;
@@ -105,6 +106,88 @@
#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, x).Int32Value()
+// Calculate memory accessing operand for save/restore live registers.
+static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
+ RegisterSet* register_set,
+ int64_t spill_offset,
+ bool is_save) {
+ DCHECK(ArtVixlRegCodeCoherentForRegSet(register_set->GetCoreRegisters(),
+ codegen->GetNumberOfCoreRegisters(),
+ register_set->GetFloatingPointRegisters(),
+ codegen->GetNumberOfFloatingPointRegisters()));
+
+ CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize,
+ register_set->GetCoreRegisters() & (~callee_saved_core_registers.list()));
+ CPURegList fp_list = CPURegList(CPURegister::kFPRegister, kDRegSize,
+ register_set->GetFloatingPointRegisters() & (~callee_saved_fp_registers.list()));
+
+ MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
+ UseScratchRegisterScope temps(masm);
+
+ Register base = masm->StackPointer();
+ int64_t core_spill_size = core_list.TotalSizeInBytes();
+ int64_t fp_spill_size = fp_list.TotalSizeInBytes();
+ int64_t reg_size = kXRegSizeInBytes;
+ int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
+ uint32_t ls_access_size = WhichPowerOf2(reg_size);
+ if (((core_list.Count() > 1) || (fp_list.Count() > 1)) &&
+ !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
+ // If the offset does not fit in the instruction's immediate field, use an alternate register
+ // to compute the base address(float point registers spill base address).
+ Register new_base = temps.AcquireSameSizeAs(base);
+ __ Add(new_base, base, Operand(spill_offset + core_spill_size));
+ base = new_base;
+ spill_offset = -core_spill_size;
+ int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
+ DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
+ DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
+ }
+
+ if (is_save) {
+ __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
+ __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
+ } else {
+ __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
+ __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
+ }
+}
+
+void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
+ RegisterSet* register_set = locations->GetLiveRegisters();
+ size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
+ for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+ if (!codegen->IsCoreCalleeSaveRegister(i) && register_set->ContainsCoreRegister(i)) {
+ // If the register holds an object, update the stack mask.
+ if (locations->RegisterContainsObject(i)) {
+ locations->SetStackBit(stack_offset / kVRegSize);
+ }
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ saved_core_stack_offsets_[i] = stack_offset;
+ stack_offset += kXRegSizeInBytes;
+ }
+ }
+
+ for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
+ if (!codegen->IsFloatingPointCalleeSaveRegister(i) &&
+ register_set->ContainsFloatingPointRegister(i)) {
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ saved_fpu_stack_offsets_[i] = stack_offset;
+ stack_offset += kDRegSizeInBytes;
+ }
+ }
+
+ SaveRestoreLiveRegistersHelper(codegen, register_set,
+ codegen->GetFirstRegisterSlotInSlowPath(), true /* is_save */);
+}
+
+void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
+ RegisterSet* register_set = locations->GetLiveRegisters();
+ SaveRestoreLiveRegistersHelper(codegen, register_set,
+ codegen->GetFirstRegisterSlotInSlowPath(), false /* is_save */);
+}
+
class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
public:
BoundsCheckSlowPathARM64(HBoundsCheck* instruction,
@@ -527,6 +610,19 @@
GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
}
+vixl::CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
+ DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
+ return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize,
+ core_spill_mask_);
+}
+
+vixl::CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
+ DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
+ GetNumberOfFloatingPointRegisters()));
+ return vixl::CPURegList(vixl::CPURegister::kFPRegister, vixl::kDRegSize,
+ fpu_spill_mask_);
+}
+
void CodeGeneratorARM64::Bind(HBasicBlock* block) {
__ Bind(GetLabelOf(block));
}