Change pResolveString entrypoint to kSaveEverything.
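
The pResolveString entrypoint now uses the kSaveEverything runtime
frame. This lets the compiled HLoadString fast path for
LoadKind::kBssEntry keep the .bss entry address live in a temporary
across the slow-path call, so the slow path can store the resolved
String directly instead of re-materializing the address (movw/movt+ADD
PC on ARM, ADRP on ARM64) after the call. A rough standalone C++ sketch
of the idea; every name in it is illustrative, not an ART API:

  #include <cstdint>
  #include <unordered_map>

  // Hypothetical stand-ins for ART internals, for illustration only.
  using ObjRef = const void*;
  static std::unordered_map<uint32_t, ObjRef> g_intern_table;

  // Models artResolveStringFromCode: resolve (and intern) the string.
  static ObjRef ResolveSlow(uint32_t string_index) {
    static const char storage[] = "example";
    return g_intern_table.emplace(string_index, storage).first->second;
  }

  // Models the compiled fast path + slow path for a .bss string entry.
  ObjRef LoadStringBssEntry(ObjRef* bss_entry, uint32_t string_index) {
    ObjRef result = *bss_entry;   // fast path: one PC-relative load
    if (result == nullptr) {      // miss: take the slow path
      result = ResolveSlow(string_index);
      // With kSaveEverything, the register holding the entry address
      // survives the call, so no address recomputation is needed here.
      *bss_entry = result;
    }
    return result;
  }
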
Test: Run ART test suite including gcstress on host and Nexus 9.
Test: Run ART test suite including gcstress with Baker CC on host and Nexus 9.
Bug: 20323084
Change-Id: I63c21a7d3be8ff7a5765b5003c85b5317635efe6
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 3b77880..4a9de7f 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -214,7 +214,7 @@
DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
patch.GetType() == LinkerPatch::Type::kTypeRelative) << patch.GetType();
} else {
- // With the read barrier (non-baker) enabled, it could be kDexCacheArray in the
+ // With the read barrier (non-Baker) enabled, it could be kDexCacheArray in the
// HLoadString::LoadKind::kDexCachePcRelative case of VisitLoadString().
DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
patch.GetType() == LinkerPatch::Type::kTypeRelative ||
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 9870876..80b4907 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -429,34 +429,50 @@
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+ HLoadString* load = instruction_->AsLoadString();
+ const uint32_t string_index = load->GetStringIndex();
+ Register out = locations->Out().AsRegister<Register>();
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
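+ // A kSaveEverything call preserves every register except R0, which carries the return value.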
+ constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier);
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
- HLoadString* load = instruction_->AsLoadString();
- const uint32_t string_index = load->GetStringIndex();
+ // In the unlucky case that the `temp` is R0, we preserve the address in `out` across
+ // the kSaveEverything call (or use `out` for the address after a non-kSaveEverything call).
+ bool temp_is_r0 = (temp == calling_convention.GetRegisterAt(0));
+ Register entry_address = temp_is_r0 ? out : temp;
+ DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0));
+ if (call_saves_everything_except_r0 && temp_is_r0) {
+ __ mov(entry_address, ShifterOperand(temp));
+ }
+
__ LoadImmediate(calling_convention.GetRegisterAt(0), string_index);
arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+
+ // Store the resolved String to the .bss entry.
+ if (call_saves_everything_except_r0) {
+ // The string entry address was preserved in `entry_address` thanks to kSaveEverything.
+ __ str(R0, Address(entry_address));
+ } else {
+ // For non-Baker read barrier, we need to re-calculate the address of the string entry.
+ CodeGeneratorARM::PcRelativePatchInfo* labels =
+ arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
+ __ BindTrackedLabel(&labels->movw_label);
+ __ movw(entry_address, /* placeholder */ 0u);
+ __ BindTrackedLabel(&labels->movt_label);
+ __ movt(entry_address, /* placeholder */ 0u);
+ __ BindTrackedLabel(&labels->add_pc_label);
+ __ add(entry_address, entry_address, ShifterOperand(PC));
+ __ str(R0, Address(entry_address));
+ }
+
arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
-
RestoreLiveRegisters(codegen, locations);
- // Store the resolved String to the BSS entry.
- // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the
- // .bss entry address in the fast path, so that we can avoid another calculation here.
- CodeGeneratorARM::PcRelativePatchInfo* labels =
- arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
- __ BindTrackedLabel(&labels->movw_label);
- __ movw(IP, /* placeholder */ 0u);
- __ BindTrackedLabel(&labels->movt_label);
- __ movt(IP, /* placeholder */ 0u);
- __ BindTrackedLabel(&labels->add_pc_label);
- __ add(IP, IP, ShifterOperand(PC));
- __ str(locations->Out().AsRegister<Register>(), Address(IP));
-
__ b(GetExitLabel());
}
@@ -5704,10 +5720,25 @@
HLoadString::LoadKind load_kind = load->GetLoadKind();
if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
- locations->SetInAt(0, Location::RequiresRegister());
locations->SetOut(Location::RegisterLocation(R0));
} else {
locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadString::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the pResolveString and/or marking to save everything, including temps.
+ // Note that IP may theoretically be clobbered by saving/restoring the live register
+ // (only one thanks to the custom calling convention), so we request a different temp.
+ locations->AddTemp(Location::RequiresRegister());
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
+ // that the kPrimNot result register is the same as the first argument register.
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ }
+ }
}
}
@@ -5743,15 +5774,16 @@
}
case HLoadString::LoadKind::kBssEntry: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
CodeGeneratorARM::PcRelativePatchInfo* labels =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
__ BindTrackedLabel(&labels->movw_label);
- __ movw(out, /* placeholder */ 0u);
+ __ movw(temp, /* placeholder */ 0u);
__ BindTrackedLabel(&labels->movt_label);
- __ movt(out, /* placeholder */ 0u);
+ __ movt(temp, /* placeholder */ 0u);
__ BindTrackedLabel(&labels->add_pc_label);
- __ add(out, out, ShifterOperand(PC));
- GenerateGcRootFieldLoad(load, out_loc, out, 0);
+ __ add(temp, temp, ShifterOperand(PC));
+ GenerateGcRootFieldLoad(load, out_loc, temp, 0);
SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
codegen_->AddSlowPath(slow_path);
__ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
@@ -5765,6 +5797,7 @@
// TODO: Consider re-adding the compiler code to do string dex cache lookup again.
DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
InvokeRuntimeCallingConvention calling_convention;
+ DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
__ LoadImmediate(calling_convention.GetRegisterAt(0), load->GetStringIndex());
codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 969d653..8197787 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -331,13 +331,20 @@
class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
public:
- explicit LoadStringSlowPathARM64(HLoadString* instruction) : SlowPathCodeARM64(instruction) {}
+ LoadStringSlowPathARM64(HLoadString* instruction, Register temp, vixl::aarch64::Label* adrp_label)
+ : SlowPathCodeARM64(instruction),
+ temp_(temp),
+ adrp_label_(adrp_label) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ // temp_ is a scratch register. Make sure it's not used for saving/restoring registers.
+ UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler());
+ temps.Exclude(temp_);
+
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -352,21 +359,21 @@
RestoreLiveRegisters(codegen, locations);
// Store the resolved String to the BSS entry.
- UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler());
- Register temp = temps.AcquireX();
const DexFile& dex_file = instruction_->AsLoadString()->GetDexFile();
- // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary
- // for the ADRP in the fast path, so that we can avoid the ADRP here.
- vixl::aarch64::Label* adrp_label =
- arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index);
- arm64_codegen->EmitAdrpPlaceholder(adrp_label, temp);
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // The string entry page address was preserved in temp_ thanks to kSaveEverything.
+ } else {
+ // For non-Baker read barrier, we need to re-calculate the address of the string entry page.
+ adrp_label_ = arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index);
+ arm64_codegen->EmitAdrpPlaceholder(adrp_label_, temp_);
+ }
vixl::aarch64::Label* strp_label =
- arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
+ arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label_);
{
SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
__ Bind(strp_label);
__ str(RegisterFrom(locations->Out(), Primitive::kPrimNot),
- MemOperand(temp, /* offset placeholder */ 0));
+ MemOperand(temp_, /* offset placeholder */ 0));
}
__ B(GetExitLabel());
@@ -375,6 +382,9 @@
const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; }
private:
+ const Register temp_;
+ vixl::aarch64::Label* adrp_label_;
+
DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
};
@@ -4238,11 +4248,24 @@
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
- locations->SetInAt(0, Location::RequiresRegister());
InvokeRuntimeCallingConvention calling_convention;
locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
} else {
locations->SetOut(Location::RequiresRegister());
+ if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the pResolveString and/or marking to save everything, including temps.
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
+ DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
+ RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot),
+ Primitive::kPrimNot).GetCode());
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ }
+ }
}
}
@@ -4277,18 +4300,21 @@
const DexFile& dex_file = load->GetDexFile();
uint32_t string_index = load->GetStringIndex();
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+ Register temp = temps.AcquireX();
vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
- codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
+ codegen_->EmitAdrpPlaceholder(adrp_label, temp);
// Add LDR with its PC-relative String patch.
vixl::aarch64::Label* ldr_label =
codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
// /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */
GenerateGcRootFieldLoad(load,
load->GetLocations()->Out(),
- out.X(),
+ temp,
/* placeholder */ 0u,
ldr_label);
- SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
+ SlowPathCodeARM64* slow_path =
+ new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load, temp, adrp_label);
codegen_->AddSlowPath(slow_path);
__ Cbz(out.X(), slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
@@ -4300,6 +4326,7 @@
// TODO: Re-add the compiler code to do string dex cache lookup again.
InvokeRuntimeCallingConvention calling_convention;
+ DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
__ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex());
codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 0b23599..ab60671 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -6059,8 +6059,7 @@
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
HLoadString::LoadKind load_kind = load->GetLoadKind();
- if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod ||
- load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
+ if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
load_kind == HLoadString::LoadKind::kBssEntry) {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -6068,6 +6067,17 @@
locations->SetOut(Location::RegisterLocation(EAX));
} else {
locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadString::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the pResolveString and/or marking to save everything.
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ }
+ }
}
}
@@ -6114,6 +6124,7 @@
// TODO: Re-add the compiler code to do string dex cache lookup again.
InvokeRuntimeCallingConvention calling_convention;
+ DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
__ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex()));
codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 28638d7..6518c32 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -299,9 +299,9 @@
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
- InvokeRuntimeCallingConvention calling_convention;
const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
- __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index));
+ // Custom calling convention: RAX serves as both input and output.
+ __ movl(CpuRegister(RAX), Immediate(string_index));
x86_64_codegen->InvokeRuntime(kQuickResolveString,
instruction_,
instruction_->GetDexPc(),
@@ -5450,10 +5450,20 @@
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
- locations->SetInAt(0, Location::RequiresRegister());
locations->SetOut(Location::RegisterLocation(RAX));
} else {
locations->SetOut(Location::RequiresRegister());
+ if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the pResolveString and/or marking to save everything.
+ // Custom calling convention: RAX serves as both input and output.
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(RAX));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ }
+ }
}
}
@@ -5493,9 +5503,8 @@
}
// TODO: Re-add the compiler code to do string dex cache lookup again.
- InvokeRuntimeCallingConvention calling_convention;
- __ movl(CpuRegister(calling_convention.GetRegisterAt(0)),
- Immediate(load->GetStringIndex()));
+ // Custom calling convention: RAX serves as both input and output.
+ __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex()));
codegen_->InvokeRuntime(kQuickResolveString,
load,
load->GetDexPc());
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index cdb4c25..bf70c55 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -239,6 +239,30 @@
.cfi_adjust_cfa_offset -56
.endm
+.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
+ add sp, #8 @ rewind sp
+ .cfi_adjust_cfa_offset -8
+ vpop {d0-d15}
+ .cfi_adjust_cfa_offset -128
+ add sp, #4 @ skip r0
+ .cfi_adjust_cfa_offset -4
+ .cfi_restore r0 @ debugger can no longer restore caller's r0
+ pop {r1-r12, lr} @ 13 words of callee saves and args
+ .cfi_restore r1
+ .cfi_restore r2
+ .cfi_restore r3
+ .cfi_restore r4
+ .cfi_restore r5
+ .cfi_restore r6
+ .cfi_restore r7
+ .cfi_restore r8
+ .cfi_restore r9
+ .cfi_restore r10
+ .cfi_restore r11
+ .cfi_restore r12
+ .cfi_restore lr
+ .cfi_adjust_cfa_offset -52
+.endm
+
.macro RETURN_IF_RESULT_IS_ZERO
cbnz r0, 1f @ result non-zero branch over
bx lr @ return
@@ -252,17 +276,23 @@
.endm
/*
- * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending
- * exception is Thread::Current()->exception_
+ * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+ * exception is Thread::Current()->exception_ when the runtime method frame is ready.
*/
-.macro DELIVER_PENDING_EXCEPTION
- .fnend
- .fnstart
- SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0 @ save callee saves for throw
+.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
mov r0, r9 @ pass Thread::Current
bl artDeliverPendingExceptionFromCode @ artDeliverPendingExceptionFromCode(Thread*)
.endm
+ /*
+ * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+ * exception is Thread::Current()->exception_.
+ */
+.macro DELIVER_PENDING_EXCEPTION
+ SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0 @ save callee saves for throw
+ DELIVER_PENDING_EXCEPTION_FRAME_READY
+.endm
+
.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
.extern \cxx_name
ENTRY \c_name
@@ -1078,41 +1108,71 @@
*/
ENTRY art_quick_resolve_string
- ldr r1, [sp] @ load referrer
- ldr r1, [r1, #ART_METHOD_DECLARING_CLASS_OFFSET] @ load declaring class
- ldr r1, [r1, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] @ load string dex cache
- ubfx r2, r0, #0, #STRING_DEX_CACHE_HASH_BITS
- add r1, r1, r2, LSL #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT
- ldrd r2, r3, [r1] @ load index into r3 and pointer into r2
- cmp r0, r3
+ push {r10-r12, lr}
+ .cfi_adjust_cfa_offset 16
+ .cfi_rel_offset r10, 0
+ .cfi_rel_offset r11, 4
+ .cfi_rel_offset ip, 8
+ .cfi_rel_offset lr, 12
+ ldr r10, [sp, #16] @ load referrer
+ ldr r10, [r10, #ART_METHOD_DECLARING_CLASS_OFFSET] @ load declaring class
+ ldr r10, [r10, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] @ load string dex cache
+ ubfx r11, r0, #0, #STRING_DEX_CACHE_HASH_BITS
+ add r10, r10, r11, LSL #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT
+ ldrd r10, r11, [r10] @ load index into r11 and pointer into r10
+ cmp r0, r11
bne .Lart_quick_resolve_string_slow_path
#ifdef USE_READ_BARRIER
- ldr r3, [rSELF, #THREAD_IS_GC_MARKING_OFFSET]
- cbnz r3, .Lart_quick_resolve_string_marking
+ ldr r0, [rSELF, #THREAD_IS_GC_MARKING_OFFSET]
+ cbnz r0, .Lart_quick_resolve_string_marking
+.Lart_quick_resolve_string_no_rb:
#endif
- mov r0, r2
- bx lr
-// Slow path case, the index did not match
-.Lart_quick_resolve_string_slow_path:
- SETUP_SAVE_REFS_ONLY_FRAME r2 @ save callee saves in case of GC
- mov r1, r9 @ pass Thread::Current
- mov r3, sp
- bl artResolveStringFromCode @ (uint32_t type_idx, Method* method, Thread*)
- RESTORE_SAVE_REFS_ONLY_FRAME
- RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ mov r0, r10
+ pop {r10-r12, pc}
+
+#ifdef USE_READ_BARRIER
// GC is marking case, need to check the mark bit.
.Lart_quick_resolve_string_marking:
- ldr r3, [r2, MIRROR_OBJECT_LOCK_WORD_OFFSET]
- tst r3, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
- mov r0, r2
- bne .Lart_quick_resolve_string_no_rb
- push {r1, r2, r3, lr} @ Save x1, LR
- .cfi_adjust_cfa_offset 16
- bl artReadBarrierMark @ Get the marked string back.
- pop {r1, r2, r3, lr} @ Restore registers.
+ ldr r0, [r10, MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ lsrs r0, #(LOCK_WORD_MARK_BIT_SHIFT + 1) @ shift the mark bit into the carry flag
+ bcs .Lart_quick_resolve_string_no_rb @ if already marked, skip the read barrier
+ mov r0, r10
+ .cfi_remember_state
+ pop {r10-r12, lr}
.cfi_adjust_cfa_offset -16
-.Lart_quick_resolve_string_no_rb:
+ .cfi_restore r10
+ .cfi_restore r11
+ .cfi_restore r12
+ .cfi_restore lr
+ // Note: art_quick_read_barrier_mark_reg00 clobbers IP, but entering at .Lslow_rb_* does not.
+ b .Lslow_rb_art_quick_read_barrier_mark_reg00 @ Get the marked string back.
+ .cfi_restore_state
+#endif
+
+// Slow path case, the index did not match
+.Lart_quick_resolve_string_slow_path:
+ push {r0-r9} @ 10 words of callee saves and args; {r10-r12, lr} already saved.
+ .cfi_adjust_cfa_offset 40
+ .cfi_rel_offset r0, 0
+ .cfi_rel_offset r1, 4
+ .cfi_rel_offset r2, 8
+ .cfi_rel_offset r3, 12
+ .cfi_rel_offset r4, 16
+ .cfi_rel_offset r5, 20
+ .cfi_rel_offset r6, 24
+ .cfi_rel_offset r7, 28
+ .cfi_rel_offset r8, 32
+ .cfi_rel_offset r9, 36
+ SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1 @ save callee saves in case of GC
+ mov r1, r9 @ pass Thread::Current
+ bl artResolveStringFromCode @ (uint32_t string_idx, Thread*)
+ cbz r0, 1f @ If result is null, deliver the OOME.
+ .cfi_remember_state
+ RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
bx lr
+ .cfi_restore_state
+1:
+ DELIVER_PENDING_EXCEPTION_FRAME_READY
END art_quick_resolve_string
// Generate the allocation entrypoints for each allocator.
@@ -1920,6 +1980,8 @@
* getting its argument and returning its result through register
* `reg`, saving and restoring all caller-save registers.
*
+ * IP is clobbered; `reg` must not be IP.
+ *
* If `reg` is different from `r0`, the generated function follows a
* non-standard runtime calling convention:
* - register `reg` is used to pass the (sole) argument of this
@@ -1936,36 +1998,71 @@
SMART_CBZ \reg, .Lret_rb_\name
// Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked.
ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
- ands ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+ tst ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
beq .Lslow_rb_\name
// Already marked, return right away.
+.Lret_rb_\name:
bx lr
.Lslow_rb_\name:
- push {r0-r5, r9, lr} @ save return address and core caller-save registers
- @ also save callee save r5 for 16 byte alignment
+ // Save IP: the kSaveEverything entrypoint art_quick_resolve_string makes a tail call here.
+ push {r0-r4, r9, ip, lr} @ save return address, core caller-save registers and ip
.cfi_adjust_cfa_offset 32
.cfi_rel_offset r0, 0
.cfi_rel_offset r1, 4
.cfi_rel_offset r2, 8
.cfi_rel_offset r3, 12
.cfi_rel_offset r4, 16
- .cfi_rel_offset r5, 20
- .cfi_rel_offset r9, 24
+ .cfi_rel_offset r9, 20
+ .cfi_rel_offset ip, 24
.cfi_rel_offset lr, 28
- vpush {s0-s15} @ save floating-point caller-save registers
- .cfi_adjust_cfa_offset 64
.ifnc \reg, r0
mov r0, \reg @ pass arg1 - obj from `reg`
.endif
+
+ vpush {s0-s15} @ save floating-point caller-save registers
+ .cfi_adjust_cfa_offset 64
bl artReadBarrierMark @ r0 <- artReadBarrierMark(obj)
- mov ip, r0 @ Save result in IP
vpop {s0-s15} @ restore floating-point registers
.cfi_adjust_cfa_offset -64
- pop {r0-r5, r9, lr} @ restore caller-save registers
- mov \reg, ip @ copy result to reg
-.Lret_rb_\name:
+
+ .ifc \reg, r0 @ Save result to the stack slot or destination register.
+ str r0, [sp, #0]
+ .else
+ .ifc \reg, r1
+ str r0, [sp, #4]
+ .else
+ .ifc \reg, r2
+ str r0, [sp, #8]
+ .else
+ .ifc \reg, r3
+ str r0, [sp, #12]
+ .else
+ .ifc \reg, r4
+ str r0, [sp, #16]
+ .else
+ .ifc \reg, r9
+ str r0, [sp, #20]
+ .else
+ mov \reg, r0
+ .endif
+ .endif
+ .endif
+ .endif
+ .endif
+ .endif
+
+ pop {r0-r4, r9, ip, lr} @ restore caller-save registers
+ .cfi_adjust_cfa_offset -32
+ .cfi_restore r0
+ .cfi_restore r1
+ .cfi_restore r2
+ .cfi_restore r3
+ .cfi_restore r4
+ .cfi_restore r9
+ .cfi_restore ip
+ .cfi_restore lr
bx lr
END \name
.endm
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 04a3cc6..483cee3 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -337,7 +337,7 @@
SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
.endm
-.macro RESTORE_SAVE_EVERYTHING_FRAME
+.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
// Restore FP registers.
// For better performance, load d0 and d31 separately, so that all LDPs are 16-byte aligned.
ldr d0, [sp, #8]
@@ -359,7 +359,6 @@
ldr d31, [sp, #256]
// Restore core registers.
- RESTORE_REG x0, 264
RESTORE_TWO_REGS x1, x2, 272
RESTORE_TWO_REGS x3, x4, 288
RESTORE_TWO_REGS x5, x6, 304
@@ -379,6 +378,11 @@
DECREASE_FRAME 512
.endm
+.macro RESTORE_SAVE_EVERYTHING_FRAME
+ RESTORE_REG x0, 264
+ RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
+.endm
+
.macro RETURN_IF_RESULT_IS_ZERO
cbnz x0, 1f // result non-zero branch over
ret // return
@@ -392,11 +396,10 @@
.endm
/*
- * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending
- * exception is Thread::Current()->exception_
+ * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+ * exception is Thread::Current()->exception_ when the runtime method frame is ready.
*/
-.macro DELIVER_PENDING_EXCEPTION
- SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
mov x0, xSELF
// Point of no return.
@@ -404,6 +407,15 @@
brk 0 // Unreached
.endm
+ /*
+ * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+ * exception is Thread::Current()->exception_.
+ */
+.macro DELIVER_PENDING_EXCEPTION
+ SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+ DELIVER_PENDING_EXCEPTION_FRAME_READY
+.endm
+
.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
ldr \reg, [xSELF, # THREAD_EXCEPTION_OFFSET] // Get exception field.
cbnz \reg, 1f
@@ -1638,40 +1650,54 @@
*/
ENTRY art_quick_resolve_string
- ldr x1, [sp] // load referrer
- ldr w2, [x1, #ART_METHOD_DECLARING_CLASS_OFFSET] // load declaring class
- ldr x1, [x2, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] // load string dex cache
- ubfx x2, x0, #0, #STRING_DEX_CACHE_HASH_BITS // get masked string index into x2
- ldr x2, [x1, x2, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT] // load dex cache pair into x2
- cmp x0, x2, lsr #32 // compare against upper 32 bits
+ SAVE_TWO_REGS_INCREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
+ ldr x29, [sp, #(2 * __SIZEOF_POINTER__)] // load referrer
+ ldr w29, [x29, #ART_METHOD_DECLARING_CLASS_OFFSET] // load declaring class
+ ldr x29, [x29, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] // load string dex cache
+ ubfx lr, x0, #0, #STRING_DEX_CACHE_HASH_BITS // get masked string index into LR
+ ldr x29, [x29, lr, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT] // load dex cache pair into x29
+ cmp x0, x29, lsr #32 // compare against upper 32 bits
bne .Lart_quick_resolve_string_slow_path
- ubfx x0, x2, #0, #32 // extract lower 32 bits into x0
+ ubfx x0, x29, #0, #32 // extract lower 32 bits into x0
#ifdef USE_READ_BARRIER
// Most common case: GC is not marking.
- ldr w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
- cbnz x3, .Lart_quick_resolve_string_marking
+ ldr w29, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
+ cbnz x29, .Lart_quick_resolve_string_marking
+.Lart_quick_resolve_string_no_rb:
#endif
+ .cfi_remember_state
+ RESTORE_TWO_REGS_DECREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
ret
+ .cfi_restore_state
+ .cfi_def_cfa_offset 16 // workaround for clang bug: 31975598
+
+#ifdef USE_READ_BARRIER
+// GC is marking case, need to check the mark bit.
+.Lart_quick_resolve_string_marking:
+ ldr x29, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ tbnz x29, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb
+ .cfi_remember_state
+ RESTORE_TWO_REGS_DECREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
+ // Note: art_quick_read_barrier_mark_reg00 clobbers IP0, but entering at .Lslow_rb_* does not.
+ b .Lslow_rb_art_quick_read_barrier_mark_reg00 // Get the marked string back.
+ .cfi_restore_state
+ .cfi_def_cfa_offset 16 // workaround for clang bug: 31975598
+#endif
// Slow path case, the index did not match.
.Lart_quick_resolve_string_slow_path:
- SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case of GC
+ INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)
+ SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR // save callee saves in case of GC
mov x1, xSELF // pass Thread::Current
bl artResolveStringFromCode // (int32_t string_idx, Thread* self)
- RESTORE_SAVE_REFS_ONLY_FRAME
- RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-
-// GC is marking case, need to check the mark bit.
-.Lart_quick_resolve_string_marking:
- ldr x3, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
- tbnz x3, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb
- // Save LR so that we can return, also x1 for alignment purposes.
- SAVE_TWO_REGS_INCREASE_FRAME x1, xLR, 16 // Save x1, LR.
- bl artReadBarrierMark // Get the marked string back.
- RESTORE_TWO_REGS_DECREASE_FRAME x1, xLR, 16 // Restore registers.
-.Lart_quick_resolve_string_no_rb:
- ret
-
+ cbz w0, 1f // If result is null, deliver the OOME.
+ .cfi_remember_state
+ RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
+ ret // return
+ .cfi_restore_state
+ .cfi_def_cfa_offset FRAME_SIZE_SAVE_EVERYTHING // workaround for clang bug: 31975598
+1:
+ DELIVER_PENDING_EXCEPTION_FRAME_READY
END art_quick_resolve_string
// Generate the allocation entrypoints for each allocator.
@@ -2513,9 +2539,10 @@
*/
// Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
ldr wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
- tbz wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lslow_path_rb_\name
+ tbz wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lslow_rb_\name
+.Lret_rb_\name:
ret
-.Lslow_path_rb_\name:
+.Lslow_rb_\name:
// Save all potentially live caller-save core registers.
SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 368
SAVE_TWO_REGS x2, x3, 16
@@ -2580,7 +2607,6 @@
// Restore return address and remove padding.
RESTORE_REG xLR, 360
DECREASE_FRAME 368
-.Lret_rb_\name:
ret
END \name
.endm
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 7bb59ef..f4f9a68 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -224,12 +224,11 @@
/*
* Macro that sets up the callee save frame to conform with
* Runtime::CreateCalleeSaveMethod(kSaveEverything)
- * when EDI is already saved.
+ * when EDI and ESI are already saved.
*/
-MACRO2(SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED, got_reg, temp_reg)
+MACRO2(SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED, got_reg, temp_reg)
// Save core registers from highest to lowest to agree with core spills bitmap.
- // EDI, or at least a placeholder for it, is already on the stack.
- PUSH esi
+ // EDI and ESI, or at least placeholders for them, are already on the stack.
PUSH ebp
PUSH ebx
PUSH edx
@@ -268,13 +267,25 @@
/*
* Macro that sets up the callee save frame to conform with
* Runtime::CreateCalleeSaveMethod(kSaveEverything)
+ * when EDI is already saved.
+ */
+MACRO2(SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED, got_reg, temp_reg)
+ // Save core registers from highest to lowest to agree with core spills bitmap.
+ // EDI, or at least a placeholder for it, is already on the stack.
+ PUSH esi
+ SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED RAW_VAR(got_reg), RAW_VAR(temp_reg)
+END_MACRO
+
+ /*
+ * Macro that sets up the callee save frame to conform with
+ * Runtime::CreateCalleeSaveMethod(kSaveEverything)
*/
MACRO2(SETUP_SAVE_EVERYTHING_FRAME, got_reg, temp_reg)
PUSH edi
SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED RAW_VAR(got_reg), RAW_VAR(temp_reg)
END_MACRO
-MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FPRS)
// Restore FPRs. Method and padding is still on the stack.
movsd 16(%esp), %xmm0
movsd 24(%esp), %xmm1
@@ -284,13 +295,10 @@
movsd 56(%esp), %xmm5
movsd 64(%esp), %xmm6
movsd 72(%esp), %xmm7
+END_MACRO
- // Remove save everything callee save method, stack alignment padding and FPRs.
- addl MACRO_LITERAL(16 + 8 * 8), %esp
- CFI_ADJUST_CFA_OFFSET(-(16 + 8 * 8))
-
- // Restore core registers.
- POP eax
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_EAX)
+ // Restore core registers (except eax).
POP ecx
POP edx
POP ebx
@@ -299,12 +307,32 @@
POP edi
END_MACRO
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
+ RESTORE_SAVE_EVERYTHING_FRAME_FPRS
+
+ // Remove save everything callee save method, stack alignment padding and FPRs.
+ addl MACRO_LITERAL(16 + 8 * 8), %esp
+ CFI_ADJUST_CFA_OFFSET(-(16 + 8 * 8))
+
+ POP eax
+ RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_EAX
+END_MACRO
+
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_EAX)
+ RESTORE_SAVE_EVERYTHING_FRAME_FPRS
+
+ // Remove save everything callee save method, stack alignment padding and FPRs, skip EAX.
+ addl MACRO_LITERAL(16 + 8 * 8 + 4), %esp
+ CFI_ADJUST_CFA_OFFSET(-(16 + 8 * 8 + 4))
+
+ RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_EAX
+END_MACRO
+
/*
- * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending
- * exception is Thread::Current()->exception_.
+ * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+ * exception is Thread::Current()->exception_ when the runtime method frame is ready.
*/
-MACRO0(DELIVER_PENDING_EXCEPTION)
- SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save callee saves for throw
+MACRO0(DELIVER_PENDING_EXCEPTION_FRAME_READY)
// Outgoing argument set up
subl MACRO_LITERAL(12), %esp // alignment padding
CFI_ADJUST_CFA_OFFSET(12)
@@ -314,6 +342,15 @@
UNREACHABLE
END_MACRO
+ /*
+ * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+ * exception is Thread::Current()->exception_.
+ */
+MACRO0(DELIVER_PENDING_EXCEPTION)
+ SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save callee saves for throw
+ DELIVER_PENDING_EXCEPTION_FRAME_READY
+END_MACRO
+
MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
DEFINE_FUNCTION VAR(c_name)
SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
@@ -1114,26 +1151,42 @@
END_FUNCTION art_quick_alloc_object_region_tlab
DEFINE_FUNCTION art_quick_resolve_string
- movl 4(%esp), %ecx // get referrer
- movl ART_METHOD_DECLARING_CLASS_OFFSET(%ecx), %ecx // get declaring class
- movl DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%ecx), %ecx // get string dex cache
- movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %edx
- andl %eax, %edx
- movlps (%ecx, %edx, STRING_DEX_CACHE_ELEMENT_SIZE), %xmm0 // load string idx and ptr to xmm0
- movd %xmm0, %ecx // extract pointer
+ PUSH edi
+ PUSH esi
+ // Save xmm0 at an aligned address on the stack.
+ subl MACRO_LITERAL(12), %esp
+ CFI_ADJUST_CFA_OFFSET(12)
+ movsd %xmm0, 0(%esp)
+ movl 24(%esp), %edi // get referrer
+ movl ART_METHOD_DECLARING_CLASS_OFFSET(%edi), %edi // get declaring class
+ movl DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%edi), %edi // get string dex cache
+ movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %esi
+ andl %eax, %esi
+ movlps (%edi, %esi, STRING_DEX_CACHE_ELEMENT_SIZE), %xmm0 // load string idx and ptr to xmm0
+ movd %xmm0, %edi // extract pointer
pshufd LITERAL(0x55), %xmm0, %xmm0 // shuffle index into lowest bits
- movd %xmm0, %edx // extract index
- cmp %edx, %eax
+ movd %xmm0, %esi // extract index
+ // Restore xmm0 and remove it together with padding from the stack.
+ movsd 0(%esp), %xmm0
+ addl MACRO_LITERAL(12), %esp
+ CFI_ADJUST_CFA_OFFSET(-12)
+ cmp %esi, %eax
jne .Lart_quick_resolve_string_slow_path
- movl %ecx, %eax
+ movl %edi, %eax
+ CFI_REMEMBER_STATE
+ POP esi
+ POP edi
#ifdef USE_READ_BARRIER
cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET
- jne .Lart_quick_resolve_string_marking
+ jne .Lnot_null_art_quick_read_barrier_mark_reg00
#endif
ret
+ CFI_RESTORE_STATE
+ CFI_DEF_CFA(esp, 12) // workaround for clang bug: 31975598
+
.Lart_quick_resolve_string_slow_path:
// Outgoing argument set up
- SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx
+ SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED ebx, ebx
subl LITERAL(8), %esp // push padding
CFI_ADJUST_CFA_OFFSET(8)
pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
@@ -1142,21 +1195,15 @@
call SYMBOL(artResolveStringFromCode)
addl LITERAL(16), %esp // pop arguments
CFI_ADJUST_CFA_OFFSET(-16)
- RESTORE_SAVE_REFS_ONLY_FRAME
- RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-.Lart_quick_resolve_string_marking:
- SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx
- testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax)
- jnz .Lart_quick_resolve_string_no_rb
- subl LITERAL(12), %esp // alignment padding
- CFI_ADJUST_CFA_OFFSET(12)
- PUSH eax // Pass the string as the first param.
- call SYMBOL(artReadBarrierMark)
- addl LITERAL(16), %esp
- CFI_ADJUST_CFA_OFFSET(-16)
-.Lart_quick_resolve_string_no_rb:
- RESTORE_SAVE_REFS_ONLY_FRAME
+ testl %eax, %eax // If result is null, deliver the OOME.
+ jz 1f
+ CFI_REMEMBER_STATE
+ RESTORE_SAVE_EVERYTHING_FRAME_KEEP_EAX
ret
+ CFI_RESTORE_STATE
+ CFI_DEF_CFA(esp, FRAME_SIZE_SAVE_EVERYTHING) // workaround for clang bug: 31975598
+1:
+ DELIVER_PENDING_EXCEPTION_FRAME_READY
END_FUNCTION art_quick_resolve_string
ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
@@ -2102,6 +2149,7 @@
// Null check so that we can load the lock word.
test REG_VAR(reg), REG_VAR(reg)
jz .Lret_rb_\name
+.Lnot_null_\name:
// Check the mark bit, if it is 1 return.
testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
jz .Lslow_rb_\name
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index af4a6c4..28018c5 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -76,6 +76,8 @@
#define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg
#define CFI_RESTORE(reg) .cfi_restore reg
#define CFI_REL_OFFSET(reg,size) .cfi_rel_offset reg,size
+ #define CFI_RESTORE_STATE .cfi_restore_state
+ #define CFI_REMEMBER_STATE .cfi_remember_state
#else
// Mac OS' doesn't like cfi_* directives.
#define CFI_STARTPROC
@@ -85,6 +87,8 @@
#define CFI_DEF_CFA_REGISTER(reg)
#define CFI_RESTORE(reg)
#define CFI_REL_OFFSET(reg,size)
+ #define CFI_RESTORE_STATE
+ #define CFI_REMEMBER_STATE
#endif
// Symbols.
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 54e52e5..92273bf 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -263,16 +263,15 @@
/*
* Macro that sets up the callee save frame to conform with
* Runtime::CreateCalleeSaveMethod(kSaveEverything)
- * when R15 is already saved.
+ * when R14 and R15 are already saved.
*/
-MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED)
+MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED)
#if defined(__APPLE__)
int3
int3
#else
// Save core registers from highest to lowest to agree with core spills bitmap.
- // R15, or at least a placeholder for it, is already on the stack.
- PUSH r14
+ // R14 and R15, or at least placeholders for them, are already on the stack.
PUSH r13
PUSH r12
PUSH r11
@@ -326,13 +325,23 @@
/*
* Macro that sets up the callee save frame to conform with
* Runtime::CreateCalleeSaveMethod(kSaveEverything)
+ * when R15 is already saved.
+ */
+MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED)
+ PUSH r14
+ SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED
+END_MACRO
+
+ /*
+ * Macro that sets up the callee save frame to conform with
+ * Runtime::CreateCalleeSaveMethod(kSaveEverything)
*/
MACRO0(SETUP_SAVE_EVERYTHING_FRAME)
PUSH r15
SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
END_MACRO
-MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FPRS)
// Restore FPRs. Method and padding is still on the stack.
movq 16(%rsp), %xmm0
movq 24(%rsp), %xmm1
@@ -350,12 +359,10 @@
movq 120(%rsp), %xmm13
movq 128(%rsp), %xmm14
movq 136(%rsp), %xmm15
+END_MACRO
- // Remove save everything callee save method, stack alignment padding and FPRs.
- addq MACRO_LITERAL(16 + 16 * 8), %rsp
- CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8))
- // Restore callee and GPR args, mixed together to agree with core spills bitmap.
- POP rax
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX)
+ // Restore callee and GPR args (except RAX), mixed together to agree with core spills bitmap.
POP rcx
POP rdx
POP rbx
@@ -372,19 +379,47 @@
POP r15
END_MACRO
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
+ RESTORE_SAVE_EVERYTHING_FRAME_FPRS
+
+ // Remove save everything callee save method, stack alignment padding and FPRs.
+ addq MACRO_LITERAL(16 + 16 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8))
+
+ POP rax
+ RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
+END_MACRO
+
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX)
+ RESTORE_SAVE_EVERYTHING_FRAME_FPRS
+
+ // Remove save everything callee save method, stack alignment padding and FPRs, skip RAX.
+ addq MACRO_LITERAL(16 + 16 * 8 + 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8 + 8))
+
+ RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
+END_MACRO
/*
- * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending
- * exception is Thread::Current()->exception_.
+ * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+ * exception is Thread::Current()->exception_ when the runtime method frame is ready.
*/
-MACRO0(DELIVER_PENDING_EXCEPTION)
- SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save callee saves for throw
+MACRO0(DELIVER_PENDING_EXCEPTION_FRAME_READY)
// (Thread*) setup
movq %gs:THREAD_SELF_OFFSET, %rdi
call SYMBOL(artDeliverPendingExceptionFromCode) // artDeliverPendingExceptionFromCode(Thread*)
UNREACHABLE
END_MACRO
+ /*
+ * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+ * exception is Thread::Current()->exception_.
+ */
+MACRO0(DELIVER_PENDING_EXCEPTION)
+ SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save callee saves for throw
+ DELIVER_PENDING_EXCEPTION_FRAME_READY
+END_MACRO
+
MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
DEFINE_FUNCTION VAR(c_name)
SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
@@ -1295,45 +1330,48 @@
END_FUNCTION art_quick_alloc_object_initialized_region_tlab
DEFINE_FUNCTION art_quick_resolve_string
- movq 8(%rsp), %rcx // get referrer
- movl ART_METHOD_DECLARING_CLASS_OFFSET(%rcx), %ecx // get declaring class
- movq DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%ecx), %rcx // get string dex cache
- movq LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %rdx
- andq %rdi, %rdx
- movq (%rcx, %rdx, STRING_DEX_CACHE_ELEMENT_SIZE), %rdx
- movl %edx, %eax
- shrq LITERAL(32), %rdx
- cmp %rdx, %rdi
+ // Custom calling convention: RAX serves as both input and output.
+ PUSH r15
+ PUSH r14
+ movq 24(%rsp), %r15 // get referrer
+ movl ART_METHOD_DECLARING_CLASS_OFFSET(%r15), %r15d // get declaring class
+ movq DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%r15d), %r15 // get string dex cache
+ movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %r14d
+ andl %eax, %r14d
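+ // Each dex cache element packs the string pointer (low 32 bits) and its
+ // dex string index (high 32 bits) into one 8-byte entry.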
+ movq (%r15, %r14, STRING_DEX_CACHE_ELEMENT_SIZE), %r14
+ movl %r14d, %r15d
+ shrq LITERAL(32), %r14
+ cmpl %r14d, %eax
jne .Lart_quick_resolve_string_slow_path
+ movl %r15d, %eax
+ CFI_REMEMBER_STATE
+ POP r14
+ POP r15
#ifdef USE_READ_BARRIER
cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
- jne .Lart_quick_resolve_string_marking
+ jne .Lnot_null_art_quick_read_barrier_mark_reg00
#endif
ret
-// Slow path, the index did not match
+ CFI_RESTORE_STATE
+ CFI_DEF_CFA(rsp, 24) // workaround for clang bug: 31975598
+
+// Slow path, the index did not match.
.Lart_quick_resolve_string_slow_path:
- SETUP_SAVE_REFS_ONLY_FRAME
- movq %rcx, %rax
+ SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED
// Outgoing argument set up
+ movl %eax, %edi // pass string index
movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()
call SYMBOL(artResolveStringFromCode) // artResolveStringFromCode(arg0, referrer, Thread*)
- RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
- RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-// GC is marking case, need to check the mark bit.
-.Lart_quick_resolve_string_marking:
- testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%rax)
- jnz .Lart_quick_resolve_string_no_rb
- // Save LR so that we can return, also x1 for alignment purposes
- PUSH rdi
- PUSH rsi
- subq LITERAL(8), %rsp // 16 byte alignment
- movq %rax, %rdi
- call SYMBOL(artReadBarrierMark)
- addq LITERAL(8), %rsp
- POP rsi
- POP rdi
-.Lart_quick_resolve_string_no_rb:
+
+ testl %eax, %eax // If result is null, deliver the OOME.
+ jz 1f
+ CFI_REMEMBER_STATE
+ RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX // restore frame up to return address
ret
+ CFI_RESTORE_STATE
+ CFI_DEF_CFA(rsp, FRAME_SIZE_SAVE_EVERYTHING) // workaround for clang bug: 31975598
+1:
+ DELIVER_PENDING_EXCEPTION_FRAME_READY
END_FUNCTION art_quick_resolve_string
ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
@@ -2228,6 +2266,7 @@
// Null check so that we can load the lock word.
testq REG_VAR(reg), REG_VAR(reg)
jz .Lret_rb_\name
+.Lnot_null_\name:
// Check the mark bit, if it is 1 return.
testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
jz .Lslow_rb_\name
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index 4311d19..2a3ffab 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -60,7 +60,11 @@
extern "C" mirror::String* artResolveStringFromCode(int32_t string_idx, Thread* self)
REQUIRES_SHARED(Locks::mutator_lock_) {
ScopedQuickEntrypointChecks sqec(self);
- auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
+ auto* caller = GetCalleeSaveMethodCaller(
+ self,
+ // TODO: Change art_quick_resolve_string on MIPS and MIPS64 to kSaveEverything.
+ (kRuntimeISA == kMips || kRuntimeISA == kMips64) ? Runtime::kSaveRefsOnly
+ : Runtime::kSaveEverything);
mirror::String* result = ResolveStringFromCode(caller, string_idx);
if (LIKELY(result != nullptr)) {
// For AOT code, we need a write barrier for the dex cache that holds the GC roots in the .bss.