Merge changes I295c7876,Ib4b84b7b
* changes:
ART: Remove PACKED from ArtMethod's ptr_sized_fields_
ART: Rename ArtMethod JNI field
diff --git a/build/Android.common_path.mk b/build/Android.common_path.mk
index b1644df..e213dc4 100644
--- a/build/Android.common_path.mk
+++ b/build/Android.common_path.mk
@@ -38,7 +38,7 @@
ifneq ($(TMPDIR),)
ART_HOST_TEST_DIR := $(TMPDIR)/test-art-$(shell echo $$PPID)
else
-ART_HOST_TEST_DIR := /tmp/test-art-$(shell echo $$PPID)
+ART_HOST_TEST_DIR := /tmp/$(USER)/test-art-$(shell echo $$PPID)
endif
# core.oat location on the device.
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index 9b4042c..f05648c 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -462,7 +462,7 @@
struct XGcOption {
// These defaults are used when the command line arguments for -Xgc:
// are either omitted completely or partially.
- gc::CollectorType collector_type_ = kUseReadBarrier ?
+ gc::CollectorType collector_type_ = kUseReadBarrier ?
// If RB is enabled (currently a build-time decision),
// use CC as the default GC.
gc::kCollectorTypeCC :
@@ -473,6 +473,7 @@
bool verify_pre_gc_rosalloc_ = kIsDebugBuild;
bool verify_pre_sweeping_rosalloc_ = false;
bool verify_post_gc_rosalloc_ = false;
+ bool measure_ = kIsDebugBuild;
bool gcstress_ = false;
};
@@ -515,6 +516,8 @@
xgc.gcstress_ = true;
} else if (gc_option == "nogcstress") {
xgc.gcstress_ = false;
+ } else if (gc_option == "measure") {
+ xgc.measure_ = true;
} else if ((gc_option == "precise") ||
(gc_option == "noprecise") ||
(gc_option == "verifycardtable") ||
diff --git a/compiler/Android.mk b/compiler/Android.mk
index f310565..e3f8a5c 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -67,6 +67,7 @@
optimizing/parallel_move_resolver.cc \
optimizing/prepare_for_register_allocation.cc \
optimizing/reference_type_propagation.cc \
+ optimizing/register_allocator.cc \
optimizing/register_allocation_resolver.cc \
optimizing/register_allocator_linear_scan.cc \
optimizing/select_generator.cc \
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 47e6625..5e6e175 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -80,7 +80,11 @@
virtual void EmitNativeCode(CodeGenerator* codegen) = 0;
+ // Save live core and floating-point caller-save registers and
+ // update the stack mask in `locations` for registers holding object
+ // references.
virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
+ // Restore live core and floating-point caller-save registers.
virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
bool IsCoreRegisterSaved(int reg) const {
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index ad0a4f47..236ed20 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -437,11 +437,9 @@
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- // Save live registers before the runtime call, and in particular
- // R0 (if it is live), as it is clobbered by functions
- // art_quick_read_barrier_mark_regX.
- SaveLiveRegisters(codegen, locations);
-
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
DCHECK_NE(reg, SP);
@@ -469,8 +467,6 @@
instruction_,
instruction_->GetDexPc(),
this);
-
- RestoreLiveRegisters(codegen, locations);
__ b(GetExitLabel());
}
@@ -1937,7 +1933,7 @@
__ LoadFromOffset(kLoadWord, temp, temp,
mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- invoke->GetImtIndex() % ImTable::kSize, kArmPointerSize));
+ invoke->GetImtIndex(), kArmPointerSize));
// temp = temp->GetImtEntryAt(method_offset);
__ LoadFromOffset(kLoadWord, temp, temp, method_offset);
uint32_t entry_point =
@@ -4437,6 +4433,10 @@
Location out_loc = locations->Out();
uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
Primitive::Type type = instruction->GetType();
+ HInstruction* array_instr = instruction->GetArray();
+ bool has_intermediate_address = array_instr->IsIntermediateAddress();
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+ DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
switch (type) {
case Primitive::kPrimBoolean:
@@ -4451,8 +4451,21 @@
LoadOperandType load_type = GetLoadOperandType(type);
__ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset);
} else {
- __ add(IP, obj, ShifterOperand(data_offset));
- codegen_->LoadFromShiftedRegOffset(type, out_loc, IP, index.AsRegister<Register>());
+ Register temp = IP;
+
+ if (has_intermediate_address) {
+ // We do not need to compute the intermediate address from the array: the
+ // input instruction has done it already. See the comment in
+ // `TryExtractArrayAccessAddress()`.
+ if (kIsDebugBuild) {
+ HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+ DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+ }
+ temp = obj;
+ } else {
+ __ add(temp, obj, ShifterOperand(data_offset));
+ }
+ codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
}
break;
}
@@ -4481,8 +4494,21 @@
// reference, if heap poisoning is enabled).
codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
} else {
- __ add(IP, obj, ShifterOperand(data_offset));
- codegen_->LoadFromShiftedRegOffset(type, out_loc, IP, index.AsRegister<Register>());
+ Register temp = IP;
+
+ if (has_intermediate_address) {
+ // We do not need to compute the intermediate address from the array: the
+ // input instruction has done it already. See the comment in
+ // `TryExtractArrayAccessAddress()`.
+ if (kIsDebugBuild) {
+ HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+ DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+ }
+ temp = obj;
+ } else {
+ __ add(temp, obj, ShifterOperand(data_offset));
+ }
+ codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
codegen_->MaybeRecordImplicitNullCheck(instruction);
// If read barriers are enabled, emit read barriers other than
@@ -4585,6 +4611,10 @@
uint32_t data_offset =
mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
Location value_loc = locations->InAt(2);
+ HInstruction* array_instr = instruction->GetArray();
+ bool has_intermediate_address = array_instr->IsIntermediateAddress();
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+ DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
switch (value_type) {
case Primitive::kPrimBoolean:
@@ -4599,10 +4629,23 @@
StoreOperandType store_type = GetStoreOperandType(value_type);
__ StoreToOffset(store_type, value_loc.AsRegister<Register>(), array, full_offset);
} else {
- __ add(IP, array, ShifterOperand(data_offset));
+ Register temp = IP;
+
+ if (has_intermediate_address) {
+ // We do not need to compute the intermediate address from the array: the
+ // input instruction has done it already. See the comment in
+ // `TryExtractArrayAccessAddress()`.
+ if (kIsDebugBuild) {
+ HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+ DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == data_offset);
+ }
+ temp = array;
+ } else {
+ __ add(temp, array, ShifterOperand(data_offset));
+ }
codegen_->StoreToShiftedRegOffset(value_type,
value_loc,
- IP,
+ temp,
index.AsRegister<Register>());
}
break;
@@ -4610,6 +4653,9 @@
case Primitive::kPrimNot: {
Register value = value_loc.AsRegister<Register>();
+ // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet.
+ // See the comment in instruction_simplifier_shared.cc.
+ DCHECK(!has_intermediate_address);
if (instruction->InputAt(2)->IsNullConstant()) {
// Just setting null.
@@ -4832,6 +4878,37 @@
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
+void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+ DCHECK(!kEmitCompilerReadBarrier);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset()));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location out = locations->Out();
+ Location first = locations->InAt(0);
+ Location second = locations->InAt(1);
+
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+ DCHECK(!kEmitCompilerReadBarrier);
+
+ if (second.IsRegister()) {
+ __ add(out.AsRegister<Register>(),
+ first.AsRegister<Register>(),
+ ShifterOperand(second.AsRegister<Register>()));
+ } else {
+ __ AddConstant(out.AsRegister<Register>(),
+ first.AsRegister<Register>(),
+ second.GetConstant()->AsIntConstant()->GetValue());
+ }
+}
+
void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) {
LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
? LocationSummary::kCallOnSlowPath
@@ -7033,7 +7110,7 @@
method_offset);
} else {
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- instruction->GetIndex() % ImTable::kSize, kArmPointerSize));
+ instruction->GetIndex(), kArmPointerSize));
__ LoadFromOffset(kLoadWord,
locations->Out().AsRegister<Register>(),
locations->InAt(0).AsRegister<Register>(),
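Note: the ArrayGet/ArraySet hunks above let the ARM code generator reuse an address that an HIntermediateAddress input has already materialized, instead of recomputing `array + data_offset` at every access. A hedged sketch of the resulting code shape, written as comments; the register names and the 4-byte element size are illustrative only:

    // Previous shape: every access pays for the add.
    //   add ip, r_array, #data_offset
    //   ldr r_out, [ip, r_index, lsl #2]
    //
    // With HIntermediateAddress: the add is emitted once by
    // VisitIntermediateAddress (and can be deduplicated across accesses to the
    // same array by the GVN pass added in optimizing_compiler.cc further down),
    // so each access becomes:
    //   ldr r_out, [r_addr, r_index, lsl #2]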
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index bf2c598..76b0797 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -603,11 +603,9 @@
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- // Save live registers before the runtime call, and in particular
- // W0 (if it is live), as it is clobbered by functions
- // art_quick_read_barrier_mark_regX.
- SaveLiveRegisters(codegen, locations);
-
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
DCHECK_NE(obj_.reg(), LR);
@@ -635,8 +633,6 @@
instruction_,
instruction_->GetDexPc(),
this);
-
- RestoreLiveRegisters(codegen, locations);
__ B(GetExitLabel());
}
@@ -690,10 +686,9 @@
instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
- // The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
DCHECK(!(instruction_->IsArrayGet() &&
- instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()));
+ instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
__ Bind(GetEntryLabel());
@@ -1983,9 +1978,8 @@
}
}
-void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) {
- // The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
+void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
DCHECK(!kEmitCompilerReadBarrier);
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -1994,10 +1988,9 @@
locations->SetOut(Location::RequiresRegister());
}
-void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress(
- HArm64IntermediateAddress* instruction) {
- // The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
+void InstructionCodeGeneratorARM64::VisitIntermediateAddress(
+ HIntermediateAddress* instruction) {
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
DCHECK(!kEmitCompilerReadBarrier);
__ Add(OutputRegister(instruction),
InputRegisterAt(instruction, 0),
@@ -2097,9 +2090,8 @@
if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// Object ArrayGet with Baker's read barrier case.
Register temp = temps.AcquireW();
- // The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
- DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress());
+ // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+ DCHECK(!instruction->GetArray()->IsIntermediateAddress());
// Note that a potential implicit null check is handled in the
// CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -2112,15 +2104,15 @@
source = HeapOperand(obj, offset);
} else {
Register temp = temps.AcquireSameSizeAs(obj);
- if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+ if (instruction->GetArray()->IsIntermediateAddress()) {
// The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
+ // HIntermediateAddress instruction yet.
DCHECK(!kEmitCompilerReadBarrier);
// We do not need to compute the intermediate address from the array: the
// input instruction has done it already. See the comment in
- // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
+ // `TryExtractArrayAccessAddress()`.
if (kIsDebugBuild) {
- HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
+ HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
}
temp = obj;
@@ -2204,15 +2196,15 @@
} else {
UseScratchRegisterScope temps(masm);
Register temp = temps.AcquireSameSizeAs(array);
- if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+ if (instruction->GetArray()->IsIntermediateAddress()) {
// The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
+ // HIntermediateAddress instruction yet.
DCHECK(!kEmitCompilerReadBarrier);
// We do not need to compute the intermediate address from the array: the
// input instruction has done it already. See the comment in
- // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
+ // `TryExtractArrayAccessAddress()`.
if (kIsDebugBuild) {
- HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
+ HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
}
temp = array;
@@ -2228,7 +2220,7 @@
codegen_->MaybeRecordImplicitNullCheck(instruction);
} else {
DCHECK(needs_write_barrier);
- DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress());
+ DCHECK(!instruction->GetArray()->IsIntermediateAddress());
vixl::aarch64::Label done;
SlowPathCodeARM64* slow_path = nullptr;
{
@@ -3561,7 +3553,7 @@
__ Ldr(temp,
MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- invoke->GetImtIndex() % ImTable::kSize, kArm64PointerSize));
+ invoke->GetImtIndex(), kArm64PointerSize));
// temp = temp->GetImtEntryAt(method_offset);
__ Ldr(temp, MemOperand(temp, method_offset));
// lr = temp->GetEntryPoint();
@@ -5382,7 +5374,7 @@
MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
} else {
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- instruction->GetIndex() % ImTable::kSize, kArm64PointerSize));
+ instruction->GetIndex(), kArm64PointerSize));
__ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
__ Ldr(XRegisterFrom(locations->Out()),
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index cf8928f..39248aa 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -3791,7 +3791,7 @@
__ LoadFromOffset(kLoadWord, temp, temp,
mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- invoke->GetImtIndex() % ImTable::kSize, kMipsPointerSize));
+ invoke->GetImtIndex(), kMipsPointerSize));
// temp = temp->GetImtEntryAt(method_offset);
__ LoadFromOffset(kLoadWord, temp, temp, method_offset);
// T9 = temp->GetEntryPoint();
@@ -5389,7 +5389,7 @@
method_offset);
} else {
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- instruction->GetIndex() % ImTable::kSize, kMipsPointerSize));
+ instruction->GetIndex(), kMipsPointerSize));
__ LoadFromOffset(kLoadWord,
locations->Out().AsRegister<Register>(),
locations->InAt(0).AsRegister<Register>(),
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index cf3c42e..29b8c20 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -2951,7 +2951,7 @@
__ LoadFromOffset(kLoadDoubleword, temp, temp,
mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- invoke->GetImtIndex() % ImTable::kSize, kMips64PointerSize));
+ invoke->GetImtIndex(), kMips64PointerSize));
// temp = temp->GetImtEntryAt(method_offset);
__ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
// T9 = temp->GetEntryPoint();
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 425f31c..82baaa0 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -472,11 +472,9 @@
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- // Save live registers before the runtime call, and in particular
- // EAX (if it is live), as it is clobbered by functions
- // art_quick_read_barrier_mark_regX.
- SaveLiveRegisters(codegen, locations);
-
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
DCHECK_NE(reg, ESP);
@@ -502,8 +500,6 @@
instruction_,
instruction_->GetDexPc(),
this);
-
- RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
}
@@ -2093,7 +2089,7 @@
Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
// temp = temp->GetImtEntryAt(method_offset);
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- invoke->GetImtIndex() % ImTable::kSize, kX86PointerSize));
+ invoke->GetImtIndex(), kX86PointerSize));
__ movl(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
__ call(Address(temp,
@@ -4115,7 +4111,7 @@
Address(locations->InAt(0).AsRegister<Register>(), method_offset));
} else {
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- instruction->GetIndex() % ImTable::kSize, kX86PointerSize));
+ instruction->GetIndex(), kX86PointerSize));
__ movl(locations->Out().AsRegister<Register>(),
Address(locations->InAt(0).AsRegister<Register>(),
mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index bd4ded1..b6ba30e 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -493,11 +493,9 @@
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- // Save live registers before the runtime call, and in particular
- // RDI and/or RAX (if they are live), as they are clobbered by
- // functions art_quick_read_barrier_mark_regX.
- SaveLiveRegisters(codegen, locations);
-
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
DCHECK_NE(reg, RSP);
@@ -523,8 +521,6 @@
instruction_,
instruction_->GetDexPc(),
this);
-
- RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
}
@@ -2322,7 +2318,7 @@
Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
// temp = temp->GetImtEntryAt(method_offset);
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- invoke->GetImtIndex() % ImTable::kSize, kX86_64PointerSize));
+ invoke->GetImtIndex(), kX86_64PointerSize));
// temp = temp->GetImtEntryAt(method_offset);
__ movq(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
@@ -4048,7 +4044,7 @@
Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
} else {
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
- instruction->GetIndex() % ImTable::kSize, kX86_64PointerSize));
+ instruction->GetIndex(), kX86_64PointerSize));
__ movq(locations->Out().AsRegister<CpuRegister>(),
Address(locations->InAt(0).AsRegister<CpuRegister>(),
mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 6f487c5..fe9a7af 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -219,7 +219,7 @@
PrepareForRegisterAllocation(graph).Run();
liveness.Analyze();
- RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
+ RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters();
hook_before_codegen(graph);
InternalCodeAllocator allocator;
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index d2afa5b..af0ee4e 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -227,7 +227,7 @@
return vixl::aarch64::Assembler::IsImmMovn(value, vixl::aarch64::kXRegSize);
} else {
DCHECK(instr->IsAdd() ||
- instr->IsArm64IntermediateAddress() ||
+ instr->IsIntermediateAddress() ||
instr->IsBoundsCheck() ||
instr->IsCompare() ||
instr->IsCondition() ||
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 790751f..a592162 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -661,7 +661,7 @@
ArtMethod* new_method = nullptr;
if (invoke_instruction->IsInvokeInterface()) {
new_method = ic.GetTypeAt(i)->GetImt(pointer_size)->Get(
- method_index % ImTable::kSize, pointer_size);
+ method_index, pointer_size);
if (new_method->IsRuntimeMethod()) {
// Bail out as soon as we see a conflict trampoline in one of the target's
// interface table.
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index b412529..afac5f9 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -16,6 +16,7 @@
#include "instruction_builder.h"
+#include "art_method-inl.h"
#include "bytecode_utils.h"
#include "class_linker.h"
#include "driver/compiler_options.h"
@@ -890,7 +891,7 @@
return_type,
dex_pc,
method_idx,
- resolved_method->GetDexMethodIndex());
+ resolved_method->GetImtIndex());
}
return HandleInvoke(invoke,
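Note: the call sites in the code generators and in the inliner no longer apply `% ImTable::kSize` by hand because the modulo now lives behind `ArtMethod::GetImtIndex()` (hence the new `art_method-inl.h` include here). A hedged sketch of what such an inline accessor amounts to; the return type and exact formula are assumptions, not part of this patch:

    // Hypothetical shape of the accessor defined in art_method-inl.h.
    inline uint32_t ArtMethod::GetImtIndex() {
      return GetDexMethodIndex() % ImTable::kSize;
    }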
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index cd026b8..495f3fd 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -14,8 +14,10 @@
* limitations under the License.
*/
+#include "code_generator.h"
#include "instruction_simplifier_arm.h"
#include "instruction_simplifier_shared.h"
+#include "mirror/array-inl.h"
namespace art {
namespace arm {
@@ -38,6 +40,46 @@
}
}
+void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) {
+ size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+ Primitive::Type type = instruction->GetType();
+
+ if (type == Primitive::kPrimLong
+ || type == Primitive::kPrimFloat
+ || type == Primitive::kPrimDouble) {
+ // T32 doesn't support the ShiftedRegOffset memory addressing mode for these
+ // types, so this optimization cannot be applied.
+ return;
+ }
+
+ if (TryExtractArrayAccessAddress(instruction,
+ instruction->GetArray(),
+ instruction->GetIndex(),
+ data_offset)) {
+ RecordSimplification();
+ }
+}
+
+void InstructionSimplifierArmVisitor::VisitArraySet(HArraySet* instruction) {
+ size_t access_size = Primitive::ComponentSize(instruction->GetComponentType());
+ size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value();
+ Primitive::Type type = instruction->GetComponentType();
+
+ if (type == Primitive::kPrimLong
+ || type == Primitive::kPrimFloat
+ || type == Primitive::kPrimDouble) {
+ // T32 doesn't support ShiftedRegOffset mem address mode for these types
+ // to enable optimization.
+ return;
+ }
+
+ if (TryExtractArrayAccessAddress(instruction,
+ instruction->GetArray(),
+ instruction->GetIndex(),
+ data_offset)) {
+ RecordSimplification();
+ }
+}
} // namespace arm
} // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index 14c940e..3d297da 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -38,6 +38,8 @@
void VisitMul(HMul* instruction) OVERRIDE;
void VisitOr(HOr* instruction) OVERRIDE;
void VisitAnd(HAnd* instruction) OVERRIDE;
+ void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
+ void VisitArraySet(HArraySet* instruction) OVERRIDE;
OptimizingCompilerStats* stats_;
};
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 983d31d..6d107d5 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -28,56 +28,6 @@
using helpers::HasShifterOperand;
using helpers::ShifterOperandSupportsExtension;
-void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access,
- HInstruction* array,
- HInstruction* index,
- size_t data_offset) {
- if (kEmitCompilerReadBarrier) {
- // The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
- //
- // TODO: Handle this case properly in the ARM64 code generator and
- // re-enable this optimization; otherwise, remove this TODO.
- // b/26601270
- return;
- }
- if (index->IsConstant() ||
- (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
- // When the index is a constant all the addressing can be fitted in the
- // memory access instruction, so do not split the access.
- return;
- }
- if (access->IsArraySet() &&
- access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) {
- // The access may require a runtime call or the original array pointer.
- return;
- }
-
- // Proceed to extract the base address computation.
- ArenaAllocator* arena = GetGraph()->GetArena();
-
- HIntConstant* offset = GetGraph()->GetIntConstant(data_offset);
- HArm64IntermediateAddress* address =
- new (arena) HArm64IntermediateAddress(array, offset, kNoDexPc);
- address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
- access->GetBlock()->InsertInstructionBefore(address, access);
- access->ReplaceInput(address, 0);
- // Both instructions must depend on GC to prevent any instruction that can
- // trigger GC to be inserted between the two.
- access->AddSideEffects(SideEffects::DependsOnGC());
- DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC()));
- DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC()));
- // TODO: Code generation for HArrayGet and HArraySet will check whether the input address
- // is an HArm64IntermediateAddress and generate appropriate code.
- // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe
- // `HArm64Load` and `HArm64Store`). We defer these changes because these new instructions would
- // not bring any advantages yet.
- // Also see the comments in
- // `InstructionCodeGeneratorARM64::VisitArrayGet()` and
- // `InstructionCodeGeneratorARM64::VisitArraySet()`.
- RecordSimplification();
-}
-
bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use,
HInstruction* bitfield_op,
bool do_merge) {
@@ -190,19 +140,23 @@
void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
- TryExtractArrayAccessAddress(instruction,
- instruction->GetArray(),
- instruction->GetIndex(),
- data_offset);
+ if (TryExtractArrayAccessAddress(instruction,
+ instruction->GetArray(),
+ instruction->GetIndex(),
+ data_offset)) {
+ RecordSimplification();
+ }
}
void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) {
size_t access_size = Primitive::ComponentSize(instruction->GetComponentType());
size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value();
- TryExtractArrayAccessAddress(instruction,
- instruction->GetArray(),
- instruction->GetIndex(),
- data_offset);
+ if (TryExtractArrayAccessAddress(instruction,
+ instruction->GetArray(),
+ instruction->GetIndex(),
+ data_offset)) {
+ RecordSimplification();
+ }
}
void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) {
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 4735f85..28648b3 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -35,10 +35,6 @@
}
}
- void TryExtractArrayAccessAddress(HInstruction* access,
- HInstruction* array,
- HInstruction* index,
- size_t data_offset);
bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
bool TryMergeIntoShifterOperand(HInstruction* use,
HInstruction* bitfield_op,
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index dab1ebc..8f7778f 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -226,4 +226,59 @@
return false;
}
+
+bool TryExtractArrayAccessAddress(HInstruction* access,
+ HInstruction* array,
+ HInstruction* index,
+ size_t data_offset) {
+ if (kEmitCompilerReadBarrier) {
+ // The read barrier instrumentation does not support the
+ // HIntermediateAddress instruction yet.
+ //
+ // TODO: Handle this case properly in the ARM64 and ARM code generator and
+ // re-enable this optimization; otherwise, remove this TODO.
+ // b/26601270
+ return false;
+ }
+ if (index->IsConstant() ||
+ (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
+ // When the index is a constant all the addressing can be fitted in the
+ // memory access instruction, so do not split the access.
+ return false;
+ }
+ if (access->IsArraySet() &&
+ access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) {
+ // The access may require a runtime call or the original array pointer.
+ return false;
+ }
+
+ // Proceed to extract the base address computation.
+ HGraph* graph = access->GetBlock()->GetGraph();
+ ArenaAllocator* arena = graph->GetArena();
+
+ HIntConstant* offset = graph->GetIntConstant(data_offset);
+ HIntermediateAddress* address =
+ new (arena) HIntermediateAddress(array, offset, kNoDexPc);
+ address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
+ access->GetBlock()->InsertInstructionBefore(address, access);
+ access->ReplaceInput(address, 0);
+ // Both instructions must depend on GC to prevent any instruction that can
+ // trigger GC to be inserted between the two.
+ access->AddSideEffects(SideEffects::DependsOnGC());
+ DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC()));
+ DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC()));
+ // TODO: Code generation for HArrayGet and HArraySet will check whether the input address
+ // is an HIntermediateAddress and generate appropriate code.
+ // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe
+ // `HArm64Load` and `HArm64Store`, `HArmLoad` and `HArmStore`). We defer these changes
+ // because these new instructions would not bring any advantages yet.
+ // Also see the comments in
+ // `InstructionCodeGeneratorARM::VisitArrayGet()`
+ // `InstructionCodeGeneratorARM::VisitArraySet()`
+ // `InstructionCodeGeneratorARM64::VisitArrayGet()`
+ // `InstructionCodeGeneratorARM64::VisitArraySet()`.
+ return true;
+}
+
+
} // namespace art
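Note: a hedged before/after sketch of the HIR produced by TryExtractArrayAccessAddress; the instruction ids are invented for illustration:

    // Before:
    //   i12 ArrayGet [ i5 (array), i7 (index) ]
    // After:
    //   i19 IntConstant data_offset
    //   i20 IntermediateAddress [ i5 (array), i19 ]
    //   i12 ArrayGet [ i20, i7 (index) ]
    //
    // Both i20 and i12 carry SideEffects::DependsOnGC(), so nothing that can
    // trigger a collection may be scheduled between them, and i20 reports
    // IsActualObject() == false so it is never treated as a heap reference.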
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index b1fe8f4..56804f5 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -26,6 +26,11 @@
// a negated bitwise instruction.
bool TryMergeNegatedInput(HBinaryOperation* op);
+bool TryExtractArrayAccessAddress(HInstruction* access,
+ HInstruction* array,
+ HInstruction* index,
+ size_t data_offset);
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 0f0ef26..23ac457 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1289,7 +1289,8 @@
#else
#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) \
M(BitwiseNegatedRight, Instruction) \
- M(MultiplyAccumulate, Instruction)
+ M(MultiplyAccumulate, Instruction) \
+ M(IntermediateAddress, Instruction)
#endif
#ifndef ART_ENABLE_CODEGEN_arm
@@ -1303,8 +1304,7 @@
#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
#else
#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \
- M(Arm64DataProcWithShifterOp, Instruction) \
- M(Arm64IntermediateAddress, Instruction)
+ M(Arm64DataProcWithShifterOp, Instruction)
#endif
#ifndef ART_ENABLE_CODEGEN_mips
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 06b073c..3f88717 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -94,32 +94,6 @@
std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op);
-// This instruction computes an intermediate address pointing in the 'middle' of an object. The
-// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
-// never used across anything that can trigger GC.
-class HArm64IntermediateAddress FINAL : public HExpression<2> {
- public:
- HArm64IntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc)
- : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) {
- SetRawInputAt(0, base_address);
- SetRawInputAt(1, offset);
- }
-
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
- return true;
- }
- bool IsActualObject() const OVERRIDE { return false; }
-
- HInstruction* GetBaseAddress() const { return InputAt(0); }
- HInstruction* GetOffset() const { return InputAt(1); }
-
- DECLARE_INSTRUCTION(Arm64IntermediateAddress);
-
- private:
- DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
-};
-
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index f2d5cf3..8bd8667 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -113,6 +113,34 @@
DISALLOW_COPY_AND_ASSIGN(HBitwiseNegatedRight);
};
+
+// This instruction computes an intermediate address pointing in the 'middle' of an object. The
+// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
+// never used across anything that can trigger GC.
+class HIntermediateAddress FINAL : public HExpression<2> {
+ public:
+ HIntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc)
+ : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) {
+ SetRawInputAt(0, base_address);
+ SetRawInputAt(1, offset);
+ }
+
+ bool CanBeMoved() const OVERRIDE { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ return true;
+ }
+ bool IsActualObject() const OVERRIDE { return false; }
+
+ HInstruction* GetBaseAddress() const { return InputAt(0); }
+ HInstruction* GetOffset() const { return InputAt(1); }
+
+ DECLARE_INSTRUCTION(IntermediateAddress);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress);
+};
+
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 80affc3..0bca186 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -448,8 +448,12 @@
arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
arm::InstructionSimplifierArm* simplifier =
new (arena) arm::InstructionSimplifierArm(graph, stats);
+ SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
+ GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN_after_arch");
HOptimization* arm_optimizations[] = {
simplifier,
+ side_effects,
+ gvn,
fixups
};
RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
@@ -531,7 +535,7 @@
}
{
PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer);
- RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
+ RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters();
}
}
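Note: with the factory in place, a caller that wants a particular allocation strategy can ask for it explicitly, while the default argument keeps existing call sites unchanged. A usage sketch based on the declarations added in register_allocator.h below:

    RegisterAllocator* register_allocator = RegisterAllocator::Create(
        graph->GetArena(), codegen, liveness, RegisterAllocator::kRegisterAllocatorLinearScan);
    register_allocator->AllocateRegisters();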
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
new file mode 100644
index 0000000..2367ce1
--- /dev/null
+++ b/compiler/optimizing/register_allocator.cc
@@ -0,0 +1,266 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "register_allocator.h"
+
+#include <iostream>
+#include <sstream>
+
+#include "base/bit_vector-inl.h"
+#include "code_generator.h"
+#include "register_allocator_linear_scan.h"
+#include "ssa_liveness_analysis.h"
+
+
+namespace art {
+
+RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& liveness)
+ : allocator_(allocator),
+ codegen_(codegen),
+ liveness_(liveness) {}
+
+RegisterAllocator* RegisterAllocator::Create(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& analysis,
+ Strategy strategy) {
+ switch (strategy) {
+ case kRegisterAllocatorLinearScan:
+ return new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis);
+ default:
+ LOG(FATAL) << "Invalid register allocation strategy: " << strategy;
+ UNREACHABLE();
+ }
+}
+
+bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UNUSED,
+ InstructionSet instruction_set) {
+ return instruction_set == kArm
+ || instruction_set == kArm64
+ || instruction_set == kMips
+ || instruction_set == kMips64
+ || instruction_set == kThumb2
+ || instruction_set == kX86
+ || instruction_set == kX86_64;
+}
+
+class AllRangesIterator : public ValueObject {
+ public:
+ explicit AllRangesIterator(LiveInterval* interval)
+ : current_interval_(interval),
+ current_range_(interval->GetFirstRange()) {}
+
+ bool Done() const { return current_interval_ == nullptr; }
+ LiveRange* CurrentRange() const { return current_range_; }
+ LiveInterval* CurrentInterval() const { return current_interval_; }
+
+ void Advance() {
+ current_range_ = current_range_->GetNext();
+ if (current_range_ == nullptr) {
+ current_interval_ = current_interval_->GetNextSibling();
+ if (current_interval_ != nullptr) {
+ current_range_ = current_interval_->GetFirstRange();
+ }
+ }
+ }
+
+ private:
+ LiveInterval* current_interval_;
+ LiveRange* current_range_;
+
+ DISALLOW_COPY_AND_ASSIGN(AllRangesIterator);
+};
+
+bool RegisterAllocator::ValidateIntervals(const ArenaVector<LiveInterval*>& intervals,
+ size_t number_of_spill_slots,
+ size_t number_of_out_slots,
+ const CodeGenerator& codegen,
+ ArenaAllocator* allocator,
+ bool processing_core_registers,
+ bool log_fatal_on_failure) {
+ size_t number_of_registers = processing_core_registers
+ ? codegen.GetNumberOfCoreRegisters()
+ : codegen.GetNumberOfFloatingPointRegisters();
+ ArenaVector<ArenaBitVector*> liveness_of_values(
+ allocator->Adapter(kArenaAllocRegisterAllocatorValidate));
+ liveness_of_values.reserve(number_of_registers + number_of_spill_slots);
+
+ size_t max_end = 0u;
+ for (LiveInterval* start_interval : intervals) {
+ for (AllRangesIterator it(start_interval); !it.Done(); it.Advance()) {
+ max_end = std::max(max_end, it.CurrentRange()->GetEnd());
+ }
+ }
+
+ // Allocate a bit vector per register. A live interval that has a register
+ // allocated will populate the associated bit vector based on its live ranges.
+ for (size_t i = 0; i < number_of_registers + number_of_spill_slots; ++i) {
+ liveness_of_values.push_back(
+ ArenaBitVector::Create(allocator, max_end, false, kArenaAllocRegisterAllocatorValidate));
+ }
+
+ for (LiveInterval* start_interval : intervals) {
+ for (AllRangesIterator it(start_interval); !it.Done(); it.Advance()) {
+ LiveInterval* current = it.CurrentInterval();
+ HInstruction* defined_by = current->GetParent()->GetDefinedBy();
+ if (current->GetParent()->HasSpillSlot()
+ // Parameters and current method have their own stack slot.
+ && !(defined_by != nullptr && (defined_by->IsParameterValue()
+ || defined_by->IsCurrentMethod()))) {
+ BitVector* liveness_of_spill_slot = liveness_of_values[number_of_registers
+ + current->GetParent()->GetSpillSlot() / kVRegSize
+ - number_of_out_slots];
+ for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) {
+ if (liveness_of_spill_slot->IsBitSet(j)) {
+ if (log_fatal_on_failure) {
+ std::ostringstream message;
+ message << "Spill slot conflict at " << j;
+ LOG(FATAL) << message.str();
+ } else {
+ return false;
+ }
+ } else {
+ liveness_of_spill_slot->SetBit(j);
+ }
+ }
+ }
+
+ if (current->HasRegister()) {
+ if (kIsDebugBuild && log_fatal_on_failure && !current->IsFixed()) {
+ // Only check when an error is fatal. Only test code asks for non-fatal failures,
+ // and test code may not properly fill in the right information for the code generator.
+ CHECK(codegen.HasAllocatedRegister(processing_core_registers, current->GetRegister()));
+ }
+ BitVector* liveness_of_register = liveness_of_values[current->GetRegister()];
+ for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) {
+ if (liveness_of_register->IsBitSet(j)) {
+ if (current->IsUsingInputRegister() && current->CanUseInputRegister()) {
+ continue;
+ }
+ if (log_fatal_on_failure) {
+ std::ostringstream message;
+ message << "Register conflict at " << j << " ";
+ if (defined_by != nullptr) {
+ message << "(" << defined_by->DebugName() << ")";
+ }
+ message << "for ";
+ if (processing_core_registers) {
+ codegen.DumpCoreRegister(message, current->GetRegister());
+ } else {
+ codegen.DumpFloatingPointRegister(message, current->GetRegister());
+ }
+ LOG(FATAL) << message.str();
+ } else {
+ return false;
+ }
+ } else {
+ liveness_of_register->SetBit(j);
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+
+LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) {
+ DCHECK_GE(position, interval->GetStart());
+ DCHECK(!interval->IsDeadAt(position));
+ if (position == interval->GetStart()) {
+ // Spill slot will be allocated when handling `interval` again.
+ interval->ClearRegister();
+ if (interval->HasHighInterval()) {
+ interval->GetHighInterval()->ClearRegister();
+ } else if (interval->HasLowInterval()) {
+ interval->GetLowInterval()->ClearRegister();
+ }
+ return interval;
+ } else {
+ LiveInterval* new_interval = interval->SplitAt(position);
+ if (interval->HasHighInterval()) {
+ LiveInterval* high = interval->GetHighInterval()->SplitAt(position);
+ new_interval->SetHighInterval(high);
+ high->SetLowInterval(new_interval);
+ } else if (interval->HasLowInterval()) {
+ LiveInterval* low = interval->GetLowInterval()->SplitAt(position);
+ new_interval->SetLowInterval(low);
+ low->SetHighInterval(new_interval);
+ }
+ return new_interval;
+ }
+}
+
+LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t from, size_t to) {
+ HBasicBlock* block_from = liveness_.GetBlockFromPosition(from / 2);
+ HBasicBlock* block_to = liveness_.GetBlockFromPosition(to / 2);
+ DCHECK(block_from != nullptr);
+ DCHECK(block_to != nullptr);
+
+ // Both locations are in the same block. We split at the given location.
+ if (block_from == block_to) {
+ return Split(interval, to);
+ }
+
+ /*
+ * Non-linear control flow will force moves at every branch instruction to the new location.
+ * To avoid having all branches doing the moves, we find the next non-linear position and
+ * split the interval at this position. Take the following example (block number is the linear
+ * order position):
+ *
+ * B1
+ * / \
+ * B2 B3
+ * \ /
+ * B4
+ *
+ * B2 needs to split an interval, whose next use is in B4. If we were to split at the
+ * beginning of B4, B3 would need to do a move between B3 and B4 to ensure the interval
+ * is now in the correct location. This makes performance worse if the interval is spilled
+ * and both B2 and B3 need to reload it before entering B4.
+ *
+ * By splitting at B3, we give a chance to the register allocator to allocate the
+ * interval to the same register as in B1, and therefore avoid doing any
+ * moves in B3.
+ */
+ if (block_from->GetDominator() != nullptr) {
+ for (HBasicBlock* dominated : block_from->GetDominator()->GetDominatedBlocks()) {
+ size_t position = dominated->GetLifetimeStart();
+ if ((position > from) && (block_to->GetLifetimeStart() > position)) {
+ // Even if we found a better block, we continue iterating in case
+ // a dominated block is closer.
+ // Note that dominated blocks are not sorted in liveness order.
+ block_to = dominated;
+ DCHECK_NE(block_to, block_from);
+ }
+ }
+ }
+
+ // If `to` is in a loop, find the outermost loop header which does not contain `from`.
+ for (HLoopInformationOutwardIterator it(*block_to); !it.Done(); it.Advance()) {
+ HBasicBlock* header = it.Current()->GetHeader();
+ if (block_from->GetLifetimeStart() >= header->GetLifetimeStart()) {
+ break;
+ }
+ block_to = header;
+ }
+
+ // Split at the start of the found block, to piggyback on existing moves
+ // due to resolution of non-linear control flow (see `ConnectSplitSiblings`).
+ return Split(interval, block_to->GetLifetimeStart());
+}
+
+} // namespace art
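Note: to make the SplitBetween() heuristic concrete, a small worked example with invented lifetime positions for the diamond in the comment above: let B1, B2, B3 and B4 start at positions 2, 10, 20 and 30, with `from` = 14 (inside B2) and `to` = 32 (inside B4). Among the blocks dominated by B1, only B3 satisfies both conditions (20 > 14, and 20 is below the current block_to start of 30), so block_to becomes B3 and the interval is split at position 20, the start of B3, instead of at the start of B4.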
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
new file mode 100644
index 0000000..729eede
--- /dev/null
+++ b/compiler/optimizing/register_allocator.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_H_
+#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_H_
+
+#include "arch/instruction_set.h"
+#include "base/arena_containers.h"
+#include "base/arena_object.h"
+#include "base/macros.h"
+#include "primitive.h"
+
+namespace art {
+
+class CodeGenerator;
+class HBasicBlock;
+class HGraph;
+class HInstruction;
+class HParallelMove;
+class LiveInterval;
+class Location;
+class SsaLivenessAnalysis;
+
+/**
+ * Base class for any register allocator.
+ */
+class RegisterAllocator : public ArenaObject<kArenaAllocRegisterAllocator> {
+ public:
+ enum Strategy {
+ kRegisterAllocatorLinearScan
+ };
+
+ static constexpr Strategy kRegisterAllocatorDefault = kRegisterAllocatorLinearScan;
+
+ static RegisterAllocator* Create(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& analysis,
+ Strategy strategy = kRegisterAllocatorDefault);
+
+ virtual ~RegisterAllocator() = default;
+
+ // Main entry point for the register allocator. Given the liveness analysis,
+ // allocates registers to live intervals.
+ virtual void AllocateRegisters() = 0;
+
+ // Validate that the register allocator did not allocate the same register to
+ // intervals that intersect each other. Returns false if it failed.
+ virtual bool Validate(bool log_fatal_on_failure) = 0;
+
+ static bool CanAllocateRegistersFor(const HGraph& graph,
+ InstructionSet instruction_set);
+
+ // Verifies that live intervals do not conflict. Used by unit testing.
+ static bool ValidateIntervals(const ArenaVector<LiveInterval*>& intervals,
+ size_t number_of_spill_slots,
+ size_t number_of_out_slots,
+ const CodeGenerator& codegen,
+ ArenaAllocator* allocator,
+ bool processing_core_registers,
+ bool log_fatal_on_failure);
+
+ static constexpr const char* kRegisterAllocatorPassName = "register";
+
+ protected:
+ RegisterAllocator(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& analysis);
+
+ // Split `interval` at the position `position`. The new interval starts at `position`.
+ // If `position` is at the start of `interval`, returns `interval` with its
+ // register location(s) cleared.
+ static LiveInterval* Split(LiveInterval* interval, size_t position);
+
+ // Split `interval` at a position between `from` and `to`. The method will try
+ // to find an optimal split position.
+ LiveInterval* SplitBetween(LiveInterval* interval, size_t from, size_t to);
+
+ ArenaAllocator* const allocator_;
+ CodeGenerator* const codegen_;
+ const SsaLivenessAnalysis& liveness_;
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_H_
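Note: the Strategy enum and the switch in RegisterAllocator::Create() are the extension point for alternative allocators. A hedged sketch of what wiring in a second strategy would look like; the graph-coloring allocator named here is hypothetical and not part of this change:

    // register_allocator.h
    enum Strategy {
      kRegisterAllocatorLinearScan,
      kRegisterAllocatorGraphColor,  // hypothetical additional strategy
    };

    // register_allocator.cc, inside RegisterAllocator::Create():
    case kRegisterAllocatorGraphColor:
      return new (allocator) RegisterAllocatorGraphColor(allocator, codegen, analysis);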
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index c1797b0..a9151ba 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -38,12 +38,10 @@
return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister();
}
-RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
- CodeGenerator* codegen,
- const SsaLivenessAnalysis& liveness)
- : allocator_(allocator),
- codegen_(codegen),
- liveness_(liveness),
+RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& liveness)
+ : RegisterAllocator(allocator, codegen, liveness),
unhandled_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
unhandled_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
unhandled_(nullptr),
@@ -83,17 +81,6 @@
codegen->GetGraph()->GetMaximumNumberOfOutVRegs();
}
-bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UNUSED,
- InstructionSet instruction_set) {
- return instruction_set == kArm
- || instruction_set == kArm64
- || instruction_set == kMips
- || instruction_set == kMips64
- || instruction_set == kThumb2
- || instruction_set == kX86
- || instruction_set == kX86_64;
-}
-
static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) {
if (interval == nullptr) return false;
bool is_core_register = (interval->GetType() != Primitive::kPrimDouble)
@@ -101,7 +88,7 @@
return processing_core_registers == is_core_register;
}
-void RegisterAllocator::AllocateRegisters() {
+void RegisterAllocatorLinearScan::AllocateRegisters() {
AllocateRegistersInternal();
RegisterAllocationResolver(allocator_, codegen_, liveness_)
.Resolve(maximum_number_of_live_core_registers_,
@@ -141,7 +128,7 @@
}
}
-void RegisterAllocator::BlockRegister(Location location, size_t start, size_t end) {
+void RegisterAllocatorLinearScan::BlockRegister(Location location, size_t start, size_t end) {
int reg = location.reg();
DCHECK(location.IsRegister() || location.IsFpuRegister());
LiveInterval* interval = location.IsRegister()
@@ -162,7 +149,7 @@
interval->AddRange(start, end);
}
-void RegisterAllocator::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
+void RegisterAllocatorLinearScan::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
BlockRegister(Location::RegisterLocation(i), start, end);
@@ -175,7 +162,7 @@
}
}
-void RegisterAllocator::AllocateRegistersInternal() {
+void RegisterAllocatorLinearScan::AllocateRegistersInternal() {
// Iterate post-order, to ensure the list is sorted, and the last added interval
// is the one with the lowest start position.
for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
@@ -235,7 +222,7 @@
LinearScan();
}
-void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
+void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) {
LocationSummary* locations = instruction->GetLocations();
size_t position = instruction->GetLifetimePosition();
@@ -452,7 +439,7 @@
DISALLOW_COPY_AND_ASSIGN(AllRangesIterator);
};
-bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const {
+bool RegisterAllocatorLinearScan::ValidateInternal(bool log_fatal_on_failure) const {
// To simplify unit testing, we eagerly create the array of intervals, and
// call the helper method.
ArenaVector<LiveInterval*> intervals(allocator_->Adapter(kArenaAllocRegisterAllocatorValidate));
@@ -482,99 +469,7 @@
allocator_, processing_core_registers_, log_fatal_on_failure);
}
-bool RegisterAllocator::ValidateIntervals(const ArenaVector<LiveInterval*>& intervals,
- size_t number_of_spill_slots,
- size_t number_of_out_slots,
- const CodeGenerator& codegen,
- ArenaAllocator* allocator,
- bool processing_core_registers,
- bool log_fatal_on_failure) {
- size_t number_of_registers = processing_core_registers
- ? codegen.GetNumberOfCoreRegisters()
- : codegen.GetNumberOfFloatingPointRegisters();
- ArenaVector<ArenaBitVector*> liveness_of_values(
- allocator->Adapter(kArenaAllocRegisterAllocatorValidate));
- liveness_of_values.reserve(number_of_registers + number_of_spill_slots);
-
- size_t max_end = 0u;
- for (LiveInterval* start_interval : intervals) {
- for (AllRangesIterator it(start_interval); !it.Done(); it.Advance()) {
- max_end = std::max(max_end, it.CurrentRange()->GetEnd());
- }
- }
-
- // Allocate a bit vector per register. A live interval that has a register
- // allocated will populate the associated bit vector based on its live ranges.
- for (size_t i = 0; i < number_of_registers + number_of_spill_slots; ++i) {
- liveness_of_values.push_back(
- ArenaBitVector::Create(allocator, max_end, false, kArenaAllocRegisterAllocatorValidate));
- }
-
- for (LiveInterval* start_interval : intervals) {
- for (AllRangesIterator it(start_interval); !it.Done(); it.Advance()) {
- LiveInterval* current = it.CurrentInterval();
- HInstruction* defined_by = current->GetParent()->GetDefinedBy();
- if (current->GetParent()->HasSpillSlot()
- // Parameters and current method have their own stack slot.
- && !(defined_by != nullptr && (defined_by->IsParameterValue()
- || defined_by->IsCurrentMethod()))) {
- BitVector* liveness_of_spill_slot = liveness_of_values[number_of_registers
- + current->GetParent()->GetSpillSlot() / kVRegSize
- - number_of_out_slots];
- for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) {
- if (liveness_of_spill_slot->IsBitSet(j)) {
- if (log_fatal_on_failure) {
- std::ostringstream message;
- message << "Spill slot conflict at " << j;
- LOG(FATAL) << message.str();
- } else {
- return false;
- }
- } else {
- liveness_of_spill_slot->SetBit(j);
- }
- }
- }
-
- if (current->HasRegister()) {
- if (kIsDebugBuild && log_fatal_on_failure && !current->IsFixed()) {
- // Only check when an error is fatal. Only tests code ask for non-fatal failures
- // and test code may not properly fill the right information to the code generator.
- CHECK(codegen.HasAllocatedRegister(processing_core_registers, current->GetRegister()));
- }
- BitVector* liveness_of_register = liveness_of_values[current->GetRegister()];
- for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) {
- if (liveness_of_register->IsBitSet(j)) {
- if (current->IsUsingInputRegister() && current->CanUseInputRegister()) {
- continue;
- }
- if (log_fatal_on_failure) {
- std::ostringstream message;
- message << "Register conflict at " << j << " ";
- if (defined_by != nullptr) {
- message << "(" << defined_by->DebugName() << ")";
- }
- message << "for ";
- if (processing_core_registers) {
- codegen.DumpCoreRegister(message, current->GetRegister());
- } else {
- codegen.DumpFloatingPointRegister(message, current->GetRegister());
- }
- LOG(FATAL) << message.str();
- } else {
- return false;
- }
- } else {
- liveness_of_register->SetBit(j);
- }
- }
- }
- }
- }
- return true;
-}
-
-void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interval) const {
+void RegisterAllocatorLinearScan::DumpInterval(std::ostream& stream, LiveInterval* interval) const {
interval->Dump(stream);
stream << ": ";
if (interval->HasRegister()) {
@@ -589,7 +484,7 @@
stream << std::endl;
}
-void RegisterAllocator::DumpAllIntervals(std::ostream& stream) const {
+void RegisterAllocatorLinearScan::DumpAllIntervals(std::ostream& stream) const {
stream << "inactive: " << std::endl;
for (LiveInterval* inactive_interval : inactive_) {
DumpInterval(stream, inactive_interval);
@@ -611,7 +506,7 @@
}
// By the book implementation of a linear scan register allocator.
-void RegisterAllocator::LinearScan() {
+void RegisterAllocatorLinearScan::LinearScan() {
while (!unhandled_->empty()) {
// (1) Remove interval with the lowest start position from unhandled.
LiveInterval* current = unhandled_->back();
@@ -742,7 +637,7 @@
// Find a free register. If multiple are found, pick the register that
// is free the longest.
-bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) {
+bool RegisterAllocatorLinearScan::TryAllocateFreeReg(LiveInterval* current) {
size_t* free_until = registers_array_;
// First set all registers to be free.
@@ -865,13 +760,13 @@
return true;
}
-bool RegisterAllocator::IsBlocked(int reg) const {
+bool RegisterAllocatorLinearScan::IsBlocked(int reg) const {
return processing_core_registers_
? blocked_core_registers_[reg]
: blocked_fp_registers_[reg];
}
-int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const {
+int RegisterAllocatorLinearScan::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const {
int reg = kNoRegister;
// Pick the register pair that is used the last.
for (size_t i = 0; i < number_of_registers_; ++i) {
@@ -896,13 +791,13 @@
return reg;
}
-bool RegisterAllocator::IsCallerSaveRegister(int reg) const {
+bool RegisterAllocatorLinearScan::IsCallerSaveRegister(int reg) const {
return processing_core_registers_
? !codegen_->IsCoreCalleeSaveRegister(reg)
: !codegen_->IsFloatingPointCalleeSaveRegister(reg);
}
-int RegisterAllocator::FindAvailableRegister(size_t* next_use, LiveInterval* current) const {
+int RegisterAllocatorLinearScan::FindAvailableRegister(size_t* next_use, LiveInterval* current) const {
// We special case intervals that do not span a safepoint to try to find a caller-save
// register if one is available. We iterate from 0 to the number of registers,
// so if there are caller-save registers available at the end, we continue the iteration.
@@ -965,9 +860,9 @@
}
}
-bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
- size_t first_register_use,
- size_t* next_use) {
+bool RegisterAllocatorLinearScan::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
+ size_t first_register_use,
+ size_t* next_use) {
for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
LiveInterval* active = *it;
DCHECK(active->HasRegister());
@@ -997,7 +892,7 @@
// Find the register that is used the last, and spill the interval
// that holds it. If the first use of `current` is after that register
// we spill `current` instead.
-bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
+bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) {
size_t first_register_use = current->FirstRegisterUse();
if (current->HasRegister()) {
DCHECK(current->IsHighInterval());
@@ -1180,7 +1075,7 @@
}
}
-void RegisterAllocator::AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval) {
+void RegisterAllocatorLinearScan::AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval) {
DCHECK(!interval->IsFixed() && !interval->HasSpillSlot());
size_t insert_at = 0;
for (size_t i = array->size(); i > 0; --i) {
@@ -1209,93 +1104,7 @@
}
}
-LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t from, size_t to) {
- HBasicBlock* block_from = liveness_.GetBlockFromPosition(from / 2);
- HBasicBlock* block_to = liveness_.GetBlockFromPosition(to / 2);
- DCHECK(block_from != nullptr);
- DCHECK(block_to != nullptr);
-
- // Both locations are in the same block. We split at the given location.
- if (block_from == block_to) {
- return Split(interval, to);
- }
-
- /*
- * Non-linear control flow will force moves at every branch instruction to the new location.
- * To avoid having all branches doing the moves, we find the next non-linear position and
- * split the interval at this position. Take the following example (block number is the linear
- * order position):
- *
- * B1
- * / \
- * B2 B3
- * \ /
- * B4
- *
- * B2 needs to split an interval, whose next use is in B4. If we were to split at the
- * beginning of B4, B3 would need to do a move between B3 and B4 to ensure the interval
- * is now in the correct location. It makes performance worst if the interval is spilled
- * and both B2 and B3 need to reload it before entering B4.
- *
- * By splitting at B3, we give a chance to the register allocator to allocate the
- * interval to the same register as in B1, and therefore avoid doing any
- * moves in B3.
- */
- if (block_from->GetDominator() != nullptr) {
- for (HBasicBlock* dominated : block_from->GetDominator()->GetDominatedBlocks()) {
- size_t position = dominated->GetLifetimeStart();
- if ((position > from) && (block_to->GetLifetimeStart() > position)) {
- // Even if we found a better block, we continue iterating in case
- // a dominated block is closer.
- // Note that dominated blocks are not sorted in liveness order.
- block_to = dominated;
- DCHECK_NE(block_to, block_from);
- }
- }
- }
-
- // If `to` is in a loop, find the outermost loop header which does not contain `from`.
- for (HLoopInformationOutwardIterator it(*block_to); !it.Done(); it.Advance()) {
- HBasicBlock* header = it.Current()->GetHeader();
- if (block_from->GetLifetimeStart() >= header->GetLifetimeStart()) {
- break;
- }
- block_to = header;
- }
-
- // Split at the start of the found block, to piggy back on existing moves
- // due to resolution if non-linear control flow (see `ConnectSplitSiblings`).
- return Split(interval, block_to->GetLifetimeStart());
-}
-
-LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) {
- DCHECK_GE(position, interval->GetStart());
- DCHECK(!interval->IsDeadAt(position));
- if (position == interval->GetStart()) {
- // Spill slot will be allocated when handling `interval` again.
- interval->ClearRegister();
- if (interval->HasHighInterval()) {
- interval->GetHighInterval()->ClearRegister();
- } else if (interval->HasLowInterval()) {
- interval->GetLowInterval()->ClearRegister();
- }
- return interval;
- } else {
- LiveInterval* new_interval = interval->SplitAt(position);
- if (interval->HasHighInterval()) {
- LiveInterval* high = interval->GetHighInterval()->SplitAt(position);
- new_interval->SetHighInterval(high);
- high->SetLowInterval(new_interval);
- } else if (interval->HasLowInterval()) {
- LiveInterval* low = interval->GetLowInterval()->SplitAt(position);
- new_interval->SetLowInterval(low);
- low->SetHighInterval(new_interval);
- }
- return new_interval;
- }
-}
-
-void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
+void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) {
if (interval->IsHighInterval()) {
// The low interval already took care of allocating the spill slot.
DCHECK(!interval->GetLowInterval()->HasRegister());
@@ -1390,7 +1199,7 @@
parent->SetSpillSlot(slot);
}
-void RegisterAllocator::AllocateSpillSlotForCatchPhi(HPhi* phi) {
+void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) {
LiveInterval* interval = phi->GetLiveInterval();
HInstruction* previous_phi = phi->GetPrevious();
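Editorial note on the hunks above: the linear-scan allocator becomes a subclass and the strategy-independent pieces (Split, SplitBetween, ValidateIntervals, the allocator/codegen/liveness fields) move up to a RegisterAllocator base class. Callers are now expected to go through the base-class factory used by the updated unit tests further down. The sketch below is a minimal usage outline, assuming only the RegisterAllocator::Create signature visible at those call sites; the factory's declaration lives in the new register_allocator.h, which this diff includes but does not show, and the helper name AllocateAndCheck is made up for illustration.

    // Minimal usage sketch, assuming the Create signature seen at the test
    // call sites below; not a verbatim copy of ART code.
    static bool AllocateAndCheck(ArenaAllocator* allocator,
                                 CodeGenerator* codegen,
                                 const SsaLivenessAnalysis& liveness) {
      RegisterAllocator* register_allocator =
          RegisterAllocator::Create(allocator, codegen, liveness);
      register_allocator->AllocateRegisters();   // virtual, dispatches to linear scan
      return register_allocator->Validate(/* log_fatal_on_failure */ false);
    }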
diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h
index f32a4db..b6e4f92 100644
--- a/compiler/optimizing/register_allocator_linear_scan.h
+++ b/compiler/optimizing/register_allocator_linear_scan.h
@@ -21,6 +21,7 @@
#include "base/arena_containers.h"
#include "base/macros.h"
#include "primitive.h"
+#include "register_allocator.h"
namespace art {
@@ -37,19 +38,15 @@
/**
* An implementation of a linear scan register allocator on an `HGraph` with SSA form.
*/
-class RegisterAllocator {
+class RegisterAllocatorLinearScan : public RegisterAllocator {
public:
- RegisterAllocator(ArenaAllocator* allocator,
- CodeGenerator* codegen,
- const SsaLivenessAnalysis& analysis);
+ RegisterAllocatorLinearScan(ArenaAllocator* allocator,
+ CodeGenerator* codegen,
+ const SsaLivenessAnalysis& analysis);
- // Main entry point for the register allocator. Given the liveness analysis,
- // allocates registers to live intervals.
- void AllocateRegisters();
+ void AllocateRegisters() OVERRIDE;
- // Validate that the register allocator did not allocate the same register to
- // intervals that intersect each other. Returns false if it did not.
- bool Validate(bool log_fatal_on_failure) {
+ bool Validate(bool log_fatal_on_failure) OVERRIDE {
processing_core_registers_ = true;
if (!ValidateInternal(log_fatal_on_failure)) {
return false;
@@ -58,17 +55,6 @@
return ValidateInternal(log_fatal_on_failure);
}
- // Helper method for validation. Used by unit testing.
- static bool ValidateIntervals(const ArenaVector<LiveInterval*>& intervals,
- size_t number_of_spill_slots,
- size_t number_of_out_slots,
- const CodeGenerator& codegen,
- ArenaAllocator* allocator,
- bool processing_core_registers,
- bool log_fatal_on_failure);
-
- static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set);
-
size_t GetNumberOfSpillSlots() const {
return int_spill_slots_.size()
+ long_spill_slots_.size()
@@ -77,8 +63,6 @@
+ catch_phi_spill_slots_;
}
- static constexpr const char* kRegisterAllocatorPassName = "register";
-
private:
// Main methods of the allocator.
void LinearScan();
@@ -88,13 +72,6 @@
// Add `interval` in the given sorted list.
static void AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval);
- // Split `interval` at the position `position`. The new interval starts at `position`.
- LiveInterval* Split(LiveInterval* interval, size_t position);
-
- // Split `interval` at a position between `from` and `to`. The method will try
- // to find an optimal split position.
- LiveInterval* SplitBetween(LiveInterval* interval, size_t from, size_t to);
-
// Returns whether `reg` is blocked by the code generator.
bool IsBlocked(int reg) const;
@@ -127,10 +104,6 @@
size_t first_register_use,
size_t* next_use);
- ArenaAllocator* const allocator_;
- CodeGenerator* const codegen_;
- const SsaLivenessAnalysis& liveness_;
-
// List of intervals for core registers that must be processed, ordered by start
// position. Last entry is the interval that has the lowest start position.
// This list is initially populated before doing the linear scan.
@@ -206,7 +179,7 @@
ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
- DISALLOW_COPY_AND_ASSIGN(RegisterAllocator);
+ DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorLinearScan);
};
} // namespace art
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 602a14c..cbb7b2f 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -24,6 +24,7 @@
#include "driver/compiler_options.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
+#include "register_allocator.h"
#include "register_allocator_linear_scan.h"
#include "ssa_liveness_analysis.h"
#include "ssa_phi_elimination.h"
@@ -44,9 +45,9 @@
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
- return register_allocator.Validate(false);
+ RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
+ return register_allocator->Validate(false);
}
/**
@@ -295,9 +296,9 @@
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
- ASSERT_TRUE(register_allocator.Validate(false));
+ RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
+ ASSERT_TRUE(register_allocator->Validate(false));
HBasicBlock* loop_header = graph->GetBlocks()[2];
HPhi* phi = loop_header->GetFirstPhi()->AsPhi();
@@ -384,9 +385,9 @@
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
- ASSERT_TRUE(register_allocator.Validate(false));
+ RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
+ ASSERT_TRUE(register_allocator->Validate(false));
}
/**
@@ -408,7 +409,7 @@
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+ RegisterAllocatorLinearScan register_allocator(&allocator, &codegen, liveness);
// Add an artificial range to cover the temps that will be put in the unhandled list.
LiveInterval* unhandled = graph->GetEntryBlock()->GetFirstInstruction()->GetLiveInterval();
@@ -541,8 +542,9 @@
liveness.Analyze();
// Check that the register allocator is deterministic.
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 0);
ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 0);
@@ -560,8 +562,9 @@
// Set the phi to a specific register, and check that the inputs get allocated
// the same register.
phi->GetLocations()->UpdateOut(Location::RegisterLocation(2));
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
@@ -579,8 +582,9 @@
// Set input1 to a specific register, and check that the phi and other input get allocated
// the same register.
input1->GetLocations()->UpdateOut(Location::RegisterLocation(2));
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
@@ -598,8 +602,9 @@
// Set input2 to a specific register, and check that the phi and other input get allocated
// the same register.
input2->GetLocations()->UpdateOut(Location::RegisterLocation(2));
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
@@ -658,8 +663,9 @@
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
// Sanity check that in normal conditions, the register should be hinted to 0 (EAX).
ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 0);
@@ -677,8 +683,9 @@
// Don't use SetInAt because we are overriding an already allocated location.
ret->GetLocations()->inputs_[0] = Location::RegisterLocation(2);
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 2);
}
@@ -726,8 +733,9 @@
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
// Sanity check that in normal conditions, the registers are the same.
ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 1);
@@ -748,8 +756,9 @@
ASSERT_EQ(first_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
ASSERT_EQ(second_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 2);
ASSERT_EQ(second_sub->GetLiveInterval()->GetRegister(), 2);
@@ -795,8 +804,9 @@
SsaLivenessAnalysis liveness(graph, &codegen);
liveness.Analyze();
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
- register_allocator.AllocateRegisters();
+ RegisterAllocator* register_allocator =
+ RegisterAllocator::Create(&allocator, &codegen, liveness);
+ register_allocator->AllocateRegisters();
// div on x86 requires its first input in eax and the output be the same as the first input.
ASSERT_EQ(div->GetLiveInterval()->GetRegister(), 0);
@@ -892,7 +902,7 @@
liveness.instructions_from_lifetime_position_.push_back(user);
}
- RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+ RegisterAllocatorLinearScan register_allocator(&allocator, &codegen, liveness);
register_allocator.unhandled_core_intervals_.push_back(fourth);
register_allocator.unhandled_core_intervals_.push_back(third);
register_allocator.unhandled_core_intervals_.push_back(second);
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index e48a164..966587d 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -33,7 +33,9 @@
// Read barrier entrypoints.
// art_quick_read_barrier_mark_regX uses a non-standard calling
// convention: it expects its input in register X and returns its
-// result in that same register.
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
@@ -119,7 +121,7 @@
// Read barrier.
qpoints->pReadBarrierJni = ReadBarrierJni;
- qpoints->pReadBarrierMarkReg00 = artReadBarrierMark;
+ qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index f9c34f5..34d3158 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -672,6 +672,12 @@
.endif
.endm
+// Save rReg's value to [sp, #offset].
+.macro PUSH_REG rReg, offset
+ str \rReg, [sp, #\offset] @ save rReg
+ .cfi_rel_offset \rReg, \offset
+.endm
+
/*
* Macro to insert read barrier, only used in art_quick_aput_obj.
* rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
@@ -1752,30 +1758,83 @@
/*
* Create a function `name` calling the ReadBarrier::Mark routine,
* getting its argument and returning its result through register
- * `reg`, thus following a non-standard runtime calling convention:
- * - `reg` is used to pass the (sole) argument of this function
+ * `reg`, saving and restoring all caller-save registers.
+ *
+ * If `reg` is different from `r0`, the generated function follows a
+ * non-standard runtime calling convention:
+ * - register `reg` is used to pass the (sole) argument of this
+ * function (instead of R0);
+ * - register `reg` is used to return the result of this function
* (instead of R0);
- * - `reg` is used to return the result of this function (instead of R0);
* - R0 is treated like a normal (non-argument) caller-save register;
* - everything else is the same as in the standard runtime calling
- * convention (e.g. same callee-save registers).
+ * convention (e.g. standard callee-save registers are preserved).
*/
.macro READ_BARRIER_MARK_REG name, reg
ENTRY \name
- push {lr} @ save return address
- .cfi_adjust_cfa_offset 4
- .cfi_rel_offset lr, 0
- sub sp, #4 @ push padding (native calling convention 8-byte alignment)
- .cfi_adjust_cfa_offset 4
- mov r0, \reg @ pass arg1 - obj from `reg`
- bl artReadBarrierMark @ artReadBarrierMark(obj)
- mov \reg, r0 @ return result into `reg`
- add sp, #4 @ pop padding
- .cfi_adjust_cfa_offset -4
- pop {pc} @ return
+ push {r0-r4, r9, r12, lr} @ save return address and core caller-save registers
+ .cfi_adjust_cfa_offset 32
+ .cfi_rel_offset r0, 0
+ .cfi_rel_offset r1, 4
+ .cfi_rel_offset r2, 8
+ .cfi_rel_offset r3, 12
+ .cfi_rel_offset r4, 16
+ .cfi_rel_offset r9, 20
+ .cfi_rel_offset r12, 24
+ .cfi_rel_offset lr, 28
+ vpush {s0-s15} @ save floating-point caller-save registers
+ .cfi_adjust_cfa_offset 64
+
+ .ifnc \reg, r0
+ mov r0, \reg @ pass arg1 - obj from `reg`
+ .endif
+ bl artReadBarrierMark @ r0 <- artReadBarrierMark(obj)
+
+ vpop {s0-s15} @ restore floating-point registers
+ .cfi_adjust_cfa_offset -64
+ @ If `reg` is a caller-save register, save the result to its
+ @ corresponding stack slot; it will be restored by the "pop"
+ @ instruction below. Otherwise, move result into `reg`.
+ @
+ @ (Note that saving `reg` to its stack slot will overwrite the value
+ @ previously stored by the "push" instruction above. That is
+ @ alright, as in that case we know that `reg` is not a live
+ @ register, as it is used to pass the argument and return the result
+ @ of this function.)
+ .ifc \reg, r0
+ PUSH_REG r0, 0 @ copy result to r0's stack location
+ .else
+ .ifc \reg, r1
+ PUSH_REG r0, 4 @ copy result to r1's stack location
+ .else
+ .ifc \reg, r2
+ PUSH_REG r0, 8 @ copy result to r2's stack location
+ .else
+ .ifc \reg, r3
+ PUSH_REG r0, 12 @ copy result to r3's stack location
+ .else
+ .ifc \reg, r4
+ PUSH_REG r0, 16 @ copy result to r4's stack location
+ .else
+ .ifc \reg, r9
+ PUSH_REG r0, 20 @ copy result to r9's stack location
+ .else
+ .ifc \reg, r12
+ PUSH_REG r0, 24 @ copy result to r12's stack location
+ .else
+ mov \reg, r0 @ return result into `reg`
+ .endif
+ .endif
+ .endif
+ .endif
+ .endif
+ .endif
+ .endif
+ pop {r0-r4, r9, r12, pc} @ restore caller-save registers and return
END \name
.endm
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3
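Editorial note: the PUSH_REG-based .ifc chain above implements the trick spelled out in the comment block. For a `reg` that was pushed, the result returned in r0 is written into `reg`'s stack slot after the call, so the single bulk pop both restores the untouched caller-save registers and loads the result into `reg`; for other registers the result is simply moved. The C++ snippet below is a self-contained model of that slot-forwarding idea only; Frame, MarkAndRestore and the slot layout are invented for illustration and are not ART code.

    #include <array>
    #include <cstddef>
    #include <cstdint>

    using Frame = std::array<uint64_t, 8>;  // stands in for the pushed register block

    uint64_t MarkAndRestore(Frame& saved, size_t reg_slot, uint64_t (*mark)(uint64_t)) {
      uint64_t result = mark(saved[reg_slot]);  // call the mark routine on `reg`'s value
      saved[reg_slot] = result;                 // overwrite `reg`'s slot with the result
      // A bulk restore from `saved` now reloads every register, including the result.
      return result;
    }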
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 5385a2f..2e5f5ad 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -33,7 +33,9 @@
// Read barrier entrypoints.
// art_quick_read_barrier_mark_regX uses a non-standard calling
// convention: it expects its input in register X and returns its
-// result in that same register.
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
@@ -122,7 +124,7 @@
// Read barrier.
qpoints->pReadBarrierJni = ReadBarrierJni;
- qpoints->pReadBarrierMarkReg00 = artReadBarrierMark;
+ qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index c893e77..6173ae7 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1253,6 +1253,22 @@
.endif
.endm
+// Restore xReg1's value from [sp, #offset] if xReg1 is not the same as xExclude.
+// Restore xReg2's value from [sp, #(offset + 8)] if xReg2 is not the same as xExclude.
+.macro POP_REGS_NE xReg1, xReg2, offset, xExclude
+ .ifc \xReg1, \xExclude
+ ldr \xReg2, [sp, #(\offset + 8)] // restore xReg2
+ .else
+ .ifc \xReg2, \xExclude
+ ldr \xReg1, [sp, #\offset] // restore xReg1
+ .else
+ ldp \xReg1, \xReg2, [sp, #\offset] // restore xReg1 and xReg2
+ .endif
+ .endif
+ .cfi_restore \xReg1
+ .cfi_restore \xReg2
+.endm
+
/*
* Macro to insert read barrier, only used in art_quick_aput_obj.
* xDest, wDest and xObj are registers, offset is a defined literal such as
@@ -2222,56 +2238,148 @@
/*
* Create a function `name` calling the ReadBarrier::Mark routine,
- * getting its argument and returning its result through register
- * `reg`, thus following a non-standard runtime calling convention:
- * - `reg` is used to pass the (sole) argument of this function
+ * getting its argument and returning its result through W register
+ * `wreg` (corresponding to X register `xreg`), saving and restoring
+ * all caller-save registers.
+ *
+ * If `wreg` is different from `w0`, the generated function follows a
+ * non-standard runtime calling convention:
+ * - register `wreg` is used to pass the (sole) argument of this
+ * function (instead of W0);
+ * - register `wreg` is used to return the result of this function
* (instead of W0);
- * - `reg` is used to return the result of this function (instead of W0);
* - W0 is treated like a normal (non-argument) caller-save register;
* - everything else is the same as in the standard runtime calling
- * convention (e.g. same callee-save registers).
+ * convention (e.g. standard callee-save registers are preserved).
*/
-.macro READ_BARRIER_MARK_REG name, reg
+.macro READ_BARRIER_MARK_REG name, wreg, xreg
ENTRY \name
- str xLR, [sp, #-16]! // Save return address and add padding (16B align stack).
- .cfi_adjust_cfa_offset 16
- .cfi_rel_offset x30, 0
- mov w0, \reg // Pass arg1 - obj from `reg`
+ /*
+ * Allocate 46 stack slots * 8 = 368 bytes:
+ * - 20 slots for core registers X0-X19
+ * - 24 slots for floating-point registers D0-D7 and D16-D31
+ * - 1 slot for return address register XLR
+ * - 1 padding slot for 16-byte stack alignment
+ */
+ // Save all potentially live caller-save core registers.
+ stp x0, x1, [sp, #-368]!
+ .cfi_adjust_cfa_offset 368
+ .cfi_rel_offset x0, 0
+ .cfi_rel_offset x1, 8
+ stp x2, x3, [sp, #16]
+ .cfi_rel_offset x2, 16
+ .cfi_rel_offset x3, 24
+ stp x4, x5, [sp, #32]
+ .cfi_rel_offset x4, 32
+ .cfi_rel_offset x5, 40
+ stp x6, x7, [sp, #48]
+ .cfi_rel_offset x6, 48
+ .cfi_rel_offset x7, 56
+ stp x8, x9, [sp, #64]
+ .cfi_rel_offset x8, 64
+ .cfi_rel_offset x9, 72
+ stp x10, x11, [sp, #80]
+ .cfi_rel_offset x10, 80
+ .cfi_rel_offset x11, 88
+ stp x12, x13, [sp, #96]
+ .cfi_rel_offset x12, 96
+ .cfi_rel_offset x13, 104
+ stp x14, x15, [sp, #112]
+ .cfi_rel_offset x14, 112
+ .cfi_rel_offset x15, 120
+ stp x16, x17, [sp, #128]
+ .cfi_rel_offset x16, 128
+ .cfi_rel_offset x17, 136
+ stp x18, x19, [sp, #144]
+ .cfi_rel_offset x18, 144
+ .cfi_rel_offset x19, 152
+ // Save all potentially live caller-save floating-point registers.
+ stp d0, d1, [sp, #160]
+ stp d2, d3, [sp, #176]
+ stp d4, d5, [sp, #192]
+ stp d6, d7, [sp, #208]
+ stp d16, d17, [sp, #224]
+ stp d18, d19, [sp, #240]
+ stp d20, d21, [sp, #256]
+ stp d22, d23, [sp, #272]
+ stp d24, d25, [sp, #288]
+ stp d26, d27, [sp, #304]
+ stp d28, d29, [sp, #320]
+ stp d30, d31, [sp, #336]
+ // Save return address.
+ str xLR, [sp, #352]
+ .cfi_rel_offset x30, 352
+ // (sp + #360 is a padding slot)
+
+ .ifnc \wreg, w0
+ mov w0, \wreg // Pass arg1 - obj from `wreg`
+ .endif
bl artReadBarrierMark // artReadBarrierMark(obj)
- mov \reg, w0 // Return result into `reg`
- ldr xLR, [sp], #16 // Restore return address and remove padding.
+ .ifnc \wreg, w0
+ mov \wreg, w0 // Return result into `wreg`
+ .endif
+
+ // Restore core regs, except `xreg`, as `wreg` is used to return the
+ // result of this function (simply remove it from the stack instead).
+ POP_REGS_NE x0, x1, 0, \xreg
+ POP_REGS_NE x2, x3, 16, \xreg
+ POP_REGS_NE x4, x5, 32, \xreg
+ POP_REGS_NE x6, x7, 48, \xreg
+ POP_REGS_NE x8, x9, 64, \xreg
+ POP_REGS_NE x10, x11, 80, \xreg
+ POP_REGS_NE x12, x13, 96, \xreg
+ POP_REGS_NE x14, x15, 112, \xreg
+ POP_REGS_NE x16, x17, 128, \xreg
+ POP_REGS_NE x18, x19, 144, \xreg
+ // Restore floating-point registers.
+ ldp d0, d1, [sp, #160]
+ ldp d2, d3, [sp, #176]
+ ldp d4, d5, [sp, #192]
+ ldp d6, d7, [sp, #208]
+ ldp d16, d17, [sp, #224]
+ ldp d18, d19, [sp, #240]
+ ldp d20, d21, [sp, #256]
+ ldp d22, d23, [sp, #272]
+ ldp d24, d25, [sp, #288]
+ ldp d26, d27, [sp, #304]
+ ldp d28, d29, [sp, #320]
+ ldp d30, d31, [sp, #336]
+ // Restore return address and remove padding.
+ ldr xLR, [sp, #352]
.cfi_restore x30
- .cfi_adjust_cfa_offset -16
+ add sp, sp, #368
+ .cfi_adjust_cfa_offset -368
ret
END \name
.endm
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, w0, x0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1, x1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2, x2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3, x3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4, x4
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5, x5
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6, x6
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7, x7
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8, x8
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9, x9
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10, x10
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11, x11
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12, x12
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13, x13
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14, x14
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15, x15
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16, x16
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17, x17
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18, x18
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19, x19
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20, x20
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21, x21
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22, x22
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23, x23
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24, x24
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25, x25
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26, x26
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27, x27
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28, x28
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29, x29
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index b19aa01..b02edb6 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -28,8 +28,8 @@
namespace art {
// Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
- const mirror::Class* ref_class);
+extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
+ const mirror::Class* ref_class);
// Math entrypoints.
extern int32_t CmpgDouble(double a, double b);
extern int32_t CmplDouble(double a, double b);
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 8f13d58..4e9756c 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -31,7 +31,8 @@
// Read barrier entrypoints.
// art_quick_read_barrier_mark_regX uses a non-standard calling
// convention: it expects its input in register X and returns its
-// result in that same register.
+// result in that same register, and saves and restores all
+// caller-save registers.
extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index e75fecb..77e04e7 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1908,41 +1908,73 @@
UNREACHABLE
END_FUNCTION art_nested_signal_return
-// Call the ReadBarrierMark entry point, getting input and returning
-// result through EAX (register 0), following the standard runtime
-// calling convention.
-DEFINE_FUNCTION art_quick_read_barrier_mark_reg00
- subl LITERAL(8), %esp // alignment padding
- CFI_ADJUST_CFA_OFFSET(8)
- PUSH eax // pass arg1 - obj
- call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
- addl LITERAL(12), %esp // pop argument and remove padding
- CFI_ADJUST_CFA_OFFSET(-12)
- ret
-END_FUNCTION art_quick_read_barrier_mark_reg00
-
// Create a function `name` calling the ReadBarrier::Mark routine,
// getting its argument and returning its result through register
-// `reg`, thus following a non-standard runtime calling convention:
-// - `reg` is used to pass the (sole) argument of this function
+// `reg`, saving and restoring all caller-save registers.
+//
+// If `reg` is different from `eax`, the generated function follows a
+// non-standard runtime calling convention:
+// - register `reg` is used to pass the (sole) argument of this function
// (instead of EAX);
-// - `reg` is used to return the result of this function (instead of EAX);
+// - register `reg` is used to return the result of this function
+// (instead of EAX);
// - EAX is treated like a normal (non-argument) caller-save register;
// - everything else is the same as in the standard runtime calling
-// convention (e.g. same callee-save registers).
+// convention (e.g. standard callee-save registers are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
DEFINE_FUNCTION VAR(name)
- subl LITERAL(8), %esp // alignment padding
- CFI_ADJUST_CFA_OFFSET(8)
+ // Save all potentially live caller-save core registers.
+ PUSH eax
+ PUSH ecx
+ PUSH edx
+ PUSH ebx
+ // 8-byte align the stack to improve (8-byte) XMM register saving and restoring,
+ // and create space for caller-save floating-point registers.
+ subl MACRO_LITERAL(4 + 8 * 8), %esp
+ CFI_ADJUST_CFA_OFFSET(4 + 8 * 8)
+ // Save all potentially live caller-save floating-point registers.
+ movsd %xmm0, 0(%esp)
+ movsd %xmm1, 8(%esp)
+ movsd %xmm2, 16(%esp)
+ movsd %xmm3, 24(%esp)
+ movsd %xmm4, 32(%esp)
+ movsd %xmm5, 40(%esp)
+ movsd %xmm6, 48(%esp)
+ movsd %xmm7, 56(%esp)
+
+ subl LITERAL(4), %esp // alignment padding
+ CFI_ADJUST_CFA_OFFSET(4)
PUSH RAW_VAR(reg) // pass arg1 - obj from `reg`
call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
- movl %eax, REG_VAR(reg) // return result into `reg`
- addl LITERAL(12), %esp // pop argument and remove padding
- CFI_ADJUST_CFA_OFFSET(-12)
+ .ifnc RAW_VAR(reg), eax
+ movl %eax, REG_VAR(reg) // return result into `reg`
+ .endif
+ addl LITERAL(8), %esp // pop argument and remove padding
+ CFI_ADJUST_CFA_OFFSET(-8)
+
+ // Restore floating-point registers.
+ movsd 0(%esp), %xmm0
+ movsd 8(%esp), %xmm1
+ movsd 16(%esp), %xmm2
+ movsd 24(%esp), %xmm3
+ movsd 32(%esp), %xmm4
+ movsd 40(%esp), %xmm5
+ movsd 48(%esp), %xmm6
+ movsd 56(%esp), %xmm7
+ // Remove floating-point registers and padding.
+ addl MACRO_LITERAL(8 * 8 + 4), %esp
+ CFI_ADJUST_CFA_OFFSET(-(8 * 8 + 4))
+ // Restore core regs, except `reg`, as it is used to return the
+ // result of this function (simply remove it from the stack instead).
+ POP_REG_NE ebx, RAW_VAR(reg)
+ POP_REG_NE edx, RAW_VAR(reg)
+ POP_REG_NE ecx, RAW_VAR(reg)
+ POP_REG_NE eax, RAW_VAR(reg)
ret
END_FUNCTION VAR(name)
END_MACRO
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, eax
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, ecx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, edx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, ebx
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index cf0039c..c4e723c 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -52,7 +52,7 @@
#define LITERAL(value) $value
#if defined(__APPLE__)
- #define MACRO_LITERAL(value) $$(value)
+ #define MACRO_LITERAL(value) $(value)
#else
#define MACRO_LITERAL(value) $value
#endif
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index b566fb1..c2e3023 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -34,7 +34,8 @@
// Read barrier entrypoints.
// art_quick_read_barrier_mark_regX uses a non-standard calling
// convention: it expects its input in register X and returns its
-// result in that same register.
+// result in that same register, and saves and restores all
+// caller-save registers.
extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 496e6a8..784ec39 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1815,73 +1815,93 @@
UNREACHABLE
END_FUNCTION art_nested_signal_return
-// Call the ReadBarrier::Mark routine, getting argument and returning
-// result through RAX (register 0), thus following a non-standard
-// runtime calling convention:
-// - RAX is used to pass the (sole) argument of this function (instead
-// of RDI);
-// - RDI is treated like a normal (non-argument) caller-save register;
-// - everything else is the same as in the standard runtime calling
-// convention; in particular, RAX is still used to return the result
-// of this function.
-DEFINE_FUNCTION art_quick_read_barrier_mark_reg00
- SETUP_FP_CALLEE_SAVE_FRAME
- subq LITERAL(8), %rsp // Alignment padding.
- CFI_ADJUST_CFA_OFFSET(8)
- movq %rax, %rdi // Pass arg1 - obj from RAX.
- call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
- addq LITERAL(8), %rsp // Remove padding.
- CFI_ADJUST_CFA_OFFSET(-8)
- RESTORE_FP_CALLEE_SAVE_FRAME
- ret
-END_FUNCTION art_quick_read_barrier_mark_reg00
-
-// Call the ReadBarrier::Mark routine, getting argument and returning
-// result through RDI (register 7), thus following a non-standard
-// runtime calling convention:
-// - RDI is used to return the result of this function (instead of RAX);
-// - RAX is treated like a normal (non-result) caller-save register;
-// - everything else is the same as in the standard runtime calling
-// convention; in particular, RDI is still used to pass the (sole)
-// argument of this function.
-DEFINE_FUNCTION art_quick_read_barrier_mark_reg07
- SETUP_FP_CALLEE_SAVE_FRAME
- subq LITERAL(8), %rsp // Alignment padding.
- CFI_ADJUST_CFA_OFFSET(8)
- call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
- movq %rax, %rdi // Return result into RDI.
- addq LITERAL(8), %rsp // Remove padding.
- CFI_ADJUST_CFA_OFFSET(-8)
- RESTORE_FP_CALLEE_SAVE_FRAME
- ret
-END_FUNCTION art_quick_read_barrier_mark_reg07
-
// Create a function `name` calling the ReadBarrier::Mark routine,
// getting its argument and returning its result through register
-// `reg`, thus following a non-standard runtime calling convention:
-// - `reg` is used to pass the (sole) argument of this function (instead
-// of RDI);
-// - `reg` is used to return the result of this function (instead of RAX);
-// - RDI is treated like a normal (non-argument) caller-save register;
-// - RAX is treated like a normal (non-result) caller-save register;
+// `reg`, saving and restoring all caller-save registers.
+//
+// The generated function follows a non-standard runtime calling
+// convention:
+// - register `reg` (which may be different from RDI) is used to pass
+// the (sole) argument of this function;
+// - register `reg` (which may be different from RAX) is used to return
+// the result of this function (instead of RAX);
+// - if `reg` is different from `rdi`, RDI is treated like a normal
+// (non-argument) caller-save register;
+// - if `reg` is different from `rax`, RAX is treated like a normal
+// (non-result) caller-save register;
// - everything else is the same as in the standard runtime calling
-// convention (e.g. same callee-save registers).
+// convention (e.g. standard callee-save registers are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
DEFINE_FUNCTION VAR(name)
+ // Save all potentially live caller-save core registers.
+ PUSH rax
+ PUSH rcx
+ PUSH rdx
+ PUSH rsi
+ PUSH rdi
+ PUSH r8
+ PUSH r9
+ PUSH r10
+ PUSH r11
+ // Create space for caller-save floating-point registers.
+ subq MACRO_LITERAL(12 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(12 * 8)
+ // Save all potentially live caller-save floating-point registers.
+ movq %xmm0, 0(%rsp)
+ movq %xmm1, 8(%rsp)
+ movq %xmm2, 16(%rsp)
+ movq %xmm3, 24(%rsp)
+ movq %xmm4, 32(%rsp)
+ movq %xmm5, 40(%rsp)
+ movq %xmm6, 48(%rsp)
+ movq %xmm7, 56(%rsp)
+ movq %xmm8, 64(%rsp)
+ movq %xmm9, 72(%rsp)
+ movq %xmm10, 80(%rsp)
+ movq %xmm11, 88(%rsp)
SETUP_FP_CALLEE_SAVE_FRAME
- subq LITERAL(8), %rsp // Alignment padding.
- CFI_ADJUST_CFA_OFFSET(8)
- movq REG_VAR(reg), %rdi // Pass arg1 - obj from `reg`.
+
+ .ifnc RAW_VAR(reg), rdi
+ movq REG_VAR(reg), %rdi // Pass arg1 - obj from `reg`.
+ .endif
call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
- movq %rax, REG_VAR(reg) // Return result into `reg`.
- addq LITERAL(8), %rsp // Remove padding.
- CFI_ADJUST_CFA_OFFSET(-8)
+ .ifnc RAW_VAR(reg), rax
+ movq %rax, REG_VAR(reg) // Return result into `reg`.
+ .endif
+
RESTORE_FP_CALLEE_SAVE_FRAME
+ // Restore floating-point registers.
+ movq 0(%rsp), %xmm0
+ movq 8(%rsp), %xmm1
+ movq 16(%rsp), %xmm2
+ movq 24(%rsp), %xmm3
+ movq 32(%rsp), %xmm4
+ movq 40(%rsp), %xmm5
+ movq 48(%rsp), %xmm6
+ movq 56(%rsp), %xmm7
+ movq 64(%rsp), %xmm8
+ movq 72(%rsp), %xmm9
+ movq 80(%rsp), %xmm10
+ movq 88(%rsp), %xmm11
+ // Remove floating-point registers.
+ addq MACRO_LITERAL(12 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(-(12 * 8))
+ // Restore core regs, except `reg`, as it is used to return the
+ // result of this function (simply remove it from the stack instead).
+ POP_REG_NE r11, RAW_VAR(reg)
+ POP_REG_NE r10, RAW_VAR(reg)
+ POP_REG_NE r9, RAW_VAR(reg)
+ POP_REG_NE r8, RAW_VAR(reg)
+ POP_REG_NE rdi, RAW_VAR(reg)
+ POP_REG_NE rsi, RAW_VAR(reg)
+ POP_REG_NE rdx, RAW_VAR(reg)
+ POP_REG_NE rcx, RAW_VAR(reg)
+ POP_REG_NE rax, RAW_VAR(reg)
ret
END_FUNCTION VAR(name)
END_MACRO
-// Note: art_quick_read_barrier_mark_reg00 is implemented above.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx
@@ -1889,7 +1909,7 @@
// cannot be used to pass arguments.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi
-// Note: art_quick_read_barrier_mark_reg07 is implemented above.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 91258c7..32425d8 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -120,6 +120,10 @@
return dex_method_index_;
}
+inline uint32_t ArtMethod::GetImtIndex() {
+ return GetDexMethodIndex() % ImTable::kSize;
+}
+
inline ArtMethod** ArtMethod::GetDexCacheResolvedMethods(size_t pointer_size) {
return GetNativePointer<ArtMethod**>(DexCacheResolvedMethodsOffset(pointer_size),
pointer_size);
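Editorial note: the new GetImtIndex helper folds the dex method index into the fixed-size IMT with a modulo, which is why distinct interface methods can share a slot and why the conflict tables touched later in this change exist. The example below is illustrative only and assumes a table size of 64; the actual value is ImTable::kSize.

    #include <cstdint>

    // Same computation as ArtMethod::GetImtIndex(), with the table size passed
    // in explicitly; 64 is an assumed value for the example only.
    uint32_t SlotFor(uint32_t dex_method_index, uint32_t table_size) {
      return dex_method_index % table_size;
    }
    // SlotFor(75, 64) == 11 and SlotFor(139, 64) == 11: two unrelated interface
    // methods collide on slot 11 and must share a conflict table.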
diff --git a/runtime/art_method.h b/runtime/art_method.h
index d75113e..1d14203 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -421,6 +421,8 @@
ALWAYS_INLINE uint32_t GetDexMethodIndex() SHARED_REQUIRES(Locks::mutator_lock_);
+ ALWAYS_INLINE uint32_t GetImtIndex() SHARED_REQUIRES(Locks::mutator_lock_);
+
void SetDexMethodIndex(uint32_t new_idx) {
// Not called within a transaction.
dex_method_index_ = new_idx;
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index cb97faa..d0dad64 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -6159,11 +6159,6 @@
}
}
-static inline uint32_t GetIMTIndex(ArtMethod* interface_method)
- SHARED_REQUIRES(Locks::mutator_lock_) {
- return interface_method->GetDexMethodIndex() % ImTable::kSize;
-}
-
ImtConflictTable* ClassLinker::CreateImtConflictTable(size_t count,
LinearAlloc* linear_alloc,
size_t image_pointer_size) {
@@ -6215,7 +6210,7 @@
// or interface methods in the IMT here they will not create extra conflicts since we compare
// names and signatures in SetIMTRef.
ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
- const uint32_t imt_index = GetIMTIndex(interface_method);
+ const uint32_t imt_index = interface_method->GetImtIndex();
// There are only conflicts if all of the interface methods for an IMT slot don't have
// the same implementation method, keep track of this to avoid creating a conflict table in
@@ -6269,7 +6264,7 @@
}
DCHECK(implementation_method != nullptr);
ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
- const uint32_t imt_index = GetIMTIndex(interface_method);
+ const uint32_t imt_index = interface_method->GetImtIndex();
if (!imt[imt_index]->IsRuntimeMethod() ||
imt[imt_index] == unimplemented_method ||
imt[imt_index] == imt_conflict_method) {
@@ -6675,7 +6670,7 @@
auto* interface_method = iftable->GetInterface(i)->GetVirtualMethod(j, image_pointer_size_);
MethodNameAndSignatureComparator interface_name_comparator(
interface_method->GetInterfaceMethodIfProxy(image_pointer_size_));
- uint32_t imt_index = GetIMTIndex(interface_method);
+ uint32_t imt_index = interface_method->GetImtIndex();
ArtMethod** imt_ptr = &out_imt[imt_index];
// For each method listed in the interface's method list, find the
// matching method in our class's method list. We want to favor the
@@ -7700,7 +7695,7 @@
}
if (is_static) {
- resolved = mirror::Class::FindStaticField(self, klass, dex_cache.Get(), field_idx);
+ resolved = mirror::Class::FindStaticField(self, klass.Get(), dex_cache.Get(), field_idx);
} else {
resolved = klass->FindInstanceField(dex_cache.Get(), field_idx);
}
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index ab14655..7ecd595 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -19,7 +19,7 @@
#include "entrypoint_utils.h"
-#include "art_method.h"
+#include "art_method-inl.h"
#include "class_linker-inl.h"
#include "common_throws.h"
#include "dex_file.h"
@@ -600,7 +600,7 @@
}
}
case kInterface: {
- uint32_t imt_index = resolved_method->GetDexMethodIndex() % ImTable::kSize;
+ uint32_t imt_index = resolved_method->GetImtIndex();
size_t pointer_size = class_linker->GetImagePointerSize();
ArtMethod* imt_method = (*this_object)->GetClass()->GetImt(pointer_size)->
Get(imt_index, pointer_size);
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 1152b94..49043f6 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2170,8 +2170,7 @@
if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex)) {
// If the dex cache already resolved the interface method, look whether we have
// a match in the ImtConflictTable.
- uint32_t imt_index = interface_method->GetDexMethodIndex();
- ArtMethod* conflict_method = imt->Get(imt_index % ImTable::kSize, sizeof(void*));
+ ArtMethod* conflict_method = imt->Get(interface_method->GetImtIndex(), sizeof(void*));
if (LIKELY(conflict_method->IsRuntimeMethod())) {
ImtConflictTable* current_table = conflict_method->GetImtConflictTable(sizeof(void*));
DCHECK(current_table != nullptr);
@@ -2222,8 +2221,8 @@
// We arrive here if we have found an implementation, and it is not in the ImtConflictTable.
// We create a new table with the new pair { interface_method, method }.
- uint32_t imt_index = interface_method->GetDexMethodIndex();
- ArtMethod* conflict_method = imt->Get(imt_index % ImTable::kSize, sizeof(void*));
+ uint32_t imt_index = interface_method->GetImtIndex();
+ ArtMethod* conflict_method = imt->Get(imt_index, sizeof(void*));
if (conflict_method->IsRuntimeMethod()) {
ArtMethod* new_conflict_method = Runtime::Current()->GetClassLinker()->AddMethodToConflictTable(
cls.Get(),
@@ -2234,7 +2233,7 @@
if (new_conflict_method != conflict_method) {
// Update the IMT if we create a new conflict method. No fence needed here, as the
// data is consistent.
- imt->Set(imt_index % ImTable::kSize,
+ imt->Set(imt_index,
new_conflict_method,
sizeof(void*));
}
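Editorial note: with the index computation centralized, the trampoline path above reduces to computing the slot once via GetImtIndex(), fetching the method stored there, and either consulting its ImtConflictTable (when the slot holds a runtime method) or installing a new table containing the { interface method, implementation } pair. The snippet below is a deliberately simplified, self-contained model of that slot/table relationship; SimpleImt, kImtSize and the map-based "table" are invented for illustration and do not mirror ART's real data structures.

    #include <array>
    #include <cstdint>
    #include <unordered_map>

    constexpr uint32_t kImtSize = 64;  // assumed size, for illustration only

    struct SimpleImt {
      // slot -> { interface method id -> implementation id }; the per-slot map
      // plays the role of an ImtConflictTable.
      std::array<std::unordered_map<uint32_t, uint32_t>, kImtSize> slots;

      void AddPair(uint32_t interface_method_id, uint32_t implementation_id) {
        uint32_t slot = interface_method_id % kImtSize;        // GetImtIndex()
        slots[slot][interface_method_id] = implementation_id;  // grow the table
      }

      uint32_t Lookup(uint32_t interface_method_id) const {
        const auto& table = slots[interface_method_id % kImtSize];
        auto it = table.find(interface_method_id);
        return it != table.end() ? it->second : 0;  // 0 = not resolved yet
      }
    };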
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 3011112..4019a5b 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -153,6 +153,14 @@
}
}
+inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) {
+ // TODO: Consider removing this check when we are done investigating slow paths. b/30162165
+ if (UNLIKELY(mark_from_read_barrier_measurements_)) {
+ return MarkFromReadBarrierWithMeasurements(from_ref);
+ }
+ return Mark(from_ref);
+}
+
inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) {
DCHECK(region_space_->IsInFromSpace(from_ref));
LockWord lw = from_ref->GetLockWord(false);
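Editorial note: MarkFromReadBarrier is the new dispatch point. In the common case it falls through to Mark(), and only when slow-path measurement is enabled (measure_read_barrier_slow_path_) does it detour through MarkFromReadBarrierWithMeasurements, whose body is not part of the hunks shown here. The sketch below only models the accounting that the new counters (rb_slow_path_ns_, rb_slow_path_count_) suggest, using std::atomic and std::chrono rather than ART's own Atomic<> and timing helpers; treat it as an assumption, not the actual implementation.

    #include <atomic>
    #include <chrono>
    #include <cstdint>

    // Illustrative accounting only; the real logic lives in
    // MarkFromReadBarrierWithMeasurements, which this diff does not show.
    struct SlowPathStats {
      std::atomic<uint64_t> ns{0};     // total mutator time spent in the slow path
      std::atomic<uint64_t> count{0};  // number of slow-path entries
    };

    template <typename MarkFn>
    auto TimedMark(SlowPathStats& stats, MarkFn mark) -> decltype(mark()) {
      const auto start = std::chrono::steady_clock::now();
      stats.count.fetch_add(1, std::memory_order_relaxed);
      auto result = mark();
      const auto elapsed = std::chrono::steady_clock::now() - start;
      stats.ns.fetch_add(
          static_cast<uint64_t>(
              std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count()),
          std::memory_order_relaxed);
      return result;
    }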
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index b7b5aa0..d2d2f23 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -17,7 +17,9 @@
#include "concurrent_copying.h"
#include "art_field-inl.h"
+#include "base/histogram-inl.h"
#include "base/stl_util.h"
+#include "base/systrace.h"
#include "debugger.h"
#include "gc/accounting/heap_bitmap-inl.h"
#include "gc/accounting/space_bitmap-inl.h"
@@ -39,7 +41,9 @@
static constexpr size_t kDefaultGcMarkStackSize = 2 * MB;
-ConcurrentCopying::ConcurrentCopying(Heap* heap, const std::string& name_prefix)
+ConcurrentCopying::ConcurrentCopying(Heap* heap,
+ const std::string& name_prefix,
+ bool measure_read_barrier_slow_path)
: GarbageCollector(heap,
name_prefix + (name_prefix.empty() ? "" : " ") +
"concurrent copying + mark sweep"),
@@ -54,6 +58,14 @@
heap_mark_bitmap_(nullptr), live_stack_freeze_size_(0), mark_stack_mode_(kMarkStackModeOff),
weak_ref_access_enabled_(true),
skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock),
+ measure_read_barrier_slow_path_(measure_read_barrier_slow_path),
+ rb_slow_path_ns_(0),
+ rb_slow_path_count_(0),
+ rb_slow_path_count_gc_(0),
+ rb_slow_path_histogram_lock_("Read barrier histogram lock"),
+ rb_slow_path_time_histogram_("Mutator time in read barrier slow path", 500, 32),
+ rb_slow_path_count_total_(0),
+ rb_slow_path_count_gc_total_(0),
rb_table_(heap_->GetReadBarrierTable()),
force_evacuate_all_(false),
immune_gray_stack_lock_("concurrent copying immune gray stack lock",
@@ -162,6 +174,14 @@
MutexLock mu(Thread::Current(), mark_stack_lock_);
CHECK(false_gray_stack_.empty());
}
+
+ mark_from_read_barrier_measurements_ = measure_read_barrier_slow_path_;
+ if (measure_read_barrier_slow_path_) {
+ rb_slow_path_ns_.StoreRelaxed(0);
+ rb_slow_path_count_.StoreRelaxed(0);
+ rb_slow_path_count_gc_.StoreRelaxed(0);
+ }
+
immune_spaces_.Reset();
bytes_moved_.StoreRelaxed(0);
objects_moved_.StoreRelaxed(0);
@@ -194,7 +214,7 @@
}
// Used to switch the thread roots of a thread from from-space refs to to-space refs.
-class ConcurrentCopying::ThreadFlipVisitor : public Closure {
+class ConcurrentCopying::ThreadFlipVisitor : public Closure, public RootVisitor {
public:
ThreadFlipVisitor(ConcurrentCopying* concurrent_copying, bool use_tlab)
: concurrent_copying_(concurrent_copying), use_tlab_(use_tlab) {
@@ -221,10 +241,44 @@
thread->RevokeThreadLocalAllocationStack();
}
ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
- thread->VisitRoots(concurrent_copying_);
+ // We can use the non-CAS VisitRoots functions below because we update thread-local GC roots
+ // only.
+ thread->VisitRoots(this);
concurrent_copying_->GetBarrier().Pass(self);
}
+ void VisitRoots(mirror::Object*** roots,
+ size_t count,
+ const RootInfo& info ATTRIBUTE_UNUSED)
+ SHARED_REQUIRES(Locks::mutator_lock_) {
+ for (size_t i = 0; i < count; ++i) {
+ mirror::Object** root = roots[i];
+ mirror::Object* ref = *root;
+ if (ref != nullptr) {
+ mirror::Object* to_ref = concurrent_copying_->Mark(ref);
+ if (to_ref != ref) {
+ *root = to_ref;
+ }
+ }
+ }
+ }
+
+ void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
+ size_t count,
+ const RootInfo& info ATTRIBUTE_UNUSED)
+ SHARED_REQUIRES(Locks::mutator_lock_) {
+ for (size_t i = 0; i < count; ++i) {
+ mirror::CompressedReference<mirror::Object>* const root = roots[i];
+ if (!root->IsNull()) {
+ mirror::Object* ref = root->AsMirrorPtr();
+ mirror::Object* to_ref = concurrent_copying_->Mark(ref);
+ if (to_ref != ref) {
+ root->Assign(to_ref);
+ }
+ }
+ }
+ }
+
private:
ConcurrentCopying* const concurrent_copying_;
const bool use_tlab_;
@@ -1996,9 +2050,17 @@
MutexLock mu(Thread::Current(), skipped_blocks_lock_);
skipped_blocks_map_.clear();
}
- ReaderMutexLock mu(self, *Locks::mutator_lock_);
- WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_);
- heap_->ClearMarkedObjects();
+ {
+ ReaderMutexLock mu(self, *Locks::mutator_lock_);
+ WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_);
+ heap_->ClearMarkedObjects();
+ }
+ if (measure_read_barrier_slow_path_) {
+ MutexLock mu(self, rb_slow_path_histogram_lock_);
+ rb_slow_path_time_histogram_.AdjustAndAddValue(rb_slow_path_ns_.LoadRelaxed());
+ rb_slow_path_count_total_ += rb_slow_path_count_.LoadRelaxed();
+ rb_slow_path_count_gc_total_ += rb_slow_path_count_gc_.LoadRelaxed();
+ }
}
bool ConcurrentCopying::IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* field) {
@@ -2036,6 +2098,37 @@
region_space_->RevokeAllThreadLocalBuffers();
}
+mirror::Object* ConcurrentCopying::MarkFromReadBarrierWithMeasurements(mirror::Object* from_ref) {
+ if (Thread::Current() != thread_running_gc_) {
+ rb_slow_path_count_.FetchAndAddRelaxed(1u);
+ } else {
+ rb_slow_path_count_gc_.FetchAndAddRelaxed(1u);
+ }
+ ScopedTrace tr(__FUNCTION__);
+ const uint64_t start_time = measure_read_barrier_slow_path_ ? NanoTime() : 0u;
+ mirror::Object* ret = Mark(from_ref);
+ if (measure_read_barrier_slow_path_) {
+ rb_slow_path_ns_.FetchAndAddRelaxed(NanoTime() - start_time);
+ }
+ return ret;
+}
+
+void ConcurrentCopying::DumpPerformanceInfo(std::ostream& os) {
+ GarbageCollector::DumpPerformanceInfo(os);
+ MutexLock mu(Thread::Current(), rb_slow_path_histogram_lock_);
+ if (rb_slow_path_time_histogram_.SampleSize() > 0) {
+ Histogram<uint64_t>::CumulativeData cumulative_data;
+ rb_slow_path_time_histogram_.CreateHistogram(&cumulative_data);
+ rb_slow_path_time_histogram_.PrintConfidenceIntervals(os, 0.99, cumulative_data);
+ }
+ if (rb_slow_path_count_total_ > 0) {
+ os << "Slow path count " << rb_slow_path_count_total_ << "\n";
+ }
+ if (rb_slow_path_count_gc_total_ > 0) {
+ os << "GC slow path count " << rb_slow_path_count_gc_total_ << "\n";
+ }
+}
+
} // namespace collector
} // namespace gc
} // namespace art
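Taken together, the concurrent-copying hunks above add an opt-in read-barrier slow-path measurement mode: when enabled via the new constructor argument (threaded in from the GC options in the heap.cc and runtime.cc hunks further down), each slow-path call is counted and timed through relaxed atomics, and FinishPhase folds the totals into a histogram that DumpPerformanceInfo prints. A minimal sketch of that wrapper pattern, with illustrative names rather than ART's:

#include <atomic>
#include <chrono>
#include <cstdint>

struct Obj;
Obj* Mark(Obj* ref) { return ref; }        // stand-in for the real marking slow path

std::atomic<uint64_t> g_slow_path_calls{0};
std::atomic<uint64_t> g_slow_path_ns{0};
bool g_measure_slow_path = false;          // decided once per GC cycle

Obj* MarkFromReadBarrier(Obj* ref) {
  if (!g_measure_slow_path) {
    return Mark(ref);                      // default path: no extra overhead
  }
  g_slow_path_calls.fetch_add(1, std::memory_order_relaxed);
  const auto start = std::chrono::steady_clock::now();
  Obj* const result = Mark(ref);
  const auto elapsed = std::chrono::steady_clock::now() - start;
  g_slow_path_ns.fetch_add(
      std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count(),
      std::memory_order_relaxed);
  return result;
}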
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 166a1f0..6a8d052 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -58,17 +58,24 @@
// Enable verbose mode.
static constexpr bool kVerboseMode = false;
- ConcurrentCopying(Heap* heap, const std::string& name_prefix = "");
+ ConcurrentCopying(Heap* heap,
+ const std::string& name_prefix = "",
+ bool measure_read_barrier_slow_path = false);
~ConcurrentCopying();
virtual void RunPhases() OVERRIDE
- REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+ REQUIRES(!immune_gray_stack_lock_,
+ !mark_stack_lock_,
+ !rb_slow_path_histogram_lock_,
+ !skipped_blocks_lock_);
void InitializePhase() SHARED_REQUIRES(Locks::mutator_lock_)
REQUIRES(!mark_stack_lock_, !immune_gray_stack_lock_);
void MarkingPhase() SHARED_REQUIRES(Locks::mutator_lock_)
REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
void ReclaimPhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
- void FinishPhase() REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+ void FinishPhase() REQUIRES(!mark_stack_lock_,
+ !rb_slow_path_histogram_lock_,
+ !skipped_blocks_lock_);
void BindBitmaps() SHARED_REQUIRES(Locks::mutator_lock_)
REQUIRES(!Locks::heap_bitmap_lock_);
@@ -95,7 +102,11 @@
return IsMarked(ref) == ref;
}
template<bool kGrayImmuneObject = true>
- ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
+ ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref)
+ SHARED_REQUIRES(Locks::mutator_lock_)
+ REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+ ALWAYS_INLINE mirror::Object* MarkFromReadBarrier(mirror::Object* from_ref)
+ SHARED_REQUIRES(Locks::mutator_lock_)
REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
bool IsMarking() const {
return is_marking_;
@@ -203,6 +214,10 @@
REQUIRES(!mark_stack_lock_);
void ScanImmuneObject(mirror::Object* obj)
SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
+ mirror::Object* MarkFromReadBarrierWithMeasurements(mirror::Object* from_ref)
+ SHARED_REQUIRES(Locks::mutator_lock_)
+ REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+ void DumpPerformanceInfo(std::ostream& os) OVERRIDE REQUIRES(!rb_slow_path_histogram_lock_);
space::RegionSpace* region_space_; // The underlying region space.
std::unique_ptr<Barrier> gc_barrier_;
@@ -251,6 +266,20 @@
Atomic<size_t> to_space_bytes_skipped_;
Atomic<size_t> to_space_objects_skipped_;
+ // If measure_read_barrier_slow_path_ is true, we count how much time is spent in
+ // MarkFromReadBarrier and log it in the GC performance dump.
+ bool measure_read_barrier_slow_path_;
+ // mark_from_read_barrier_measurements_ is true if measure_read_barrier_slow_path_ is
+ // true; it is refreshed at the start of each GC cycle.
+ bool mark_from_read_barrier_measurements_;
+ Atomic<uint64_t> rb_slow_path_ns_;
+ Atomic<uint64_t> rb_slow_path_count_;
+ Atomic<uint64_t> rb_slow_path_count_gc_;
+ mutable Mutex rb_slow_path_histogram_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+ Histogram<uint64_t> rb_slow_path_time_histogram_ GUARDED_BY(rb_slow_path_histogram_lock_);
+ uint64_t rb_slow_path_count_total_ GUARDED_BY(rb_slow_path_histogram_lock_);
+ uint64_t rb_slow_path_count_gc_total_ GUARDED_BY(rb_slow_path_histogram_lock_);
+
accounting::ReadBarrierTable* rb_table_;
bool force_evacuate_all_; // True if all regions are evacuated.
Atomic<bool> updated_all_immune_objects_;
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 580486a..e0b71a7 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -181,7 +181,7 @@
void RecordFree(const ObjectBytePair& freed);
// Record a free of large objects.
void RecordFreeLOS(const ObjectBytePair& freed);
- void DumpPerformanceInfo(std::ostream& os) REQUIRES(!pause_histogram_lock_);
+ virtual void DumpPerformanceInfo(std::ostream& os) REQUIRES(!pause_histogram_lock_);
// Helper functions for querying if objects are marked. These are used for processing references,
// and will be used for reading system weaks while the GC is running.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index a6d62a9..6f4767e 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -157,6 +157,7 @@
bool verify_pre_sweeping_rosalloc,
bool verify_post_gc_rosalloc,
bool gc_stress_mode,
+ bool measure_gc_performance,
bool use_homogeneous_space_compaction_for_oom,
uint64_t min_interval_homogeneous_space_compaction_by_oom)
: non_moving_space_(nullptr),
@@ -599,7 +600,9 @@
garbage_collectors_.push_back(semi_space_collector_);
}
if (MayUseCollector(kCollectorTypeCC)) {
- concurrent_copying_collector_ = new collector::ConcurrentCopying(this);
+ concurrent_copying_collector_ = new collector::ConcurrentCopying(this,
+ "",
+ measure_gc_performance);
garbage_collectors_.push_back(concurrent_copying_collector_);
}
if (MayUseCollector(kCollectorTypeMC)) {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 6fb048a..bb0d11a 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -182,6 +182,7 @@
bool verify_pre_sweeping_rosalloc,
bool verify_post_gc_rosalloc,
bool gc_stress_mode,
+ bool measure_gc_performance,
bool use_homogeneous_space_compaction,
uint64_t min_interval_homogeneous_space_compaction_by_oom);
diff --git a/runtime/interpreter/mterp/arm64/fbinop2addr.S b/runtime/interpreter/mterp/arm64/fbinop2addr.S
index 0d57cbf..04236ad 100644
--- a/runtime/interpreter/mterp/arm64/fbinop2addr.S
+++ b/runtime/interpreter/mterp/arm64/fbinop2addr.S
@@ -7,8 +7,7 @@
*/
/* binop/2addr vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w9, wINST, #8 // w9<- A+
- and w9, w9, #15 // w9<- A
+ ubfx w9, wINST, #8, #4 // w9<- A
GET_VREG s1, w3
GET_VREG s0, w9
$instr // s2<- op
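This template, and the many mterp hunks below that make the same change, replaces the two-instruction `lsr` + `and` sequence for extracting the 4-bit vA operand with a single `ubfx` (unsigned bitfield extract). Both forms read bits [8..11] of the instruction word; a small C++ check of the equivalence:

#include <cassert>
#include <cstdint>

uint32_t VAShiftAndMask(uint16_t inst) {    // old: lsr w9, wINST, #8 ; and w9, w9, #15
  return (static_cast<uint32_t>(inst) >> 8) & 0xF;
}

uint32_t VABitfieldExtract(uint16_t inst) { // new: ubfx w9, wINST, #8, #4
  return (static_cast<uint32_t>(inst) >> 8) & ((1u << 4) - 1);
}

int main() {
  for (uint32_t i = 0; i <= 0xFFFF; ++i) {
    assert(VAShiftAndMask(static_cast<uint16_t>(i)) ==
           VABitfieldExtract(static_cast<uint16_t>(i)));
  }
  return 0;
}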
diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S
index 2d3a11e..7628ed3 100644
--- a/runtime/interpreter/mterp/arm64/footer.S
+++ b/runtime/interpreter/mterp/arm64/footer.S
@@ -234,7 +234,7 @@
#if MTERP_LOGGING
mov x0, xSELF
add x1, xFP, #OFF_FP_SHADOWFRAME
- sbfm x2, xINST, 0, 31
+ sxtw x2, wINST
bl MterpLogOSR
#endif
mov x0, #1 // Signal normal return
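This hunk swaps the raw `sbfm x2, xINST, 0, 31` for its preferred alias `sxtw x2, wINST`; both sign-extend the low 32 bits of xINST into x2. The same operation in C++ terms:

#include <cstdint>

// Sign-extend the low 32 bits of a 64-bit register value.
int64_t SignExtendWord(uint64_t xinst) {
  return static_cast<int32_t>(static_cast<uint32_t>(xinst));
}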
diff --git a/runtime/interpreter/mterp/arm64/funopNarrow.S b/runtime/interpreter/mterp/arm64/funopNarrow.S
index 9f5ad1e..aed830b 100644
--- a/runtime/interpreter/mterp/arm64/funopNarrow.S
+++ b/runtime/interpreter/mterp/arm64/funopNarrow.S
@@ -8,10 +8,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG $srcreg, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
$instr // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG $tgtreg, w4 // vA<- d0
diff --git a/runtime/interpreter/mterp/arm64/funopNarrower.S b/runtime/interpreter/mterp/arm64/funopNarrower.S
index 411396b..6fddfea 100644
--- a/runtime/interpreter/mterp/arm64/funopNarrower.S
+++ b/runtime/interpreter/mterp/arm64/funopNarrower.S
@@ -7,10 +7,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG_WIDE $srcreg, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
$instr // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG $tgtreg, w4 // vA<- d0
diff --git a/runtime/interpreter/mterp/arm64/funopWide.S b/runtime/interpreter/mterp/arm64/funopWide.S
index d83b39c..409e26b 100644
--- a/runtime/interpreter/mterp/arm64/funopWide.S
+++ b/runtime/interpreter/mterp/arm64/funopWide.S
@@ -7,10 +7,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG_WIDE $srcreg, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
$instr // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG_WIDE $tgtreg, w4 // vA<- d0
diff --git a/runtime/interpreter/mterp/arm64/funopWider.S b/runtime/interpreter/mterp/arm64/funopWider.S
index 50a73f1..4c91ebc 100644
--- a/runtime/interpreter/mterp/arm64/funopWider.S
+++ b/runtime/interpreter/mterp/arm64/funopWider.S
@@ -7,10 +7,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG $srcreg, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
$instr // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG_WIDE $tgtreg, w4 // vA<- d0
diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_16.S b/runtime/interpreter/mterp/arm64/op_const_wide_16.S
index e43628b..553d481 100644
--- a/runtime/interpreter/mterp/arm64/op_const_wide_16.S
+++ b/runtime/interpreter/mterp/arm64/op_const_wide_16.S
@@ -1,8 +1,7 @@
/* const-wide/16 vAA, #+BBBB */
- FETCH_S w0, 1 // w0<- ssssBBBB (sign-extended
+ FETCH_S x0, 1 // x0<- ssssssssssssBBBB (sign-extended)
lsr w3, wINST, #8 // w3<- AA
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- sbfm x0, x0, 0, 31
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w3
GOTO_OPCODE ip // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_32.S b/runtime/interpreter/mterp/arm64/op_const_wide_32.S
index 527f7d8..9dc4fc3 100644
--- a/runtime/interpreter/mterp/arm64/op_const_wide_32.S
+++ b/runtime/interpreter/mterp/arm64/op_const_wide_32.S
@@ -1,10 +1,9 @@
/* const-wide/32 vAA, #+BBBBbbbb */
- FETCH w0, 1 // w0<- 0000bbbb (low)
+ FETCH w0, 1 // x0<- 000000000000bbbb (low)
lsr w3, wINST, #8 // w3<- AA
- FETCH_S w2, 2 // w2<- ssssBBBB (high)
+ FETCH_S x2, 2 // x2<- ssssssssssssBBBB (high)
FETCH_ADVANCE_INST 3 // advance rPC, load wINST
GET_INST_OPCODE ip // extract opcode from wINST
- orr w0, w0, w2, lsl #16 // w0<- BBBBbbbb
- sbfm x0, x0, 0, 31
+ orr x0, x0, x2, lsl #16 // x0<- ssssssssBBBBbbbb
SET_VREG_WIDE x0, w3
GOTO_OPCODE ip // jump to next instruction
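The const-wide/16 and const-wide/32 hunks above move the sign extension into the fetch itself: `FETCH_S` now targets an X register, so the trailing `sbfm x0, x0, 0, 31` can be dropped and the high code unit is merged with a 64-bit `orr`. A C++ sketch of the const-wide/32 operand decoding after the change, with illustrative names:

#include <cstdint>

// lo is the zero-extended low code unit (FETCH w0, 1); hi is the sign-extended
// high code unit (FETCH_S x2, 2). The OR below mirrors `orr x0, x0, x2, lsl #16`
// and yields the sign-extended 32-bit constant ssssssssBBBBbbbb.
int64_t DecodeConstWide32(uint16_t lo, int16_t hi) {
  const uint64_t x0 = lo;                  // zero-extend bbbb
  const int64_t x2 = hi;                   // sign-extend BBBB to 64 bits
  return static_cast<int64_t>(x0 | (static_cast<uint64_t>(x2) << 16));
}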
diff --git a/runtime/interpreter/mterp/arm64/op_iget_quick.S b/runtime/interpreter/mterp/arm64/op_iget_quick.S
index 45c68a3..699b2c4 100644
--- a/runtime/interpreter/mterp/arm64/op_iget_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iget_quick.S
@@ -5,8 +5,7 @@
FETCH w1, 1 // w1<- field byte offset
GET_VREG w3, w2 // w3<- object we're operating on
ubfx w2, wINST, #8, #4 // w2<- A
- cmp x3, #0 // check object for null
- beq common_errNullObject // object was null
+ cbz w3, common_errNullObject // object was null
$load w0, [x3, x1] // w0<- obj.field
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
$extend
diff --git a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
index 2480d2d..30b30c2 100644
--- a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
@@ -3,7 +3,7 @@
FETCH w4, 1 // w4<- field byte offset
GET_VREG w3, w2 // w3<- object we're operating on
ubfx w2, wINST, #8, #4 // w2<- A
- cbz w3, common_errNullObject // object was null
+ cbz w3, common_errNullObject // object was null
add x4, x3, x4 // create direct pointer
ldr x0, [x4]
FETCH_ADVANCE_INST 2 // advance rPC, load wINST
diff --git a/runtime/interpreter/mterp/arm64/op_instance_of.S b/runtime/interpreter/mterp/arm64/op_instance_of.S
index 647bc75..a56705a 100644
--- a/runtime/interpreter/mterp/arm64/op_instance_of.S
+++ b/runtime/interpreter/mterp/arm64/op_instance_of.S
@@ -13,8 +13,7 @@
mov x3, xSELF // w3<- self
bl MterpInstanceOf // (index, &obj, method, self)
ldr x1, [xSELF, #THREAD_EXCEPTION_OFFSET]
- lsr w2, wINST, #8 // w2<- A+
- and w2, w2, #15 // w2<- A
+ ubfx w2, wINST, #8, #4 // w2<- A
PREFETCH_INST 2
cbnz x1, MterpException
ADVANCE 2 // advance rPC
diff --git a/runtime/interpreter/mterp/arm64/op_int_to_long.S b/runtime/interpreter/mterp/arm64/op_int_to_long.S
index 13d2120..35830f3 100644
--- a/runtime/interpreter/mterp/arm64/op_int_to_long.S
+++ b/runtime/interpreter/mterp/arm64/op_int_to_long.S
@@ -1 +1 @@
-%include "arm64/funopWider.S" {"instr":"sbfm x0, x0, 0, 31", "srcreg":"w0", "tgtreg":"x0"}
+%include "arm64/funopWider.S" {"instr":"sxtw x0, w0", "srcreg":"w0", "tgtreg":"x0"}
diff --git a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
index 27b5dc5..566e2bf 100644
--- a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
@@ -3,8 +3,7 @@
FETCH w3, 1 // w3<- field byte offset
GET_VREG w2, w2 // w2<- fp[B], the object pointer
ubfx w0, wINST, #8, #4 // w0<- A
- cmp w2, #0 // check object for null
- beq common_errNullObject // object was null
+ cbz w2, common_errNullObject // object was null
GET_VREG_WIDE x0, w0 // x0-< fp[A]
FETCH_ADVANCE_INST 2 // advance rPC, load wINST
add x1, x2, x3 // create a direct pointer
diff --git a/runtime/interpreter/mterp/arm64/op_packed_switch.S b/runtime/interpreter/mterp/arm64/op_packed_switch.S
index 1456f1a..4faa6d2 100644
--- a/runtime/interpreter/mterp/arm64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm64/op_packed_switch.S
@@ -9,12 +9,12 @@
* for: packed-switch, sparse-switch
*/
/* op vAA, +BBBB */
- FETCH w0, 1 // w0<- bbbb (lo)
- FETCH w1, 2 // w1<- BBBB (hi)
+ FETCH w0, 1 // x0<- 000000000000bbbb (lo)
+ FETCH_S x1, 2 // x1<- ssssssssssssBBBB (hi)
lsr w3, wINST, #8 // w3<- AA
- orr w0, w0, w1, lsl #16 // w0<- BBBBbbbb
+ orr x0, x0, x1, lsl #16 // x0<- ssssssssBBBBbbbb
GET_VREG w1, w3 // w1<- vAA
- add x0, xPC, w0, lsl #1 // w0<- PC + BBBBbbbb*2
+ add x0, xPC, x0, lsl #1 // x0<- PC + BBBBbbbb*2
bl $func // w0<- code-unit branch offset
- sbfm xINST, x0, 0, 31
+ sxtw xINST, w0
b MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S b/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S
index 0b91891..95f81c5 100644
--- a/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S
+++ b/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S
@@ -1,12 +1,10 @@
/* rem vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w9, wINST, #8 // w9<- A+
- and w9, w9, #15 // w9<- A
+ ubfx w9, wINST, #8, #4 // w9<- A
GET_VREG s1, w3
GET_VREG s0, w9
bl fmodf
- lsr w9, wINST, #8 // w9<- A+
- and w9, w9, #15 // w9<- A
+ ubfx w9, wINST, #8, #4 // w9<- A
FETCH_ADVANCE_INST 1 // advance rPC, load rINST
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG s0, w9
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int.S b/runtime/interpreter/mterp/arm64/op_shl_int.S
index bd0f237..3062a3f 100644
--- a/runtime/interpreter/mterp/arm64/op_shl_int.S
+++ b/runtime/interpreter/mterp/arm64/op_shl_int.S
@@ -1 +1 @@
-%include "arm64/binop.S" {"preinstr":"and w1, w1, #31", "instr":"lsl w0, w0, w1"}
+%include "arm64/binop.S" {"instr":"lsl w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S b/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S
index b4671d2..9a7e09f 100644
--- a/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S
+++ b/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S
@@ -1 +1 @@
-%include "arm64/binop2addr.S" {"preinstr":"and w1, w1, #31", "instr":"lsl w0, w0, w1"}
+%include "arm64/binop2addr.S" {"instr":"lsl w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
index 4dd32e0..17f57f9 100644
--- a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"preinstr":"and w1, w1, #31", "instr":"lsl w0, w0, w1"}
+%include "arm64/binopLit8.S" {"instr":"lsl w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int.S b/runtime/interpreter/mterp/arm64/op_shr_int.S
index c214a18..493b740 100644
--- a/runtime/interpreter/mterp/arm64/op_shr_int.S
+++ b/runtime/interpreter/mterp/arm64/op_shr_int.S
@@ -1 +1 @@
-%include "arm64/binop.S" {"preinstr":"and w1, w1, #31", "instr":"asr w0, w0, w1"}
+%include "arm64/binop.S" {"instr":"asr w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S b/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S
index 3c1484b..6efe8ee 100644
--- a/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S
+++ b/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S
@@ -1 +1 @@
-%include "arm64/binop2addr.S" {"preinstr":"and w1, w1, #31", "instr":"asr w0, w0, w1"}
+%include "arm64/binop2addr.S" {"instr":"asr w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
index 26d5024..274080c 100644
--- a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"preinstr":"and w1, w1, #31", "instr":"asr w0, w0, w1"}
+%include "arm64/binopLit8.S" {"instr":"asr w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int.S b/runtime/interpreter/mterp/arm64/op_ushr_int.S
index bb8382b..005452b 100644
--- a/runtime/interpreter/mterp/arm64/op_ushr_int.S
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int.S
@@ -1 +1 @@
-%include "arm64/binop.S" {"preinstr":"and w1, w1, #31", "instr":"lsr w0, w0, w1"}
+%include "arm64/binop.S" {"instr":"lsr w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S b/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S
index dbccb99..1cb8cb7 100644
--- a/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S
@@ -1 +1 @@
-%include "arm64/binop2addr.S" {"preinstr":"and w1, w1, #31", "instr":"lsr w0, w0, w1"}
+%include "arm64/binop2addr.S" {"instr":"lsr w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
index 35090c4..ff30e1f 100644
--- a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
+++ b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S
@@ -1 +1 @@
-%include "arm64/binopLit8.S" {"preinstr":"and w1, w1, #31", "instr":"lsr w0, w0, w1"}
+%include "arm64/binopLit8.S" {"instr":"lsr w0, w0, w1"}
diff --git a/runtime/interpreter/mterp/arm64/shiftWide.S b/runtime/interpreter/mterp/arm64/shiftWide.S
index 6306fca..dcb2fb7 100644
--- a/runtime/interpreter/mterp/arm64/shiftWide.S
+++ b/runtime/interpreter/mterp/arm64/shiftWide.S
@@ -12,8 +12,7 @@
and w1, w0, #255 // w1<- BB
GET_VREG_WIDE x1, w1 // x1<- vBB
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and x2, x2, #63 // Mask low 6
- $opcode x0, x1, x2 // Do the shift.
+ $opcode x0, x1, x2 // Do the shift. Only low 6 bits of x2 are used.
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w3 // vAA<- x0
GOTO_OPCODE ip // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/shiftWide2addr.S b/runtime/interpreter/mterp/arm64/shiftWide2addr.S
index 77d104a..b860dfd 100644
--- a/runtime/interpreter/mterp/arm64/shiftWide2addr.S
+++ b/runtime/interpreter/mterp/arm64/shiftWide2addr.S
@@ -8,8 +8,7 @@
GET_VREG w1, w1 // x1<- vB
GET_VREG_WIDE x0, w2 // x0<- vA
FETCH_ADVANCE_INST 1 // advance rPC, load rINST
- and x1, x1, #63 // Mask low 6 bits.
- $opcode x0, x0, x1
+ $opcode x0, x0, x1 // Do the shift. Only low 6 bits of x1 are used.
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w2 // vAA<- result
GOTO_OPCODE ip // jump to next instruction
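The shiftWide templates drop the explicit `and ..., #63` because AArch64 variable shifts on X registers already take the shift amount modulo 64 (the int-shift hunks above likewise drop `and w1, w1, #31`, since W-register shifts use only the low five bits). A small C++ model of what `lsl x0, x1, x2` and `asr x0, x1, x2` compute:

#include <cstdint>

// Variable left shift on an X register: only the low six bits of the amount matter.
uint64_t LslXReg(uint64_t value, uint64_t amount) {
  return value << (amount & 63);
}

// Variable arithmetic right shift; the amount is likewise taken modulo 64.
int64_t AsrXReg(int64_t value, uint64_t amount) {
  return value >> (amount & 63);           // assumes arithmetic shift for signed values
}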
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index df0b686..d470551 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -747,10 +747,9 @@
.L_op_const_wide_16: /* 0x16 */
/* File: arm64/op_const_wide_16.S */
/* const-wide/16 vAA, #+BBBB */
- FETCH_S w0, 1 // w0<- ssssBBBB (sign-extended
+ FETCH_S x0, 1 // x0<- ssssssssssssBBBB (sign-extended)
lsr w3, wINST, #8 // w3<- AA
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- sbfm x0, x0, 0, 31
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w3
GOTO_OPCODE ip // jump to next instruction
@@ -760,13 +759,12 @@
.L_op_const_wide_32: /* 0x17 */
/* File: arm64/op_const_wide_32.S */
/* const-wide/32 vAA, #+BBBBbbbb */
- FETCH w0, 1 // w0<- 0000bbbb (low)
+ FETCH w0, 1 // x0<- 000000000000bbbb (low)
lsr w3, wINST, #8 // w3<- AA
- FETCH_S w2, 2 // w2<- ssssBBBB (high)
+ FETCH_S x2, 2 // x2<- ssssssssssssBBBB (high)
FETCH_ADVANCE_INST 3 // advance rPC, load wINST
GET_INST_OPCODE ip // extract opcode from wINST
- orr w0, w0, w2, lsl #16 // w0<- BBBBbbbb
- sbfm x0, x0, 0, 31
+ orr x0, x0, x2, lsl #16 // x0<- ssssssssBBBBbbbb
SET_VREG_WIDE x0, w3
GOTO_OPCODE ip // jump to next instruction
@@ -934,8 +932,7 @@
mov x3, xSELF // w3<- self
bl MterpInstanceOf // (index, &obj, method, self)
ldr x1, [xSELF, #THREAD_EXCEPTION_OFFSET]
- lsr w2, wINST, #8 // w2<- A+
- and w2, w2, #15 // w2<- A
+ ubfx w2, wINST, #8, #4 // w2<- A
PREFETCH_INST 2
cbnz x1, MterpException
ADVANCE 2 // advance rPC
@@ -1143,14 +1140,14 @@
* for: packed-switch, sparse-switch
*/
/* op vAA, +BBBB */
- FETCH w0, 1 // w0<- bbbb (lo)
- FETCH w1, 2 // w1<- BBBB (hi)
+ FETCH w0, 1 // x0<- 000000000000bbbb (lo)
+ FETCH_S x1, 2 // x1<- ssssssssssssBBBB (hi)
lsr w3, wINST, #8 // w3<- AA
- orr w0, w0, w1, lsl #16 // w0<- BBBBbbbb
+ orr x0, x0, x1, lsl #16 // x0<- ssssssssBBBBbbbb
GET_VREG w1, w3 // w1<- vAA
- add x0, xPC, w0, lsl #1 // w0<- PC + BBBBbbbb*2
+ add x0, xPC, x0, lsl #1 // x0<- PC + BBBBbbbb*2
bl MterpDoPackedSwitch // w0<- code-unit branch offset
- sbfm xINST, x0, 0, 31
+ sxtw xINST, w0
b MterpCommonTakenBranchNoFlags
/* ------------------------------ */
@@ -1168,14 +1165,14 @@
* for: packed-switch, sparse-switch
*/
/* op vAA, +BBBB */
- FETCH w0, 1 // w0<- bbbb (lo)
- FETCH w1, 2 // w1<- BBBB (hi)
+ FETCH w0, 1 // x0<- 000000000000bbbb (lo)
+ FETCH_S x1, 2 // x1<- ssssssssssssBBBB (hi)
lsr w3, wINST, #8 // w3<- AA
- orr w0, w0, w1, lsl #16 // w0<- BBBBbbbb
+ orr x0, x0, x1, lsl #16 // x0<- ssssssssBBBBbbbb
GET_VREG w1, w3 // w1<- vAA
- add x0, xPC, w0, lsl #1 // w0<- PC + BBBBbbbb*2
+ add x0, xPC, x0, lsl #1 // x0<- PC + BBBBbbbb*2
bl MterpDoSparseSwitch // w0<- code-unit branch offset
- sbfm xINST, x0, 0, 31
+ sxtw xINST, w0
b MterpCommonTakenBranchNoFlags
@@ -3345,11 +3342,10 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG w0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
- sbfm x0, x0, 0, 31 // d0<- op
+ sxtw x0, w0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG_WIDE x0, w4 // vA<- d0
GOTO_OPCODE ip // jump to next instruction
@@ -3369,10 +3365,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG w0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
scvtf s0, w0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG s0, w4 // vA<- d0
@@ -3392,10 +3387,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG w0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
scvtf d0, w0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG_WIDE d0, w4 // vA<- d0
@@ -3415,10 +3409,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG_WIDE x0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
// d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG w0, w4 // vA<- d0
@@ -3438,10 +3431,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG_WIDE x0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
scvtf s0, x0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG s0, w4 // vA<- d0
@@ -3461,10 +3453,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG_WIDE x0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
scvtf d0, x0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG_WIDE d0, w4 // vA<- d0
@@ -3485,10 +3476,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG s0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
fcvtzs w0, s0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG w0, w4 // vA<- d0
@@ -3508,10 +3498,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG s0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
fcvtzs x0, s0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG_WIDE x0, w4 // vA<- d0
@@ -3531,10 +3520,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG s0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
fcvt d0, s0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG_WIDE d0, w4 // vA<- d0
@@ -3554,10 +3542,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG_WIDE d0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
fcvtzs w0, d0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG w0, w4 // vA<- d0
@@ -3577,10 +3564,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG_WIDE d0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
fcvtzs x0, d0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG_WIDE x0, w4 // vA<- d0
@@ -3600,10 +3586,9 @@
*/
/* unop vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w4, wINST, #8 // w4<- A+
+ ubfx w4, wINST, #8, #4 // w4<- A
GET_VREG_WIDE d0, w3
FETCH_ADVANCE_INST 1 // advance rPC, load wINST
- and w4, w4, #15 // w4<- A
fcvt s0, d0 // d0<- op
GET_INST_OPCODE ip // extract opcode from wINST
SET_VREG s0, w4 // vA<- d0
@@ -4032,7 +4017,7 @@
cbz w1, common_errDivideByZero // is second operand zero?
.endif
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and w1, w1, #31 // optional op; may set condition codes
+ // optional op; may set condition codes
lsl w0, w0, w1 // w0<- op, w0-w3 changed
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w9 // vAA<- w0
@@ -4071,7 +4056,7 @@
cbz w1, common_errDivideByZero // is second operand zero?
.endif
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and w1, w1, #31 // optional op; may set condition codes
+ // optional op; may set condition codes
asr w0, w0, w1 // w0<- op, w0-w3 changed
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w9 // vAA<- w0
@@ -4110,7 +4095,7 @@
cbz w1, common_errDivideByZero // is second operand zero?
.endif
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and w1, w1, #31 // optional op; may set condition codes
+ // optional op; may set condition codes
lsr w0, w0, w1 // w0<- op, w0-w3 changed
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w9 // vAA<- w0
@@ -4424,8 +4409,7 @@
and w1, w0, #255 // w1<- BB
GET_VREG_WIDE x1, w1 // x1<- vBB
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and x2, x2, #63 // Mask low 6
- lsl x0, x1, x2 // Do the shift.
+ lsl x0, x1, x2 // Do the shift. Only low 6 bits of x2 are used.
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w3 // vAA<- x0
GOTO_OPCODE ip // jump to next instruction
@@ -4450,8 +4434,7 @@
and w1, w0, #255 // w1<- BB
GET_VREG_WIDE x1, w1 // x1<- vBB
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and x2, x2, #63 // Mask low 6
- asr x0, x1, x2 // Do the shift.
+ asr x0, x1, x2 // Do the shift. Only low 6 bits of x2 are used.
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w3 // vAA<- x0
GOTO_OPCODE ip // jump to next instruction
@@ -4476,8 +4459,7 @@
and w1, w0, #255 // w1<- BB
GET_VREG_WIDE x1, w1 // x1<- vBB
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and x2, x2, #63 // Mask low 6
- lsr x0, x1, x2 // Do the shift.
+ lsr x0, x1, x2 // Do the shift. Only low 6 bits of x2 are used.
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w3 // vAA<- x0
GOTO_OPCODE ip // jump to next instruction
@@ -5089,7 +5071,7 @@
cbz w1, common_errDivideByZero
.endif
FETCH_ADVANCE_INST 1 // advance rPC, load rINST
- and w1, w1, #31 // optional op; may set condition codes
+ // optional op; may set condition codes
lsl w0, w0, w1 // w0<- op, w0-w3 changed
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w9 // vAA<- w0
@@ -5125,7 +5107,7 @@
cbz w1, common_errDivideByZero
.endif
FETCH_ADVANCE_INST 1 // advance rPC, load rINST
- and w1, w1, #31 // optional op; may set condition codes
+ // optional op; may set condition codes
asr w0, w0, w1 // w0<- op, w0-w3 changed
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w9 // vAA<- w0
@@ -5161,7 +5143,7 @@
cbz w1, common_errDivideByZero
.endif
FETCH_ADVANCE_INST 1 // advance rPC, load rINST
- and w1, w1, #31 // optional op; may set condition codes
+ // optional op; may set condition codes
lsr w0, w0, w1 // w0<- op, w0-w3 changed
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w9 // vAA<- w0
@@ -5463,8 +5445,7 @@
GET_VREG w1, w1 // x1<- vB
GET_VREG_WIDE x0, w2 // x0<- vA
FETCH_ADVANCE_INST 1 // advance rPC, load rINST
- and x1, x1, #63 // Mask low 6 bits.
- lsl x0, x0, x1
+ lsl x0, x0, x1 // Do the shift. Only low 6 bits of x1 are used.
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w2 // vAA<- result
GOTO_OPCODE ip // jump to next instruction
@@ -5485,8 +5466,7 @@
GET_VREG w1, w1 // x1<- vB
GET_VREG_WIDE x0, w2 // x0<- vA
FETCH_ADVANCE_INST 1 // advance rPC, load rINST
- and x1, x1, #63 // Mask low 6 bits.
- asr x0, x0, x1
+ asr x0, x0, x1 // Do the shift. Only low 6 bits of x1 are used.
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w2 // vAA<- result
GOTO_OPCODE ip // jump to next instruction
@@ -5507,8 +5487,7 @@
GET_VREG w1, w1 // x1<- vB
GET_VREG_WIDE x0, w2 // x0<- vA
FETCH_ADVANCE_INST 1 // advance rPC, load rINST
- and x1, x1, #63 // Mask low 6 bits.
- lsr x0, x0, x1
+ lsr x0, x0, x1 // Do the shift. Only low 6 bits of x1 are used.
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG_WIDE x0, w2 // vAA<- result
GOTO_OPCODE ip // jump to next instruction
@@ -5529,8 +5508,7 @@
*/
/* binop/2addr vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w9, wINST, #8 // w9<- A+
- and w9, w9, #15 // w9<- A
+ ubfx w9, wINST, #8, #4 // w9<- A
GET_VREG s1, w3
GET_VREG s0, w9
fadd s2, s0, s1 // s2<- op
@@ -5554,8 +5532,7 @@
*/
/* binop/2addr vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w9, wINST, #8 // w9<- A+
- and w9, w9, #15 // w9<- A
+ ubfx w9, wINST, #8, #4 // w9<- A
GET_VREG s1, w3
GET_VREG s0, w9
fsub s2, s0, s1 // s2<- op
@@ -5579,8 +5556,7 @@
*/
/* binop/2addr vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w9, wINST, #8 // w9<- A+
- and w9, w9, #15 // w9<- A
+ ubfx w9, wINST, #8, #4 // w9<- A
GET_VREG s1, w3
GET_VREG s0, w9
fmul s2, s0, s1 // s2<- op
@@ -5604,8 +5580,7 @@
*/
/* binop/2addr vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w9, wINST, #8 // w9<- A+
- and w9, w9, #15 // w9<- A
+ ubfx w9, wINST, #8, #4 // w9<- A
GET_VREG s1, w3
GET_VREG s0, w9
fdiv s2, s0, s1 // s2<- op
@@ -5621,13 +5596,11 @@
/* File: arm64/op_rem_float_2addr.S */
/* rem vA, vB */
lsr w3, wINST, #12 // w3<- B
- lsr w9, wINST, #8 // w9<- A+
- and w9, w9, #15 // w9<- A
+ ubfx w9, wINST, #8, #4 // w9<- A
GET_VREG s1, w3
GET_VREG s0, w9
bl fmodf
- lsr w9, wINST, #8 // w9<- A+
- and w9, w9, #15 // w9<- A
+ ubfx w9, wINST, #8, #4 // w9<- A
FETCH_ADVANCE_INST 1 // advance rPC, load rINST
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG s0, w9
@@ -6381,7 +6354,7 @@
cbz w1, common_errDivideByZero
.endif
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and w1, w1, #31 // optional op; may set condition codes
+ // optional op; may set condition codes
lsl w0, w0, w1 // w0<- op, w0-w3 changed
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w9 // vAA<- w0
@@ -6417,7 +6390,7 @@
cbz w1, common_errDivideByZero
.endif
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and w1, w1, #31 // optional op; may set condition codes
+ // optional op; may set condition codes
asr w0, w0, w1 // w0<- op, w0-w3 changed
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w9 // vAA<- w0
@@ -6453,7 +6426,7 @@
cbz w1, common_errDivideByZero
.endif
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
- and w1, w1, #31 // optional op; may set condition codes
+ // optional op; may set condition codes
lsr w0, w0, w1 // w0<- op, w0-w3 changed
GET_INST_OPCODE ip // extract opcode from rINST
SET_VREG w0, w9 // vAA<- w0
@@ -6471,8 +6444,7 @@
FETCH w1, 1 // w1<- field byte offset
GET_VREG w3, w2 // w3<- object we're operating on
ubfx w2, wINST, #8, #4 // w2<- A
- cmp x3, #0 // check object for null
- beq common_errNullObject // object was null
+ cbz w3, common_errNullObject // object was null
ldr w0, [x3, x1] // w0<- obj.field
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
@@ -6489,7 +6461,7 @@
FETCH w4, 1 // w4<- field byte offset
GET_VREG w3, w2 // w3<- object we're operating on
ubfx w2, wINST, #8, #4 // w2<- A
- cbz w3, common_errNullObject // object was null
+ cbz w3, common_errNullObject // object was null
add x4, x3, x4 // create direct pointer
ldr x0, [x4]
FETCH_ADVANCE_INST 2 // advance rPC, load wINST
@@ -6544,8 +6516,7 @@
FETCH w3, 1 // w3<- field byte offset
GET_VREG w2, w2 // w2<- fp[B], the object pointer
ubfx w0, wINST, #8, #4 // w0<- A
- cmp w2, #0 // check object for null
- beq common_errNullObject // object was null
+ cbz w2, common_errNullObject // object was null
GET_VREG_WIDE x0, w0 // x0-< fp[A]
FETCH_ADVANCE_INST 2 // advance rPC, load wINST
add x1, x2, x3 // create a direct pointer
@@ -6710,8 +6681,7 @@
FETCH w1, 1 // w1<- field byte offset
GET_VREG w3, w2 // w3<- object we're operating on
ubfx w2, wINST, #8, #4 // w2<- A
- cmp x3, #0 // check object for null
- beq common_errNullObject // object was null
+ cbz w3, common_errNullObject // object was null
ldrb w0, [x3, x1] // w0<- obj.field
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
@@ -6731,8 +6701,7 @@
FETCH w1, 1 // w1<- field byte offset
GET_VREG w3, w2 // w3<- object we're operating on
ubfx w2, wINST, #8, #4 // w2<- A
- cmp x3, #0 // check object for null
- beq common_errNullObject // object was null
+ cbz w3, common_errNullObject // object was null
ldrsb w0, [x3, x1] // w0<- obj.field
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
@@ -6752,8 +6721,7 @@
FETCH w1, 1 // w1<- field byte offset
GET_VREG w3, w2 // w3<- object we're operating on
ubfx w2, wINST, #8, #4 // w2<- A
- cmp x3, #0 // check object for null
- beq common_errNullObject // object was null
+ cbz w3, common_errNullObject // object was null
ldrh w0, [x3, x1] // w0<- obj.field
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
@@ -6773,8 +6741,7 @@
FETCH w1, 1 // w1<- field byte offset
GET_VREG w3, w2 // w3<- object we're operating on
ubfx w2, wINST, #8, #4 // w2<- A
- cmp x3, #0 // check object for null
- beq common_errNullObject // object was null
+ cbz w3, common_errNullObject // object was null
ldrsh w0, [x3, x1] // w0<- obj.field
FETCH_ADVANCE_INST 2 // advance rPC, load rINST
@@ -11521,7 +11488,7 @@
#if MTERP_LOGGING
mov x0, xSELF
add x1, xFP, #OFF_FP_SHADOWFRAME
- sbfm x2, xINST, 0, 31
+ sxtw x2, wINST
bl MterpLogOSR
#endif
mov x0, #1 // Signal normal return
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 9c77d38..1c31c57 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -748,21 +748,24 @@
return nullptr;
}
-ArtField* Class::FindStaticField(Thread* self, Handle<Class> klass, const DexCache* dex_cache,
+ArtField* Class::FindStaticField(Thread* self,
+ Class* klass,
+ const DexCache* dex_cache,
uint32_t dex_field_idx) {
- for (Class* k = klass.Get(); k != nullptr; k = k->GetSuperClass()) {
+ for (Class* k = klass; k != nullptr; k = k->GetSuperClass()) {
// Is the field in this class?
ArtField* f = k->FindDeclaredStaticField(dex_cache, dex_field_idx);
if (f != nullptr) {
return f;
}
- // Wrap k incase it moves during GetDirectInterface.
+ // Though GetDirectInterface() should not cause thread suspension when called
+ // from here, it takes a Handle as an argument, so we need to wrap `k`.
+ ScopedAssertNoThreadSuspension ants(self, __FUNCTION__);
StackHandleScope<1> hs(self);
- HandleWrapper<mirror::Class> h_k(hs.NewHandleWrapper(&k));
+ Handle<mirror::Class> h_k(hs.NewHandle(k));
// Is this field in any of this class' interfaces?
for (uint32_t i = 0; i < h_k->NumDirectInterfaces(); ++i) {
- StackHandleScope<1> hs2(self);
- Handle<mirror::Class> interface(hs2.NewHandle(GetDirectInterface(self, h_k, i)));
+ mirror::Class* interface = GetDirectInterface(self, h_k, i);
f = FindStaticField(self, interface, dex_cache, dex_field_idx);
if (f != nullptr) {
return f;
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index f044b59..9be9f01 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -1091,7 +1091,9 @@
// Finds the given static field in this class or superclass, only searches classes that
// have the same dex cache.
- static ArtField* FindStaticField(Thread* self, Handle<Class> klass, const DexCache* dex_cache,
+ static ArtField* FindStaticField(Thread* self,
+ Class* klass,
+ const DexCache* dex_cache,
uint32_t dex_field_idx)
SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 79b18aa..d987f65 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -342,7 +342,7 @@
return;
}
if (is_static) {
- field = mirror::Class::FindStaticField(self, klass, dex_cache.Get(), field_idx);
+ field = mirror::Class::FindStaticField(self, klass.Get(), dex_cache.Get(), field_idx);
} else {
field = klass->FindInstanceField(dex_cache.Get(), field_idx);
}
diff --git a/runtime/oat.h b/runtime/oat.h
index e506e3c..9b8f545 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
class PACKED(4) OatHeader {
public:
static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
- static constexpr uint8_t kOatVersion[] = { '0', '8', '3', '\0' };
+ static constexpr uint8_t kOatVersion[] = { '0', '8', '4', '\0' };
static constexpr const char* kImageLocationKey = "image-location";
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 0c3eb3b..92efa21 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -220,7 +220,7 @@
}
inline mirror::Object* ReadBarrier::Mark(mirror::Object* obj) {
- return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->Mark(obj);
+ return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->MarkFromReadBarrier(obj);
}
inline bool ReadBarrier::HasGrayReadBarrierPointer(mirror::Object* obj,
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 21cd2aa..079c079 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -989,6 +989,7 @@
xgc_option.verify_pre_sweeping_rosalloc_,
xgc_option.verify_post_gc_rosalloc_,
xgc_option.gcstress_,
+ xgc_option.measure_,
runtime_options.GetOrDefault(Opt::EnableHSpaceCompactForOOM),
runtime_options.GetOrDefault(Opt::HSpaceCompactForOOMMinIntervalsMs));
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 0acc54d..e77a11e 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -869,7 +869,7 @@
bool Trace::RegisterThread(Thread* thread) {
pid_t tid = thread->GetTid();
CHECK_LT(0U, static_cast<uint32_t>(tid));
- CHECK_LT(static_cast<uint32_t>(tid), 65536U);
+ CHECK_LT(static_cast<uint32_t>(tid), kMaxThreadIdNumber);
if (!(*seen_threads_)[tid]) {
seen_threads_->set(tid);
diff --git a/runtime/trace.h b/runtime/trace.h
index 80f1a4c..9b29fb9 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -41,7 +41,9 @@
class Thread;
using DexIndexBitSet = std::bitset<65536>;
-using ThreadIDBitSet = std::bitset<65536>;
+
+constexpr size_t kMaxThreadIdNumber = kIsTargetBuild ? 65536U : 1048576U;
+using ThreadIDBitSet = std::bitset<kMaxThreadIdNumber>;
enum TracingMode {
kTracingInactive,
diff --git a/test/501-regression-packed-switch/info.txt b/test/501-regression-packed-switch/info.txt
index fbd93fa..988b220 100644
--- a/test/501-regression-packed-switch/info.txt
+++ b/test/501-regression-packed-switch/info.txt
@@ -1,2 +1,4 @@
Regression test for the interpreter and optimizing's builder which used
to trip when compiled code contained a packed switch with no targets.
+Regression test for the arm64 mterp miscalculating the switch table
+address, zero-extending a register instead of sign-extending.
diff --git a/test/501-regression-packed-switch/smali/Test.smali b/test/501-regression-packed-switch/smali/Test.smali
index 8756ed5..5a760c7 100644
--- a/test/501-regression-packed-switch/smali/Test.smali
+++ b/test/501-regression-packed-switch/smali/Test.smali
@@ -27,3 +27,28 @@
.packed-switch 0x0
.end packed-switch
.end method
+
+.method public static PackedSwitchAfterData(I)I
+ .registers 1
+ goto :pswitch_instr
+
+ :case0
+ const/4 v0, 0x1
+ return v0
+
+ :pswitch_data
+ .packed-switch 0x0
+ :case0
+ :case1
+ .end packed-switch
+
+ :pswitch_instr
+ packed-switch v0, :pswitch_data
+ const/4 v0, 0x7
+ return v0
+
+ :case1
+ const/4 v0, 0x4
+ return v0
+
+.end method
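The new PackedSwitchAfterData method puts the switch payload before the packed-switch instruction, so the signed 32-bit payload offset is negative; as the info.txt change above notes, the old arm64 mterp zero-extended that offset when forming the table address, while the fixed code sign-extends it. A C++ sketch of the corrected address computation, with illustrative names:

#include <cstdint>

// pc points at the packed-switch instruction; offset is the signed 32-bit
// BBBBbbbb operand measured in 16-bit code units. Pointer arithmetic on
// uint16_t* scales by code units, matching the `lsl #1` in the assembly.
const uint16_t* SwitchPayloadAddress(const uint16_t* pc, int32_t offset) {
  return pc + static_cast<int64_t>(offset);  // sign-extend before adding
}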
diff --git a/test/501-regression-packed-switch/src/Main.java b/test/501-regression-packed-switch/src/Main.java
index b80bc62..12bc1a8 100644
--- a/test/501-regression-packed-switch/src/Main.java
+++ b/test/501-regression-packed-switch/src/Main.java
@@ -29,5 +29,10 @@
if (result != 5) {
throw new Error("Expected 5, got " + result);
}
+ m = c.getMethod("PackedSwitchAfterData", new Class[] { int.class });
+ result = (Integer) m.invoke(null, new Integer(0));
+ if (result != 1) {
+ throw new Error("Expected 1, got " + result);
+ }
}
}
diff --git a/test/527-checker-array-access-split/src/Main.java b/test/527-checker-array-access-split/src/Main.java
index ead9446..3366f20 100644
--- a/test/527-checker-array-access-split/src/Main.java
+++ b/test/527-checker-array-access-split/src/Main.java
@@ -34,9 +34,21 @@
/// CHECK-START-ARM64: int Main.constantIndexGet(int[]) instruction_simplifier_arm64 (after)
/// CHECK: <<Array:l\d+>> NullCheck
/// CHECK: <<Index:i\d+>> BoundsCheck
- /// CHECK-NOT: Arm64IntermediateAddress
+ /// CHECK-NOT: IntermediateAddress
/// CHECK: ArrayGet [<<Array>>,<<Index>>]
+
+ /// CHECK-START-ARM: int Main.constantIndexGet(int[]) instruction_simplifier_arm (before)
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: ArrayGet [<<Array>>,<<Index>>]
+
+ /// CHECK-START-ARM: int Main.constantIndexGet(int[]) instruction_simplifier_arm (after)
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK-NOT: IntermediateAddress
+ /// CHECK: ArrayGet [<<Array>>,<<Index>>]
+
public static int constantIndexGet(int array[]) {
return array[1];
}
@@ -55,10 +67,23 @@
/// CHECK: <<Const2:i\d+>> IntConstant 2
/// CHECK: <<Array:l\d+>> NullCheck
/// CHECK: <<Index:i\d+>> BoundsCheck
- /// CHECK-NOT: Arm64IntermediateAddress
+ /// CHECK-NOT: IntermediateAddress
/// CHECK: ArraySet [<<Array>>,<<Index>>,<<Const2>>]
+ /// CHECK-START-ARM: void Main.constantIndexSet(int[]) instruction_simplifier_arm (before)
+ /// CHECK: <<Const2:i\d+>> IntConstant 2
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Const2>>]
+
+ /// CHECK-START-ARM: void Main.constantIndexSet(int[]) instruction_simplifier_arm (after)
+ /// CHECK: <<Const2:i\d+>> IntConstant 2
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK-NOT: IntermediateAddress
+ /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Const2>>]
+
public static void constantIndexSet(int array[]) {
array[1] = 2;
}
@@ -76,7 +101,20 @@
/// CHECK: <<DataOffset:i\d+>> IntConstant
/// CHECK: <<Array:l\d+>> NullCheck
/// CHECK: <<Index:i\d+>> BoundsCheck
- /// CHECK: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-NEXT: ArrayGet [<<Address>>,<<Index>>]
+
+
+ /// CHECK-START-ARM: int Main.get(int[], int) instruction_simplifier_arm (before)
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: ArrayGet [<<Array>>,<<Index>>]
+
+ /// CHECK-START-ARM: int Main.get(int[], int) instruction_simplifier_arm (after)
+ /// CHECK: <<DataOffset:i\d+>> IntConstant
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-NEXT: ArrayGet [<<Address>>,<<Index>>]
public static int get(int array[], int index) {
@@ -102,7 +140,26 @@
/// CHECK: <<DataOffset:i\d+>> IntConstant
/// CHECK: <<Array:l\d+>> NullCheck
/// CHECK: <<Index:i\d+>> BoundsCheck
- /// CHECK: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-NEXT: ArraySet [<<Address>>,<<Index>>,<<Arg>>]
+
+
+ /// CHECK-START-ARM: void Main.set(int[], int, int) instruction_simplifier_arm (before)
+ /// CHECK: ParameterValue
+ /// CHECK: ParameterValue
+ /// CHECK: <<Arg:i\d+>> ParameterValue
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Arg>>]
+
+ /// CHECK-START-ARM: void Main.set(int[], int, int) instruction_simplifier_arm (after)
+ /// CHECK: ParameterValue
+ /// CHECK: ParameterValue
+ /// CHECK: <<Arg:i\d+>> ParameterValue
+ /// CHECK: <<DataOffset:i\d+>> IntConstant
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-NEXT: ArraySet [<<Address>>,<<Index>>,<<Arg>>]
public static void set(int array[], int index, int value) {
@@ -126,10 +183,10 @@
/// CHECK-DAG: <<DataOffset:i\d+>> IntConstant
/// CHECK: <<Array:l\d+>> NullCheck
/// CHECK: <<Index:i\d+>> BoundsCheck
- /// CHECK: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
/// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
- /// CHECK: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>]
/// CHECK-START-ARM64: void Main.getSet(int[], int) GVN_after_arch (after)
@@ -137,12 +194,42 @@
/// CHECK-DAG: <<DataOffset:i\d+>> IntConstant
/// CHECK: <<Array:l\d+>> NullCheck
/// CHECK: <<Index:i\d+>> BoundsCheck
- /// CHECK: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address>>,<<Index>>]
/// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
- /// CHECK-NOT: Arm64IntermediateAddress
+ /// CHECK-NOT: IntermediateAddress
/// CHECK: ArraySet [<<Address>>,<<Index>>,<<Add>>]
+
+ /// CHECK-START-ARM: void Main.getSet(int[], int) instruction_simplifier_arm (before)
+ /// CHECK: <<Const1:i\d+>> IntConstant 1
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Array>>,<<Index>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
+ /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+ /// CHECK-START-ARM: void Main.getSet(int[], int) instruction_simplifier_arm (after)
+ /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
+ /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+ /// CHECK-START-ARM: void Main.getSet(int[], int) GVN_after_arch (after)
+ /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address>>,<<Index>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
+ /// CHECK-NOT: IntermediateAddress
+ /// CHECK: ArraySet [<<Address>>,<<Index>>,<<Add>>]
public static void getSet(int array[], int index) {
array[index] = array[index] + 1;
}
@@ -166,11 +253,11 @@
/// CHECK-DAG: <<DataOffset:i\d+>> IntConstant
/// CHECK: <<Array:l\d+>> NullCheck
/// CHECK: <<Index:i\d+>> BoundsCheck
- /// CHECK: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
/// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
/// CHECK: NewArray
- /// CHECK: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>]
/// CHECK-START-ARM64: int[] Main.accrossGC(int[], int) GVN_after_arch (after)
@@ -178,11 +265,45 @@
/// CHECK-DAG: <<DataOffset:i\d+>> IntConstant
/// CHECK: <<Array:l\d+>> NullCheck
/// CHECK: <<Index:i\d+>> BoundsCheck
- /// CHECK: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
/// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
/// CHECK: NewArray
- /// CHECK: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+
+ /// CHECK-START-ARM: int[] Main.accrossGC(int[], int) instruction_simplifier_arm (before)
+ /// CHECK: <<Const1:i\d+>> IntConstant 1
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Array>>,<<Index>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
+ /// CHECK: NewArray
+ /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+ /// CHECK-START-ARM: int[] Main.accrossGC(int[], int) instruction_simplifier_arm (after)
+ /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
+ /// CHECK: NewArray
+ /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+ /// CHECK-START-ARM: int[] Main.accrossGC(int[], int) GVN_after_arch (after)
+ /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant
+ /// CHECK: <<Array:l\d+>> NullCheck
+ /// CHECK: <<Index:i\d+>> BoundsCheck
+ /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
+ /// CHECK: NewArray
+ /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK: ArraySet [<<Address2>>,<<Index>>,<<Add>>]
public static int[] accrossGC(int array[], int index) {
@@ -196,6 +317,14 @@
* Test that the intermediate address is shared between array accesses after
* the bounds checks have been removed by BCE.
*/
+ // For the `instruction_simplifier_<arch> (after)` checker tests below, note that BCE has
+ // already removed the bounds checks in the loop by the time the architecture-specific
+ // instruction simplifier runs.
+
+ // Note that we do not actually care that `DataOffset` is `12`. However, if we do not pin
+ // its value and some other `IntConstant` appears before that instruction, checker will
+ // bind `<<DataOffset>>` to the earlier `IntConstant` and the check will fail.
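+ // For example (hypothetically), if `DataOffset` were matched only as
+ // `<<DataOffset:i\d+>> IntConstant` here, it could capture the earlier `IntConstant 1`
+ // used for `Const1`, and the following `IntermediateAddress [<<Array>>,<<DataOffset>>]`
+ // lines would then fail to match.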
/// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (before)
/// CHECK: <<Const1:i\d+>> IntConstant 1
@@ -207,14 +336,6 @@
/// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
/// CHECK: ArraySet [<<Array>>,<<Index>>,<<Add>>]
- // By the time we reach the architecture-specific instruction simplifier, BCE
- // has removed the bounds checks in the loop.
-
- // Note that we do not care that the `DataOffset` is `12`. But if we do not
- // specify it and any other `IntConstant` appears before that instruction,
- // checker will match the previous `IntConstant`, and we will thus fail the
- // check.
-
/// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (after)
/// CHECK-DAG: <<Const1:i\d+>> IntConstant 1
/// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12
@@ -222,10 +343,10 @@
/// CHECK: <<Index:i\d+>> Phi
/// CHECK: If
// -------------- Loop
- /// CHECK: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
/// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
- /// CHECK: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>]
/// CHECK-START-ARM64: int Main.canMergeAfterBCE1() GVN_after_arch (after)
@@ -235,10 +356,47 @@
/// CHECK: <<Index:i\d+>> Phi
/// CHECK: If
// -------------- Loop
- /// CHECK: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address>>,<<Index>>]
/// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
- /// CHECK-NOT: Arm64IntermediateAddress
+ /// CHECK-NOT: IntermediateAddress
+ /// CHECK: ArraySet [<<Address>>,<<Index>>,<<Add>>]
+
+
+ /// CHECK-START-ARM: int Main.canMergeAfterBCE1() instruction_simplifier_arm (before)
+ /// CHECK: <<Const1:i\d+>> IntConstant 1
+ /// CHECK: <<Array:l\d+>> NewArray
+ /// CHECK: <<Index:i\d+>> Phi
+ /// CHECK: If
+ // -------------- Loop
+ /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Array>>,<<Index>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
+ /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+ /// CHECK-START-ARM: int Main.canMergeAfterBCE1() instruction_simplifier_arm (after)
+ /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12
+ /// CHECK: <<Array:l\d+>> NewArray
+ /// CHECK: <<Index:i\d+>> Phi
+ /// CHECK: If
+ // -------------- Loop
+ /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
+ /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+ /// CHECK-START-ARM: int Main.canMergeAfterBCE1() GVN_after_arch (after)
+ /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12
+ /// CHECK: <<Array:l\d+>> NewArray
+ /// CHECK: <<Index:i\d+>> Phi
+ /// CHECK: If
+ // -------------- Loop
+ /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address>>,<<Index>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>]
+ /// CHECK-NOT: IntermediateAddress
/// CHECK: ArraySet [<<Address>>,<<Index>>,<<Add>>]
public static int canMergeAfterBCE1() {
@@ -279,12 +437,12 @@
/// CHECK: If
// -------------- Loop
/// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>]
- /// CHECK-DAG: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-DAG: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
- /// CHECK-DAG: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-DAG: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address2>>,<<Index1>>]
/// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>]
- /// CHECK: <<Address3:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: <<Address3:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK: ArraySet [<<Address3>>,<<Index1>>,<<Add>>]
/// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN_after_arch (after)
@@ -295,7 +453,7 @@
/// CHECK: If
// -------------- Loop
/// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>]
- /// CHECK-DAG: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-DAG: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
/// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address>>,<<Index>>]
/// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address>>,<<Index1>>]
/// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>]
@@ -304,8 +462,55 @@
// There should be only one intermediate address computation in the loop.
/// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN_after_arch (after)
- /// CHECK: Arm64IntermediateAddress
- /// CHECK-NOT: Arm64IntermediateAddress
+ /// CHECK: IntermediateAddress
+ /// CHECK-NOT: IntermediateAddress
+
+
+ /// CHECK-START-ARM: int Main.canMergeAfterBCE2() instruction_simplifier_arm (before)
+ /// CHECK: <<Const1:i\d+>> IntConstant 1
+ /// CHECK: <<Array:l\d+>> NewArray
+ /// CHECK: <<Index:i\d+>> Phi
+ /// CHECK: If
+ // -------------- Loop
+ /// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>]
+ /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Array>>,<<Index>>]
+ /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Array>>,<<Index1>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+ /// CHECK: ArraySet [<<Array>>,<<Index1>>,<<Add>>]
+
+ /// CHECK-START-ARM: int Main.canMergeAfterBCE2() instruction_simplifier_arm (after)
+ /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12
+ /// CHECK: <<Array:l\d+>> NewArray
+ /// CHECK: <<Index:i\d+>> Phi
+ /// CHECK: If
+ // -------------- Loop
+ /// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>]
+ /// CHECK-DAG: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address1>>,<<Index>>]
+ /// CHECK-DAG: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address2>>,<<Index1>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+ /// CHECK: <<Address3:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK: ArraySet [<<Address3>>,<<Index1>>,<<Add>>]
+
+ /// CHECK-START-ARM: int Main.canMergeAfterBCE2() GVN_after_arch (after)
+ /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12
+ /// CHECK: <<Array:l\d+>> NewArray
+ /// CHECK: <<Index:i\d+>> Phi
+ /// CHECK: If
+ // -------------- Loop
+ /// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>]
+ /// CHECK-DAG: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>]
+ /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address>>,<<Index>>]
+ /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address>>,<<Index1>>]
+ /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+ /// CHECK: ArraySet [<<Address>>,<<Index1>>,<<Add>>]
+
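+ // As on ARM64, there should be only one intermediate address computation in the loop.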
+ /// CHECK-START-ARM: int Main.canMergeAfterBCE2() GVN_after_arch (after)
+ /// CHECK: IntermediateAddress
+ /// CHECK-NOT: IntermediateAddress
public static int canMergeAfterBCE2() {
int[] array = {0, 1, 2, 3};
@@ -315,6 +520,37 @@
return array[array.length - 1];
}
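+
+ // Check that on ARM the instruction simplifier does not introduce IntermediateAddress
+ // for long, float or double array accesses (presumably because the addressing modes used
+ // for these wide accesses cannot take advantage of the extracted base address); the
+ // checks below verify that the ArrayGets keep using the arrays directly and that no
+ // IntermediateAddress appears.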
+ /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (before)
+ /// CHECK-DAG: <<Array1:l\d+>> NewArray
+ /// CHECK-DAG: <<Array2:l\d+>> NewArray
+ /// CHECK-DAG: <<Array3:l\d+>> NewArray
+ /// CHECK-DAG: <<Index:i\d+>> Phi
+ /// CHECK-DAG: ArrayGet [<<Array1>>,<<Index>>]
+ /// CHECK-DAG: ArrayGet [<<Array2>>,<<Index>>]
+ /// CHECK-DAG: ArrayGet [<<Array3>>,<<Index>>]
+
+ /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (after)
+ /// CHECK-DAG: <<Array1:l\d+>> NewArray
+ /// CHECK-DAG: <<Array2:l\d+>> NewArray
+ /// CHECK-DAG: <<Array3:l\d+>> NewArray
+ /// CHECK-DAG: <<Index:i\d+>> Phi
+ /// CHECK-DAG: ArrayGet [<<Array1>>,<<Index>>]
+ /// CHECK-DAG: ArrayGet [<<Array2>>,<<Index>>]
+ /// CHECK-DAG: ArrayGet [<<Array3>>,<<Index>>]
+
+ /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (after)
+ /// CHECK-NOT: IntermediateAddress
+ public static int checkLongFloatDouble() {
+ long[] array_long = {0, 1, 2, 3};
+ float[] array_float = {(float)0.0, (float)1.0, (float)2.0, (float)3.0};
+ double[] array_double = {0.0, 1.0, 2.0, 3.0};
+ double s = 0.0;
+
+ for (int i = 0; i < 4; i++) {
+ s += (double)array_long[i] + (double)array_float[i] + array_double[i];
+ }
+ return (int)s;
+ }
public static void main(String[] args) {
int[] array = {123, 456, 789};
@@ -337,5 +573,7 @@
assertIntEquals(4, canMergeAfterBCE1());
assertIntEquals(6, canMergeAfterBCE2());
+
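+ // checkLongFloatDouble() sums array_long[i] + array_float[i] + array_double[i] = 3 * i
+ // for i in 0..3, i.e. 0 + 3 + 6 + 9 = 18.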
+ assertIntEquals(18, checkLongFloatDouble());
}
}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index dd6b6f3..8f8b667 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -527,7 +527,7 @@
# Tests that should fail in the read barrier configuration with the Optimizing compiler (AOT).
# 484: Baker's fast path based read barrier compiler instrumentation generates code containing
# more parallel moves on x86, thus some Checker assertions may fail.
-# 527: On ARM64, the read barrier instrumentation does not support the HArm64IntermediateAddress
+# 527: On ARM64 and ARM, the read barrier instrumentation does not support the HIntermediateAddress
# instruction yet (b/26601270).
# 537: Expects an array copy to be intrinsified on x86-64, but calling-on-slowpath intrinsics are
# not yet handled in the read barrier configuration.
diff --git a/test/run-test b/test/run-test
index bbcd4b0..1ef5428 100755
--- a/test/run-test
+++ b/test/run-test
@@ -37,7 +37,7 @@
if [ -z "$TMPDIR" ]; then
tmp_dir="/tmp/$USER/${test_dir}"
else
- tmp_dir="${TMPDIR}/$USER/${test_dir}"
+ tmp_dir="${TMPDIR}/${test_dir}"
fi
checker="${progdir}/../tools/checker/checker.py"
export JAVA="java"