X86_64: Inline the type check for kInterfaceCheck check-cast
Interface check-cast with read barriers:
Before: 5.27 ns/check
After: 2.36 ns/check
Code expansion is 0.29%.
Test: test-art-host
Bug: 32577579
Bug: 29516974
Change-Id: Ie48251f97fbf2c95bd0a7c4bd62075414f8e3ae3
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 49f33d2..163858e 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -5741,6 +5741,10 @@
}
static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+ if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+ // We need a temporary for holding the iftable length.
+ return true;
+ }
return kEmitCompilerReadBarrier &&
!kUseBakerReadBarrier &&
(type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -5809,7 +5813,11 @@
}
// /* HeapReference<Class> */ out = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ kEmitCompilerReadBarrier);
switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
@@ -5979,19 +5987,26 @@
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
case TypeCheckKind::kArrayObjectCheck:
+ case TypeCheckKind::kInterfaceCheck:
call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
- case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::Any());
+ if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+ // Require a register for the interface check since there is a loop that compares the class to
+ // a memory address.
+ locations->SetInAt(1, Location::RequiresRegister());
+ } else {
+ locations->SetInAt(1, Location::Any());
+ }
+
// Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
locations->AddTemp(Location::RequiresRegister());
// When read barriers are enabled, we need an additional temporary
@@ -6012,10 +6027,13 @@
Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
locations->GetTemp(1) :
Location::NoLocation();
- uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
- uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
- uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
- uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+ const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+ const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+ const int object_array_data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
bool is_type_check_slow_path_fatal =
(type_check_kind == TypeCheckKind::kExactCheck ||
@@ -6039,8 +6057,11 @@
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kEmitCompilerReadBarrier);
if (cls.IsRegister()) {
__ cmpl(temp, cls.AsRegister<CpuRegister>());
} else {
@@ -6063,8 +6084,11 @@
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kEmitCompilerReadBarrier);
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
NearLabel loop, compare_classes;
@@ -6083,7 +6107,11 @@
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kEmitCompilerReadBarrier);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&compare_classes);
@@ -6107,8 +6135,11 @@
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kEmitCompilerReadBarrier);
// Walk over the class hierarchy to find a match.
NearLabel loop;
__ Bind(&loop);
@@ -6133,7 +6164,11 @@
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kEmitCompilerReadBarrier);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&done);
break;
@@ -6152,8 +6187,11 @@
}
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kEmitCompilerReadBarrier);
// Do an exact check.
NearLabel check_non_primitive_component_type;
if (cls.IsRegister()) {
@@ -6180,7 +6218,11 @@
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kEmitCompilerReadBarrier);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&check_non_primitive_component_type);
@@ -6188,7 +6230,11 @@
__ j(kEqual, &done);
// Same comment as above regarding `temp` and the slow path.
// /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kEmitCompilerReadBarrier);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&done);
break;
@@ -6197,17 +6243,15 @@
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
NearLabel done;
+
// Avoid null check if we know obj is not null.
if (instruction->MustDoNullCheck()) {
__ testl(obj, obj);
__ j(kEqual, &done);
}
- // /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
- // We always go into the type check slow path for the unresolved
- // and interface check cases.
+ // We always go into the type check slow path for the unresolved case.
//
// We cannot directly call the CheckCast runtime entry point
// without resorting to a type checking slow path here (i.e. by
@@ -6223,6 +6267,53 @@
// case of the `switch` code as it was previously (with a direct
// call to the runtime not using a type checking slow path).
// This should also be beneficial for the other cases above.
+
+ // Fast path for the interface check. The inner loop compares cls with a memory location, so
+ // with heap poisoning cls would have to be poisoned. However, unpoisoning cls afterwards would
+ // clobber the condition flags and break the conditional jump, so skip the fast path then.
+ if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) {
+ // Try to avoid read barriers to improve the fast path. We cannot get false positives by
+ // doing this.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ /*emit_read_barrier*/ false);
+
+ // /* HeapReference<Class> */ temp = temp->iftable_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ temp_loc,
+ iftable_offset,
+ /*emit_read_barrier*/ false);
+ NearLabel is_null;
+ // Null iftable means it is empty.
+ __ testl(temp_loc.AsRegister<CpuRegister>(), temp_loc.AsRegister<CpuRegister>());
+ __ j(kZero, &is_null);
+
+ // Loop through the iftable and check if any class matches.
+ __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(),
+ Address(temp_loc.AsRegister<CpuRegister>(), array_length_offset));
+
+ NearLabel start_loop;
+ __ Bind(&start_loop);
+ __ cmpl(cls.AsRegister<CpuRegister>(),
+ Address(temp_loc.AsRegister<CpuRegister>(), object_array_data_offset));
+ __ j(kEqual, &done); // Return if same class.
+ // Go to next interface.
+ __ addq(temp_loc.AsRegister<CpuRegister>(), Immediate(2 * kHeapReferenceSize));
+ __ subq(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
+ __ j(kNotZero, &start_loop);
+ __ Bind(&is_null);
+ }
+
+ // Since we clobbered temp_loc holding the class, we need to reload it.
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kEmitCompilerReadBarrier);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&done);
break;
@@ -6397,10 +6488,11 @@
void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
Location out,
Location obj,
- uint32_t offset) {
+ uint32_t offset,
+ bool emit_read_barrier) {
CpuRegister out_reg = out.AsRegister<CpuRegister>();
CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
- if (kEmitCompilerReadBarrier) {
+ if (emit_read_barrier) {
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)