Merge "ARM: Add new String.compareTo intrinsic."
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index de04175..97b8839 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -987,31 +987,126 @@
void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) {
// The inputs plus three temps.
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCall,
+ invoke->InputAt(1)->CanBeNull()
+ ? LocationSummary::kCallOnSlowPath  // The code generator adds a slow path only for a nullable argument.
+ : LocationSummary::kNoCall,  // Otherwise the comparison is emitted fully inline.
kIntrinsified);
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
- locations->SetOut(Location::RegisterLocation(R0));
+ locations->SetInAt(0, Location::RequiresRegister());  // The string compareTo is invoked on.
+ locations->SetInAt(1, Location::RequiresRegister());  // The argument string.
+ locations->AddTemp(Location::RequiresRegister());  // temp0: min length, then chars left to compare.
+ locations->AddTemp(Location::RequiresRegister());  // temp1: arg length, byte offset, then diff bit index.
+ locations->AddTemp(Location::RequiresRegister());  // temp2: this length, then data loaded from arg.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);  // out is written while inputs are live.
}
// Emits the ARM code for the String.compareTo intrinsic; the result is left in the output
// register of the invoke's LocationSummary.
void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) {
ArmAssembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
+ Register str = locations->InAt(0).AsRegister<Register>();  // The string compareTo is invoked on.
+ Register arg = locations->InAt(1).AsRegister<Register>();  // The string it is compared against.
+ Register out = locations->Out().AsRegister<Register>();  // Result of the comparison.
+
+ Register temp0 = locations->GetTemp(0).AsRegister<Register>();
+ Register temp1 = locations->GetTemp(1).AsRegister<Register>();
+ Register temp2 = locations->GetTemp(2).AsRegister<Register>();
+
+ Label loop;  // Top of the unrolled comparison loop.
+ Label find_char_diff;  // Reached when two loaded words differ.
+ Label end;  // Common exit; out holds the result when control gets here.
+
+ // Get offsets of count and value fields within a string object.
+ const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+ const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+
// Note that the null check must have been done earlier.
DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
- Register argument = locations->InAt(1).AsRegister<Register>();
- __ cmp(argument, ShifterOperand(0));
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
- codegen_->AddSlowPath(slow_path);
- __ b(slow_path->GetEntryLabel(), EQ);
+ // Take slow path and throw if input can be and is null.
+ SlowPathCode* slow_path = nullptr;
+ const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
+ if (can_slow_path) {
+ slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+ codegen_->AddSlowPath(slow_path);
+ __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
+ }
- __ LoadFromOffset(
- kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pStringCompareTo).Int32Value());
- __ blx(LR);
- __ Bind(slow_path->GetExitLabel());
+ // Reference equality check, return 0 if same reference.
+ __ subs(out, str, ShifterOperand(arg));  // out = str - arg; the flags feed the branch below.
+ __ b(&end, EQ);  // Same reference: out is already 0.
+ // Load lengths of this and argument strings.
+ __ ldr(temp2, Address(str, count_offset));  // temp2 = len(str)
+ __ ldr(temp1, Address(arg, count_offset));  // temp1 = len(arg)
+ // out = length diff.
+ __ subs(out, temp2, ShifterOperand(temp1));  // The flags select the IT-block move below.
+ // temp0 = min(len(str), len(arg)).
+ __ it(Condition::LT, kItElse);
+ __ mov(temp0, ShifterOperand(temp2), Condition::LT);  // len(str) < len(arg): take len(str).
+ __ mov(temp0, ShifterOperand(temp1), Condition::GE);  // Otherwise take len(arg).
+ // Shorter string is empty?
+ __ CompareAndBranchIfZero(temp0, &end);  // Nothing to compare: out (length diff) is the result.
+
+ // Store offset of string value in preparation for comparison loop.
+ __ mov(temp1, ShifterOperand(value_offset));  // temp1 = byte offset of the next data to load.
+
+ // Assertions that must hold in order to compare multiple characters at a time.
+ CHECK_ALIGNED(value_offset, 8);
+ static_assert(IsAligned<8>(kObjectAlignment),
+ "String data must be 8-byte aligned for unrolled CompareTo loop.");
+
+ const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+ DCHECK_EQ(char_size, 2u);
+
+ // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
+ __ Bind(&loop);
+ __ ldr(IP, Address(str, temp1));  // IP = two chars of str.
+ __ ldr(temp2, Address(arg, temp1));  // temp2 = two chars of arg.
+ __ cmp(IP, ShifterOperand(temp2));
+ __ b(&find_char_diff, NE);
+ __ add(temp1, temp1, ShifterOperand(char_size * 2));  // Advance the offset by two chars.
+ __ sub(temp0, temp0, ShifterOperand(2));  // Two chars consumed; temp0 may now be <= 0.
+
+ __ ldr(IP, Address(str, temp1));  // Second half of the unrolled iteration.
+ __ ldr(temp2, Address(arg, temp1));
+ __ cmp(IP, ShifterOperand(temp2));
+ __ b(&find_char_diff, NE);
+ __ add(temp1, temp1, ShifterOperand(char_size * 2));
+ __ subs(temp0, temp0, ShifterOperand(2));  // The flags decide whether to loop again.
+
+ __ b(&loop, GT);  // Chars remain in the shorter string.
+ __ b(&end);  // All compared chars were equal: out still holds the length diff.
+
+ // Find the single 16-bit character difference.
+ __ Bind(&find_char_diff);
+ // Get the bit position of the first character that differs.
+ __ eor(temp1, temp2, ShifterOperand(IP));  // temp1 = bits in which the two words differ (non-zero).
+ __ rbit(temp1, temp1);  // Reverse the bits so the CLZ below counts from bit 0.
+ __ clz(temp1, temp1);  // temp1 = index of the lowest differing bit (0..31).
+
+ // temp0 = number of 16-bit characters remaining to compare.
+ // (it could be < 1 if a difference is found after the first SUB in the comparison loop, and
+ // after the end of the shorter string data).
+
+ // (temp1 >> 4) = character where difference occurs between the last two words compared, on the
+ // interval [0,1] (0 for low half-word different, 1 for high half-word different).
+
+ // If temp0 <= (temp1 >> 4), the difference occurs outside the remaining string data, so just
+ // return length diff (out).
+ __ cmp(temp0, ShifterOperand(temp1, LSR, 4));
+ __ b(&end, LE);
+ // Extract the characters and calculate the difference.
+ __ bic(temp1, temp1, ShifterOperand(0xf));  // temp1 = 0 or 16: bit offset of the differing half-word.
+ __ Lsr(temp2, temp2, temp1);  // Move the differing arg char into the low half-word.
+ __ Lsr(IP, IP, temp1);  // Likewise for the str char.
+ __ movt(temp2, 0);  // Zero the top half-word so only the char remains.
+ __ movt(IP, 0);
+ __ sub(out, IP, ShifterOperand(temp2));  // out = str char - arg char.
+
+ __ Bind(&end);
+
+ if (can_slow_path) {
+ __ Bind(slow_path->GetExitLabel());  // The slow path, when present, rejoins here.
+ }
}
void IntrinsicLocationsBuilderARM::VisitStringEquals(HInvoke* invoke) {
@@ -1082,7 +1177,7 @@
// Assertions that must hold in order to compare strings 2 characters at a time.
DCHECK_ALIGNED(value_offset, 4);
- static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
+ static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
__ LoadImmediate(temp1, value_offset);
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index f0e9ac5..4c68862 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -97,7 +97,8 @@
// Intrinsics
qpoints->pIndexOf = art_quick_indexof;
- qpoints->pStringCompareTo = art_quick_string_compareto;
+ // The ARM StringCompareTo intrinsic does not call the runtime.
+ qpoints->pStringCompareTo = nullptr;
qpoints->pMemcpy = memcpy;
// Read barrier.
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 321b9d2..1bba4f9 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1679,145 +1679,6 @@
pop {r4, r10-r11, pc}
END art_quick_indexof
- /*
- * String's compareTo.
- *
- * Requires rARG0/rARG1 to have been previously checked for null. Will
- * return negative if this's string is < comp, 0 if they are the
- * same and positive if >.
- *
- * On entry:
- * r0: this object pointer
- * r1: comp object pointer
- *
- */
- .extern __memcmp16
-ENTRY art_quick_string_compareto
- mov r2, r0 @ this to r2, opening up r0 for return value
- sub r0, r2, r1 @ Same?
- cbnz r0,1f
- bx lr
-1: @ Same strings, return.
-
- push {r4, r7-r12, lr} @ 8 words - keep alignment
- .cfi_adjust_cfa_offset 32
- .cfi_rel_offset r4, 0
- .cfi_rel_offset r7, 4
- .cfi_rel_offset r8, 8
- .cfi_rel_offset r9, 12
- .cfi_rel_offset r10, 16
- .cfi_rel_offset r11, 20
- .cfi_rel_offset r12, 24
- .cfi_rel_offset lr, 28
-
- ldr r7, [r2, #MIRROR_STRING_COUNT_OFFSET]
- ldr r10, [r1, #MIRROR_STRING_COUNT_OFFSET]
- add r2, #MIRROR_STRING_VALUE_OFFSET
- add r1, #MIRROR_STRING_VALUE_OFFSET
-
- /*
- * At this point, we have:
- * value: r2/r1
- * offset: r4/r9
- * count: r7/r10
- * We're going to compute
- * r11 <- countDiff
- * r10 <- minCount
- */
- subs r11, r7, r10
- it ls
- movls r10, r7
-
- /*
- * Note: data pointers point to previous element so we can use pre-index
- * mode with base writeback.
- */
- subs r2, #2 @ offset to contents[-1]
- subs r1, #2 @ offset to contents[-1]
-
- /*
- * At this point we have:
- * r2: *this string data
- * r1: *comp string data
- * r10: iteration count for comparison
- * r11: value to return if the first part of the string is equal
- * r0: reserved for result
- * r3, r4, r7, r8, r9, r12 available for loading string data
- */
-
- subs r10, #2
- blt .Ldo_remainder2
-
- /*
- * Unroll the first two checks so we can quickly catch early mismatch
- * on long strings (but preserve incoming alignment)
- */
-
- ldrh r3, [r2, #2]!
- ldrh r4, [r1, #2]!
- ldrh r7, [r2, #2]!
- ldrh r8, [r1, #2]!
- subs r0, r3, r4
- it eq
- subseq r0, r7, r8
- bne .Ldone
- cmp r10, #28
- bgt .Ldo_memcmp16
- subs r10, #3
- blt .Ldo_remainder
-
-.Lloopback_triple:
- ldrh r3, [r2, #2]!
- ldrh r4, [r1, #2]!
- ldrh r7, [r2, #2]!
- ldrh r8, [r1, #2]!
- ldrh r9, [r2, #2]!
- ldrh r12,[r1, #2]!
- subs r0, r3, r4
- it eq
- subseq r0, r7, r8
- it eq
- subseq r0, r9, r12
- bne .Ldone
- subs r10, #3
- bge .Lloopback_triple
-
-.Ldo_remainder:
- adds r10, #3
- beq .Lreturn_diff
-
-.Lloopback_single:
- ldrh r3, [r2, #2]!
- ldrh r4, [r1, #2]!
- subs r0, r3, r4
- bne .Ldone
- subs r10, #1
- bne .Lloopback_single
-
-.Lreturn_diff:
- mov r0, r11
- pop {r4, r7-r12, pc}
-
-.Ldo_remainder2:
- adds r10, #2
- bne .Lloopback_single
- mov r0, r11
- pop {r4, r7-r12, pc}
-
- /* Long string case */
-.Ldo_memcmp16:
- mov r7, r11
- add r0, r2, #2
- add r1, r1, #2
- mov r2, r10
- bl __memcmp16
- cmp r0, #0
- it eq
- moveq r0, r7
-.Ldone:
- pop {r4, r7-r12, pc}
-END art_quick_string_compareto
-
/* Assembly routines used to handle ABI differences. */
/* double fmod(double a, double b) */
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 02629e8..a7d6d6f 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -1205,9 +1205,9 @@
TEST_F(StubTest, StringCompareTo) {
- // There is no StringCompareTo runtime entrypoint for __aarch64__.
-#if defined(__i386__) || defined(__arm__) || \
- defined(__mips__) || (defined(__x86_64__) && !defined(__APPLE__))
+ // There is no StringCompareTo runtime entrypoint for __arm__ or __aarch64__.
+#if defined(__i386__) || defined(__mips__) || \
+ (defined(__x86_64__) && !defined(__APPLE__))
// TODO: Check the "Unresolved" allocation stubs
Thread* self = Thread::Current();