Merge "Add rep movsw to x86 and x86_64 instructions."
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 3c8be27..4471d71 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -945,6 +945,97 @@
__ Bind(slow_path->GetExitLabel());
}
+void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+
+  // Request temporary registers; ECX and EDI are needed for the repe_cmpsl instruction.
+ locations->AddTemp(Location::RegisterLocation(ECX));
+ locations->AddTemp(Location::RegisterLocation(EDI));
+
+  // Set output; ESI is needed for the repe_cmpsl instruction anyway.
+ locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
+ X86Assembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register str = locations->InAt(0).AsRegister<Register>();
+ Register arg = locations->InAt(1).AsRegister<Register>();
+ Register ecx = locations->GetTemp(0).AsRegister<Register>();
+ Register edi = locations->GetTemp(1).AsRegister<Register>();
+ Register esi = locations->Out().AsRegister<Register>();
+
+ Label end;
+ Label return_true;
+ Label return_false;
+
+ // Get offsets of count, value, and class fields within a string object.
+ const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
+ const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
+
+ // Note that the null check must have been done earlier.
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+ // Check if input is null, return false if it is.
+ __ testl(arg, arg);
+ __ j(kEqual, &return_false);
+
+ // Instanceof check for the argument by comparing class fields.
+ // All string objects must have the same type since String cannot be subclassed.
+ // Receiver must be a string object, so its class field is equal to all strings' class fields.
+ // If the argument is a string object, its class field must be equal to receiver's class field.
+ __ movl(ecx, Address(str, class_offset));
+ __ cmpl(ecx, Address(arg, class_offset));
+ __ j(kNotEqual, &return_false);
+
+ // Reference equality check, return true if same reference.
+ __ cmpl(str, arg);
+ __ j(kEqual, &return_true);
+
+ // Load length of receiver string.
+ __ movl(ecx, Address(str, count_offset));
+ // Check if lengths are equal, return false if they're not.
+ __ cmpl(ecx, Address(arg, count_offset));
+ __ j(kNotEqual, &return_false);
+ // Return true if both strings are empty.
+ __ testl(ecx, ecx);
+ __ j(kEqual, &return_true);
+
+ // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
+ __ leal(esi, Address(str, value_offset));
+ __ leal(edi, Address(arg, value_offset));
+
+ // Divide string length by 2 to compare characters 2 at a time and adjust for odd lengths.
+ __ addl(ecx, Immediate(1));
+ __ shrl(ecx, Immediate(1));
+
+ // Assertions that must hold in order to compare strings 2 characters at a time.
+ DCHECK_ALIGNED(value_offset, 4);
+ static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
+
+ // Loop to compare strings two characters at a time starting at the beginning of the string.
+ __ repe_cmpsl();
+ // If strings are not equal, zero flag will be cleared.
+ __ j(kNotEqual, &return_false);
+
+ // Return true and exit the function.
+ // If loop does not result in returning false, we return true.
+ __ Bind(&return_true);
+ __ movl(esi, Immediate(1));
+ __ jmp(&end);
+
+ // Return false and exit the function.
+ __ Bind(&return_false);
+ __ xorl(esi, esi);
+ __ Bind(&end);
+}
+
static void CreateStringIndexOfLocations(HInvoke* invoke,
ArenaAllocator* allocator,
bool start_at_zero) {
@@ -1758,7 +1849,6 @@
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)
-UNIMPLEMENTED_INTRINSIC(StringEquals)
#undef UNIMPLEMENTED_INTRINSIC
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index 6f45dc8..332012b 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -29,21 +29,28 @@
template<typename T>
static constexpr int CLZ(T x) {
static_assert(std::is_integral<T>::value, "T must be integral");
- // TODO: assert unsigned. There is currently many uses with signed values.
+ static_assert(std::is_unsigned<T>::value, "T must be unsigned");
static_assert(sizeof(T) <= sizeof(long long), // NOLINT [runtime/int] [4]
"T too large, must be smaller than long long");
- return (sizeof(T) == sizeof(uint32_t))
- ? __builtin_clz(x) // TODO: __builtin_clz[ll] has undefined behavior for x=0
- : __builtin_clzll(x);
+ return
+ DCHECK_CONSTEXPR(x != 0, "x must not be zero", T(0))
+ (sizeof(T) == sizeof(uint32_t))
+ ? __builtin_clz(x)
+ : __builtin_clzll(x);
}
template<typename T>
static constexpr int CTZ(T x) {
static_assert(std::is_integral<T>::value, "T must be integral");
- // TODO: assert unsigned. There is currently many uses with signed values.
- return (sizeof(T) == sizeof(uint32_t))
- ? __builtin_ctz(x)
- : __builtin_ctzll(x);
+  // A similar x != 0 check would not make sense here: asking for the trailing zeros of a
+  // negative number is not unintuitive, and the quick backends do this for immediate encodings.
+ static_assert(sizeof(T) <= sizeof(long long), // NOLINT [runtime/int] [4]
+ "T too large, must be smaller than long long");
+ return
+ DCHECK_CONSTEXPR(x != 0, "x must not be zero", T(0))
+ (sizeof(T) == sizeof(uint32_t))
+ ? __builtin_ctz(x)
+ : __builtin_ctzll(x);
}
template<typename T>
diff --git a/runtime/leb128.h b/runtime/leb128.h
index 14683d4..976936d 100644
--- a/runtime/leb128.h
+++ b/runtime/leb128.h
@@ -101,7 +101,7 @@
static inline uint32_t UnsignedLeb128Size(uint32_t data) {
// bits_to_encode = (data != 0) ? 32 - CLZ(x) : 1 // 32 - CLZ(data | 1)
// bytes = ceil(bits_to_encode / 7.0); // (6 + bits_to_encode) / 7
- uint32_t x = 6 + 32 - CLZ(data | 1);
+ uint32_t x = 6 + 32 - CLZ(data | 1U);
// Division by 7 is done by (x * 37) >> 8 where 37 = ceil(256 / 7).
// This works for 0 <= x < 256 / (7 * 37 - 256), i.e. 0 <= x <= 85.
return (x * 37) >> 8;
@@ -111,7 +111,7 @@
static inline uint32_t SignedLeb128Size(int32_t data) {
// Like UnsignedLeb128Size(), but we need one bit beyond the highest bit that differs from sign.
data = data ^ (data >> 31);
- uint32_t x = 1 /* we need to encode the sign bit */ + 6 + 32 - CLZ(data | 1);
+ uint32_t x = 1 /* we need to encode the sign bit */ + 6 + 32 - CLZ(data | 1U);
return (x * 37) >> 8;
}