Merge "Add rep movsw to x86 and x86_64 instructions."
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 3c8be27..4471d71 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -945,6 +945,97 @@
__ Bind(slow_path->GetExitLabel());
}
+void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+
+  // Request temporary registers; ECX and EDI are needed for the repe_cmpsl instruction.
+ locations->AddTemp(Location::RegisterLocation(ECX));
+ locations->AddTemp(Location::RegisterLocation(EDI));
+
+  // Set output; ESI is needed for the repe_cmpsl instruction anyway.
+ locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
+ X86Assembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register str = locations->InAt(0).AsRegister<Register>();
+ Register arg = locations->InAt(1).AsRegister<Register>();
+ Register ecx = locations->GetTemp(0).AsRegister<Register>();
+ Register edi = locations->GetTemp(1).AsRegister<Register>();
+ Register esi = locations->Out().AsRegister<Register>();
+
+ Label end;
+ Label return_true;
+ Label return_false;
+
+ // Get offsets of count, value, and class fields within a string object.
+ const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
+ const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
+
+ // Note that the null check must have been done earlier.
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+ // Check if input is null, return false if it is.
+ __ testl(arg, arg);
+ __ j(kEqual, &return_false);
+
+ // Instanceof check for the argument by comparing class fields.
+ // All string objects must have the same type since String cannot be subclassed.
+ // Receiver must be a string object, so its class field is equal to all strings' class fields.
+ // If the argument is a string object, its class field must be equal to receiver's class field.
+ __ movl(ecx, Address(str, class_offset));
+ __ cmpl(ecx, Address(arg, class_offset));
+ __ j(kNotEqual, &return_false);
+
+ // Reference equality check, return true if same reference.
+ __ cmpl(str, arg);
+ __ j(kEqual, &return_true);
+
+ // Load length of receiver string.
+ __ movl(ecx, Address(str, count_offset));
+ // Check if lengths are equal, return false if they're not.
+ __ cmpl(ecx, Address(arg, count_offset));
+ __ j(kNotEqual, &return_false);
+ // Return true if both strings are empty.
+ __ testl(ecx, ecx);
+ __ j(kEqual, &return_true);
+
+ // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
+ __ leal(esi, Address(str, value_offset));
+ __ leal(edi, Address(arg, value_offset));
+
+ // Divide string length by 2 to compare characters 2 at a time and adjust for odd lengths.
+ __ addl(ecx, Immediate(1));
+ __ shrl(ecx, Immediate(1));
+
+ // Assertions that must hold in order to compare strings 2 characters at a time.
+ DCHECK_ALIGNED(value_offset, 4);
+ static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
+
+ // Loop to compare strings two characters at a time starting at the beginning of the string.
+ __ repe_cmpsl();
+ // If strings are not equal, zero flag will be cleared.
+ __ j(kNotEqual, &return_false);
+
+ // Return true and exit the function.
+ // If loop does not result in returning false, we return true.
+ __ Bind(&return_true);
+ __ movl(esi, Immediate(1));
+ __ jmp(&end);
+
+ // Return false and exit the function.
+ __ Bind(&return_false);
+ __ xorl(esi, esi);
+ __ Bind(&end);
+}
+
static void CreateStringIndexOfLocations(HInvoke* invoke,
ArenaAllocator* allocator,
bool start_at_zero) {
@@ -1758,7 +1849,6 @@
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)
-UNIMPLEMENTED_INTRINSIC(StringEquals)
#undef UNIMPLEMENTED_INTRINSIC
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index 6f45dc8..332012b 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -29,21 +29,28 @@
template<typename T>
static constexpr int CLZ(T x) {
static_assert(std::is_integral<T>::value, "T must be integral");
- // TODO: assert unsigned. There is currently many uses with signed values.
+ static_assert(std::is_unsigned<T>::value, "T must be unsigned");
static_assert(sizeof(T) <= sizeof(long long), // NOLINT [runtime/int] [4]
"T too large, must be smaller than long long");
- return (sizeof(T) == sizeof(uint32_t))
- ? __builtin_clz(x) // TODO: __builtin_clz[ll] has undefined behavior for x=0
- : __builtin_clzll(x);
+ return
+ DCHECK_CONSTEXPR(x != 0, "x must not be zero", T(0))
+ (sizeof(T) == sizeof(uint32_t))
+ ? __builtin_clz(x)
+ : __builtin_clzll(x);
}
template<typename T>
static constexpr int CTZ(T x) {
static_assert(std::is_integral<T>::value, "T must be integral");
- // TODO: assert unsigned. There is currently many uses with signed values.
- return (sizeof(T) == sizeof(uint32_t))
- ? __builtin_ctz(x)
- : __builtin_ctzll(x);
+  // A similar x != 0 check would not make sense here: asking for the trailing zeros of a
+  // negative number is not unintuitive, and the quick backends do this for immediate encodings.
+ static_assert(sizeof(T) <= sizeof(long long), // NOLINT [runtime/int] [4]
+ "T too large, must be smaller than long long");
+ return
+ DCHECK_CONSTEXPR(x != 0, "x must not be zero", T(0))
+ (sizeof(T) == sizeof(uint32_t))
+ ? __builtin_ctz(x)
+ : __builtin_ctzll(x);
}
template<typename T>
diff --git a/runtime/leb128.h b/runtime/leb128.h
index 14683d4..976936d 100644
--- a/runtime/leb128.h
+++ b/runtime/leb128.h
@@ -101,7 +101,7 @@
static inline uint32_t UnsignedLeb128Size(uint32_t data) {
// bits_to_encode = (data != 0) ? 32 - CLZ(x) : 1 // 32 - CLZ(data | 1)
// bytes = ceil(bits_to_encode / 7.0); // (6 + bits_to_encode) / 7
- uint32_t x = 6 + 32 - CLZ(data | 1);
+ uint32_t x = 6 + 32 - CLZ(data | 1U);
// Division by 7 is done by (x * 37) >> 8 where 37 = ceil(256 / 7).
// This works for 0 <= x < 256 / (7 * 37 - 256), i.e. 0 <= x <= 85.
return (x * 37) >> 8;
@@ -111,7 +111,7 @@
static inline uint32_t SignedLeb128Size(int32_t data) {
// Like UnsignedLeb128Size(), but we need one bit beyond the highest bit that differs from sign.
data = data ^ (data >> 31);
- uint32_t x = 1 /* we need to encode the sign bit */ + 6 + 32 - CLZ(data | 1);
+ uint32_t x = 1 /* we need to encode the sign bit */ + 6 + 32 - CLZ(data | 1U);
return (x * 37) >> 8;
}