Change string compression encoding.

Encode the string compression flag as the least significant
bit of the "count" field, with 0 meaning compressed and 1
meaning uncompressed.

The main vdex file is a tiny bit larger (+28B for prebuilt
boot images, +32 for on-device built images) and the oat
file sizes change. Measured on Nexus 9, AOSP ToT, these
changes are insignificant when string compression is
disabled (-200B for the 32-bit boot*.oat for prebuilt boot
image, -4KiB when built on the device attributable to
rounding, -16B for 64-bit boot*.oat for prebuilt boot image,
no change when built on device) but with string compression
enabled we get significant differences:
  prebuilt multi-part boot image:
    - 32-bit boot*.oat: -28KiB
    - 64-bit boot*.oat: -24KiB
  on-device built single boot image:
    - 32-bit boot.oat: -32KiB
    - 64-bit boot.oat: -28KiB
The boot image oat file overhead for string compression:
  prebuilt multi-part boot image:
    - 32-bit boot*.oat: before: ~80KiB after: ~52KiB
    - 64-bit boot*.oat: before: ~116KiB after: ~92KiB
  on-device built single boot image:
    - 32-bit boot.oat: before: 92KiB after: 60KiB
    - 64-bit boot.oat: before: 116KiB after: 92KiB

The differences in the SplitStringBenchmark seem to be lost
in the noise.

Test: Run ART test suite on host and Nexus 9 with Optimizing.
Test: Run ART test suite on host and Nexus 9 with interpreter.
Test: All of the above with string compression enabled.
Bug: 31040547

Change-Id: I7570c2b700f1a31004a2d3c18b1cc30046d35a74
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 232c3b3..a3327e2 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -199,7 +199,7 @@
       }
       __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
       if (mirror::kUseStringCompression) {
-        __ andl(length_loc.AsRegister<CpuRegister>(), Immediate(INT32_MAX));
+        __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
       }
     }
 
@@ -4729,9 +4729,11 @@
         // Branch cases into compressed and uncompressed for each index's type.
         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
         NearLabel done, not_compressed;
-        __ cmpl(Address(obj, count_offset), Immediate(0));
+        __ testl(Address(obj, count_offset), Immediate(1));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ j(kGreaterEqual, &not_compressed);
+        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                      "Expecting 0=compressed, 1=uncompressed");
+        __ j(kNotZero, &not_compressed);
         __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
         __ jmp(&done);
         __ Bind(&not_compressed);
@@ -5063,7 +5065,7 @@
   codegen_->MaybeRecordImplicitNullCheck(instruction);
   // Mask out most significant bit in case the array is String's array of char.
   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
-    __ andl(out, Immediate(INT32_MAX));
+    __ shrl(out, Immediate(1));
   }
 }
 
@@ -5115,10 +5117,12 @@
       Location array_loc = array_length->GetLocations()->InAt(0);
       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+        // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
+        // the string compression flag) with the in-memory length and avoid the temporary.
         CpuRegister length_reg = CpuRegister(TMP);
         __ movl(length_reg, array_len);
         codegen_->MaybeRecordImplicitNullCheck(array_length);
-        __ andl(length_reg, Immediate(INT32_MAX));
+        __ shrl(length_reg, Immediate(1));
         codegen_->GenerateIntCompare(length_reg, index_loc);
       } else {
         // Checking the bound for general case: