String Compression for ARM and ARM64

Changes to intrinsics and code generation on ARM and ARM64
for the string compression feature. The feature is currently off.

For ARM, the sizes of boot.oat and boot.art are unchanged by
these changes when the feature is OFF. When the feature is ON,
boot.oat increased by 0.60% and boot.art decreased by 9.38%.

For ARM64, the sizes of boot.oat and boot.art are likewise unchanged
by these changes when the feature is OFF. When the feature is ON,
boot.oat increased by 0.48% and boot.art decreased by 6.58%.

To turn the feature on, set:
  runtime/mirror/string.h (kUseStringCompression = true)
  runtime/asm_support.h (STRING_COMPRESSION_FEATURE 1)

Test: m -j31 test-art-target
All tests passed with mirror::kUseStringCompression
both ON and OFF.

Bug: 31040547
Change-Id: I24e86b99391df33ba27df747779b648c5a820649
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 5d00267..969d653 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2101,7 +2101,8 @@
   Location index = locations->InAt(1);
   Location out = locations->Out();
   uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
-
+  const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
+                                        instruction->IsStringCharAt();
   MacroAssembler* masm = GetVIXLAssembler();
   UseScratchRegisterScope temps(masm);
   // Block pools between `Load` and `MaybeRecordImplicitNullCheck`.
@@ -2119,9 +2120,28 @@
   } else {
     // General case.
     MemOperand source = HeapOperand(obj);
+    Register length;
+    if (maybe_compressed_char_at) {
+      uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+      length = temps.AcquireW();
+      __ Ldr(length, HeapOperand(obj, count_offset));
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
+    }
     if (index.IsConstant()) {
-      offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
-      source = HeapOperand(obj, offset);
+      if (maybe_compressed_char_at) {
+        vixl::aarch64::Label uncompressed_load, done;
+        __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load);
+        __ Ldrb(Register(OutputCPURegister(instruction)),
+                HeapOperand(obj, offset + Int64ConstantFrom(index)));
+        __ B(&done);
+        __ Bind(&uncompressed_load);
+        __ Ldrh(Register(OutputCPURegister(instruction)),
+                HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1)));
+        __ Bind(&done);
+      } else {
+        offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+        source = HeapOperand(obj, offset);
+      }
     } else {
       Register temp = temps.AcquireSameSizeAs(obj);
       if (instruction->GetArray()->IsIntermediateAddress()) {
@@ -2139,11 +2159,24 @@
       } else {
         __ Add(temp, obj, offset);
       }
-      source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
+      if (maybe_compressed_char_at) {
+        vixl::aarch64::Label uncompressed_load, done;
+        __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load);
+        __ Ldrb(Register(OutputCPURegister(instruction)),
+                HeapOperand(temp, XRegisterFrom(index), LSL, 0));
+        __ B(&done);
+        __ Bind(&uncompressed_load);
+        __ Ldrh(Register(OutputCPURegister(instruction)),
+                HeapOperand(temp, XRegisterFrom(index), LSL, 1));
+        __ Bind(&done);
+      } else {
+        source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
+      }
     }
-
-    codegen_->Load(type, OutputCPURegister(instruction), source);
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
+    if (!maybe_compressed_char_at) {
+      codegen_->Load(type, OutputCPURegister(instruction), source);
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
+    }
 
     if (type == Primitive::kPrimNot) {
       static_assert(
@@ -2167,9 +2200,14 @@
 
 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
+  vixl::aarch64::Register out = OutputRegister(instruction);
   BlockPoolsScope block_pools(GetVIXLAssembler());
-  __ Ldr(OutputRegister(instruction), HeapOperand(InputRegisterAt(instruction, 0), offset));
+  __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
   codegen_->MaybeRecordImplicitNullCheck(instruction);
+  // Mask out compression flag from String's array length.
+  if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+    __ And(out.W(), out.W(), Operand(static_cast<int32_t>(INT32_MAX)));
+  }
 }
 
 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
@@ -2361,7 +2399,6 @@
   BoundsCheckSlowPathARM64* slow_path =
       new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(instruction);
   codegen_->AddSlowPath(slow_path);
-
   __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
   __ B(slow_path->GetEntryLabel(), hs);
 }