String Compression for ARM and ARM64

Changes to intrinsics and code generation on ARM and ARM64
for the string compression feature. The feature is currently off.

For ARM, the sizes of boot.oat and boot.art are unchanged
before and after these changes with the feature OFF. With the
feature ON, boot.oat grows by 0.60% and boot.art shrinks by 9.38%.

For ARM64, the sizes of boot.oat and boot.art are likewise
unchanged with the feature OFF. With the feature ON, boot.oat
grows by 0.48% and boot.art shrinks by 6.58%.

To turn the feature on:
  runtime/mirror/string.h (kUseStringCompression = true)
  runtime/asm_support.h (STRING_COMPRESSION_FEATURE 1)
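
For reference, a minimal sketch of the two toggles as they would
look with the feature enabled; the surrounding declarations in
those files are assumed from context, not quoted from the tree:

    // runtime/mirror/string.h (sketch)
    static constexpr bool kUseStringCompression = true;

    // runtime/asm_support.h (sketch)
    #define STRING_COMPRESSION_FEATURE 1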

Test: m -j31 test-art-target
All tests pass with mirror::kUseStringCompression both ON
and OFF.
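
For readers of the assembly below: the diff treats bit 31 of the
String count field as the compression flag (tbnz w4, #31) and the
low 31 bits as the character count (and w3, w3, #2147483647).
A hedged C++ sketch of that encoding and of the compressed
indexOf path follows; the helper names are hypothetical:

    #include <cstdint>

    // Bit 31 of count flags a compressed (8-bit) string; the low
    // 31 bits hold the character count. Names are illustrative.
    constexpr uint32_t kCompressionFlag = 1u << 31;

    inline bool IsCompressed(uint32_t count) {
      return (count & kCompressionFlag) != 0;  // tbnz w4, #31
    }

    inline int32_t GetLength(uint32_t count) {
      return static_cast<int32_t>(count & 0x7FFFFFFF);  // 2147483647
    }

    // C-level equivalent of the .Lstring_indexof_compressed loop:
    // clamp the start index, then scan the byte array for ch.
    int32_t IndexOfCompressed(const uint8_t* value, uint32_t count,
                              uint16_t ch, int32_t from_index) {
      const int32_t length = GetLength(count);
      if (from_index < 0) from_index = 0;   // same clamp as the csel
      for (int32_t i = from_index; i < length; ++i) {
        if (value[i] == ch) return i;       // ldrb + cmp in the assembly
      }
      return -1;                            // not-found path
    }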

Bug: 31040547
Change-Id: I24e86b99391df33ba27df747779b648c5a820649
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index eee949d..04a3cc6 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2378,7 +2378,11 @@
 ENTRY art_quick_indexof
     ldr   w3, [x0, #MIRROR_STRING_COUNT_OFFSET]
     add   x0, x0, #MIRROR_STRING_VALUE_OFFSET
-
+#if (STRING_COMPRESSION_FEATURE)
+    /* w4 holds count (with flag) and w3 holds actual length */
+    mov   w4, w3
+    and   w3, w3, #2147483647
+#endif
     /* Clamp start to [0..count] */
     cmp   w2, #0
     csel  w2, wzr, w2, lt
@@ -2388,10 +2392,12 @@
     /* Save a copy to compute result */
     mov   x5, x0
 
+#if (STRING_COMPRESSION_FEATURE)
+    tbnz  w4, #31, .Lstring_indexof_compressed
+#endif
     /* Build pointer to start of data to compare and pre-bias */
     add   x0, x0, x2, lsl #1
     sub   x0, x0, #2
-
     /* Compute iteration count */
     sub   w2, w3, w2
 
@@ -2456,6 +2462,26 @@
     sub   x0, x0, x5
     asr   x0, x0, #1
     ret
+#if (STRING_COMPRESSION_FEATURE)
+   /*
+    * Compare the compressed string character by character with
+    * the input character.
+    */
+.Lstring_indexof_compressed:
+    add   x0, x0, x2
+    sub   x0, x0, #1
+    sub   w2, w3, w2
+.Lstring_indexof_compressed_loop:
+    subs  w2, w2, #1
+    b.lt  .Lindexof_nomatch
+    ldrb  w6, [x0, #1]!
+    cmp   w6, w1
+    b.eq  .Lstring_indexof_compressed_matched
+    b     .Lstring_indexof_compressed_loop
+.Lstring_indexof_compressed_matched:
+    sub   x0, x0, x5
+    ret
+#endif
 END art_quick_indexof
 
     /*