ARM64: Move xSELF from x18 to x19.

This patch moves xSELF to the callee-saved register x19 and removes
support for the ETR (external thread register), which was previously
used to preserve xSELF across native calls.

Change-Id: Icee07fbb9292425947f7de33d10a0ddf98c7899b
Signed-off-by: Serban Constantinescu <serban.constantinescu@linaro.org>
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index 5bf77aa..303ea3e 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -35,14 +35,15 @@
  * r16-r17: Also known as ip0-ip1, respectively. Used as scratch registers by
  *          the linker, by the trampolines and other stubs (the backend uses
  *          these as temporary registers).
- * r18    : (rxSELF) is reserved (pointer to thread-local storage).
- * r19-r29: Callee save registers (promotion targets).
+ * r18    : Caller save register (used as temporary register).
+ * r19    : (rxSELF) is reserved (pointer to thread-local storage).
+ * r20-r29: Callee save registers (promotion targets).
  * r30    : (lr) is reserved (the link register).
  * rsp    : (sp) is reserved (the stack pointer).
  * rzr    : (zr) is reserved (the zero register).
  *
- * 18 core temps that codegen can use (r0-r17).
- * 10 core registers that can be used for promotion.
+ * 19 core temps that codegen can use (r0-r18).
+ * 9 core registers that can be used for promotion.
  *
  * Floating-point registers
  * v0-v31
@@ -145,7 +146,7 @@
   // Aliases which are not defined in "ARM Architecture Reference, register names".
   rxIP0 = rx16,
   rxIP1 = rx17,
-  rxSELF = rx18,
+  rxSELF = rx19,
   rxLR = rx30,
   /*
    * FIXME: It's a bit awkward to define both 32 and 64-bit views of these - we'll only ever use
@@ -154,7 +155,7 @@
    */
   rwIP0 = rw16,
   rwIP1 = rw17,
-  rwSELF = rw18,
+  rwSELF = rw19,
   rwLR = rw30,
 };
 
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index fc32ecd..fe15391 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -51,19 +51,17 @@
      rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15,
      rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23,
      rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31};
-// Note: we are not able to call to C function since rs_xSELF is a special register need to be
-// preserved but would be scratched by native functions follow aapcs64.
 static constexpr RegStorage reserved_regs_arr[] = {rs_wSELF, rs_wsp, rs_wLR, rs_wzr};
 static constexpr RegStorage reserved64_regs_arr[] = {rs_xSELF, rs_sp, rs_xLR, rs_xzr};
 
 static constexpr RegStorage core_temps_arr[] =
     {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7,
      rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15, rs_w16,
-     rs_w17};
+     rs_w17, rs_w18};
 static constexpr RegStorage core64_temps_arr[] =
     {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7,
      rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15, rs_x16,
-     rs_x17};
+     rs_x17, rs_x18};
 static constexpr RegStorage sp_temps_arr[] =
     {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7,
      rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23,
@@ -691,6 +689,7 @@
   Clobber(rs_x15);
   Clobber(rs_x16);
   Clobber(rs_x17);
+  Clobber(rs_x18);
   Clobber(rs_x30);
 
   Clobber(rs_f0);
diff --git a/compiler/dex/quick/quick_cfi_test_expected.inc b/compiler/dex/quick/quick_cfi_test_expected.inc
index 48109d2..634fdee 100644
--- a/compiler/dex/quick/quick_cfi_test_expected.inc
+++ b/compiler/dex/quick/quick_cfi_test_expected.inc
@@ -33,15 +33,15 @@
 // 0x00000014: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kArm64[] = {
-    0xFF, 0x03, 0x01, 0xD1, 0xE8, 0xA7, 0x01, 0x6D, 0xF3, 0xD3, 0x02, 0xA9,
+    0xFF, 0x03, 0x01, 0xD1, 0xE8, 0xA7, 0x01, 0x6D, 0xF4, 0xD7, 0x02, 0xA9,
     0xFE, 0x1F, 0x00, 0xF9, 0xE0, 0x03, 0x00, 0xB9, 0xE8, 0xA7, 0x41, 0x6D,
-    0xF3, 0xD3, 0x42, 0xA9, 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91,
+    0xF4, 0xD7, 0x42, 0xA9, 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91,
     0xC0, 0x03, 0x5F, 0xD6,
 };
 static constexpr uint8_t expected_cfi_kArm64[] = {
-    0x44, 0x0E, 0x40, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x44, 0x93,
-    0x06, 0x94, 0x04, 0x44, 0x9E, 0x02, 0x44, 0x0A, 0x44, 0x06, 0x48, 0x06,
-    0x49, 0x44, 0xD3, 0xD4, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E,
+    0x44, 0x0E, 0x40, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x44, 0x94,
+    0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44, 0x0A, 0x44, 0x06, 0x48, 0x06,
+    0x49, 0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E,
     0x40,
 };
 // 0x00000000: sub sp, sp, #0x40 (64)
@@ -49,9 +49,9 @@
 // 0x00000004: stp d8, d9, [sp, #24]
 // 0x00000008: .cfi_offset_extended: r72 at cfa-40
 // 0x00000008: .cfi_offset_extended: r73 at cfa-32
-// 0x00000008: stp x19, x20, [sp, #40]
-// 0x0000000c: .cfi_offset: r19 at cfa-24
-// 0x0000000c: .cfi_offset: r20 at cfa-16
+// 0x00000008: stp x20, x21, [sp, #40]
+// 0x0000000c: .cfi_offset: r20 at cfa-24
+// 0x0000000c: .cfi_offset: r21 at cfa-16
 // 0x0000000c: str lr, [sp, #56]
 // 0x00000010: .cfi_offset: r30 at cfa-8
 // 0x00000010: str w0, [sp]
@@ -59,9 +59,9 @@
 // 0x00000014: ldp d8, d9, [sp, #24]
 // 0x00000018: .cfi_restore_extended: r72
 // 0x00000018: .cfi_restore_extended: r73
-// 0x00000018: ldp x19, x20, [sp, #40]
-// 0x0000001c: .cfi_restore: r19
+// 0x00000018: ldp x20, x21, [sp, #40]
 // 0x0000001c: .cfi_restore: r20
+// 0x0000001c: .cfi_restore: r21
 // 0x0000001c: ldr lr, [sp, #56]
 // 0x00000020: .cfi_restore: r30
 // 0x00000020: add sp, sp, #0x40 (64)
diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc
index eaf7872..42fc30f 100644
--- a/compiler/jni/jni_cfi_test_expected.inc
+++ b/compiler/jni/jni_cfi_test_expected.inc
@@ -84,14 +84,13 @@
     0xFF, 0x03, 0x03, 0xD1, 0xF3, 0x53, 0x06, 0xA9, 0xF5, 0x5B, 0x07, 0xA9,
     0xF7, 0x63, 0x08, 0xA9, 0xF9, 0x6B, 0x09, 0xA9, 0xFB, 0x73, 0x0A, 0xA9,
     0xFD, 0x7B, 0x0B, 0xA9, 0xE8, 0x27, 0x02, 0x6D, 0xEA, 0x2F, 0x03, 0x6D,
-    0xEC, 0x37, 0x04, 0x6D, 0xEE, 0x3F, 0x05, 0x6D, 0xF5, 0x03, 0x12, 0xAA,
-    0xE0, 0x03, 0x00, 0xB9, 0xE1, 0xC7, 0x00, 0xB9, 0xE0, 0xCB, 0x00, 0xBD,
-    0xE2, 0xCF, 0x00, 0xB9, 0xE3, 0xD3, 0x00, 0xB9, 0xFF, 0x83, 0x00, 0xD1,
-    0xFF, 0x83, 0x00, 0x91, 0xF2, 0x03, 0x15, 0xAA, 0xF3, 0x53, 0x46, 0xA9,
-    0xF5, 0x5B, 0x47, 0xA9, 0xF7, 0x63, 0x48, 0xA9, 0xF9, 0x6B, 0x49, 0xA9,
-    0xFB, 0x73, 0x4A, 0xA9, 0xFD, 0x7B, 0x4B, 0xA9, 0xE8, 0x27, 0x42, 0x6D,
-    0xEA, 0x2F, 0x43, 0x6D, 0xEC, 0x37, 0x44, 0x6D, 0xEE, 0x3F, 0x45, 0x6D,
-    0xFF, 0x03, 0x03, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
+    0xEC, 0x37, 0x04, 0x6D, 0xEE, 0x3F, 0x05, 0x6D, 0xE0, 0x03, 0x00, 0xB9,
+    0xE1, 0xC7, 0x00, 0xB9, 0xE0, 0xCB, 0x00, 0xBD, 0xE2, 0xCF, 0x00, 0xB9,
+    0xE3, 0xD3, 0x00, 0xB9, 0xFF, 0x83, 0x00, 0xD1, 0xFF, 0x83, 0x00, 0x91,
+    0xF3, 0x53, 0x46, 0xA9, 0xF5, 0x5B, 0x47, 0xA9, 0xF7, 0x63, 0x48, 0xA9,
+    0xF9, 0x6B, 0x49, 0xA9, 0xFB, 0x73, 0x4A, 0xA9, 0xFD, 0x7B, 0x4B, 0xA9,
+    0xE8, 0x27, 0x42, 0x6D, 0xEA, 0x2F, 0x43, 0x6D, 0xEC, 0x37, 0x44, 0x6D,
+    0xEE, 0x3F, 0x45, 0x6D, 0xFF, 0x03, 0x03, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
 };
 static constexpr uint8_t expected_cfi_kArm64[] = {
     0x44, 0x0E, 0xC0, 0x01, 0x44, 0x93, 0x18, 0x94, 0x16, 0x44, 0x95, 0x14,
@@ -99,11 +98,11 @@
     0x44, 0x9B, 0x08, 0x9C, 0x06, 0x44, 0x9D, 0x04, 0x9E, 0x02, 0x44, 0x05,
     0x48, 0x28, 0x05, 0x49, 0x26, 0x44, 0x05, 0x4A, 0x24, 0x05, 0x4B, 0x22,
     0x44, 0x05, 0x4C, 0x20, 0x05, 0x4D, 0x1E, 0x44, 0x05, 0x4E, 0x1C, 0x05,
-    0x4F, 0x1A, 0x5C, 0x0E, 0xE0, 0x01, 0x44, 0x0E, 0xC0, 0x01, 0x44, 0x0A,
-    0x44, 0xD3, 0xD4, 0x44, 0xD5, 0xD6, 0x44, 0xD7, 0xD8, 0x44, 0xD9, 0xDA,
-    0x44, 0xDB, 0xDC, 0x44, 0xDD, 0xDE, 0x44, 0x06, 0x48, 0x06, 0x49, 0x44,
-    0x06, 0x4A, 0x06, 0x4B, 0x44, 0x06, 0x4C, 0x06, 0x4D, 0x44, 0x06, 0x4E,
-    0x06, 0x4F, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0xC0, 0x01,
+    0x4F, 0x1A, 0x58, 0x0E, 0xE0, 0x01, 0x44, 0x0E, 0xC0, 0x01, 0x0A, 0x44,
+    0xD3, 0xD4, 0x44, 0xD5, 0xD6, 0x44, 0xD7, 0xD8, 0x44, 0xD9, 0xDA, 0x44,
+    0xDB, 0xDC, 0x44, 0xDD, 0xDE, 0x44, 0x06, 0x48, 0x06, 0x49, 0x44, 0x06,
+    0x4A, 0x06, 0x4B, 0x44, 0x06, 0x4C, 0x06, 0x4D, 0x44, 0x06, 0x4E, 0x06,
+    0x4F, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0xC0, 0x01,
 };
 // 0x00000000: sub sp, sp, #0xc0 (192)
 // 0x00000004: .cfi_def_cfa_offset: 192
@@ -137,53 +136,51 @@
 // 0x00000028: stp d14, d15, [sp, #80]
 // 0x0000002c: .cfi_offset_extended: r78 at cfa-112
 // 0x0000002c: .cfi_offset_extended: r79 at cfa-104
-// 0x0000002c: mov x21, tr
-// 0x00000030: str w0, [sp]
-// 0x00000034: str w1, [sp, #196]
-// 0x00000038: str s0, [sp, #200]
-// 0x0000003c: str w2, [sp, #204]
-// 0x00000040: str w3, [sp, #208]
-// 0x00000044: sub sp, sp, #0x20 (32)
-// 0x00000048: .cfi_def_cfa_offset: 224
-// 0x00000048: add sp, sp, #0x20 (32)
-// 0x0000004c: .cfi_def_cfa_offset: 192
-// 0x0000004c: mov tr, x21
-// 0x00000050: .cfi_remember_state
-// 0x00000050: ldp x19, x20, [sp, #96]
-// 0x00000054: .cfi_restore: r19
-// 0x00000054: .cfi_restore: r20
-// 0x00000054: ldp x21, x22, [sp, #112]
-// 0x00000058: .cfi_restore: r21
-// 0x00000058: .cfi_restore: r22
-// 0x00000058: ldp x23, x24, [sp, #128]
-// 0x0000005c: .cfi_restore: r23
-// 0x0000005c: .cfi_restore: r24
-// 0x0000005c: ldp x25, x26, [sp, #144]
-// 0x00000060: .cfi_restore: r25
-// 0x00000060: .cfi_restore: r26
-// 0x00000060: ldp x27, x28, [sp, #160]
-// 0x00000064: .cfi_restore: r27
-// 0x00000064: .cfi_restore: r28
-// 0x00000064: ldp x29, lr, [sp, #176]
-// 0x00000068: .cfi_restore: r29
-// 0x00000068: .cfi_restore: r30
-// 0x00000068: ldp d8, d9, [sp, #32]
-// 0x0000006c: .cfi_restore_extended: r72
-// 0x0000006c: .cfi_restore_extended: r73
-// 0x0000006c: ldp d10, d11, [sp, #48]
-// 0x00000070: .cfi_restore_extended: r74
-// 0x00000070: .cfi_restore_extended: r75
-// 0x00000070: ldp d12, d13, [sp, #64]
-// 0x00000074: .cfi_restore_extended: r76
-// 0x00000074: .cfi_restore_extended: r77
-// 0x00000074: ldp d14, d15, [sp, #80]
-// 0x00000078: .cfi_restore_extended: r78
-// 0x00000078: .cfi_restore_extended: r79
-// 0x00000078: add sp, sp, #0xc0 (192)
-// 0x0000007c: .cfi_def_cfa_offset: 0
-// 0x0000007c: ret
-// 0x00000080: .cfi_restore_state
-// 0x00000080: .cfi_def_cfa_offset: 192
+// 0x0000002c: str w0, [sp]
+// 0x00000030: str w1, [sp, #196]
+// 0x00000034: str s0, [sp, #200]
+// 0x00000038: str w2, [sp, #204]
+// 0x0000003c: str w3, [sp, #208]
+// 0x00000040: sub sp, sp, #0x20 (32)
+// 0x00000044: .cfi_def_cfa_offset: 224
+// 0x00000044: add sp, sp, #0x20 (32)
+// 0x00000048: .cfi_def_cfa_offset: 192
+// 0x00000048: .cfi_remember_state
+// 0x00000048: ldp x19, x20, [sp, #96]
+// 0x0000004c: .cfi_restore: r19
+// 0x0000004c: .cfi_restore: r20
+// 0x0000004c: ldp x21, x22, [sp, #112]
+// 0x00000050: .cfi_restore: r21
+// 0x00000050: .cfi_restore: r22
+// 0x00000050: ldp x23, x24, [sp, #128]
+// 0x00000054: .cfi_restore: r23
+// 0x00000054: .cfi_restore: r24
+// 0x00000054: ldp x25, x26, [sp, #144]
+// 0x00000058: .cfi_restore: r25
+// 0x00000058: .cfi_restore: r26
+// 0x00000058: ldp x27, x28, [sp, #160]
+// 0x0000005c: .cfi_restore: r27
+// 0x0000005c: .cfi_restore: r28
+// 0x0000005c: ldp x29, lr, [sp, #176]
+// 0x00000060: .cfi_restore: r29
+// 0x00000060: .cfi_restore: r30
+// 0x00000060: ldp d8, d9, [sp, #32]
+// 0x00000064: .cfi_restore_extended: r72
+// 0x00000064: .cfi_restore_extended: r73
+// 0x00000064: ldp d10, d11, [sp, #48]
+// 0x00000068: .cfi_restore_extended: r74
+// 0x00000068: .cfi_restore_extended: r75
+// 0x00000068: ldp d12, d13, [sp, #64]
+// 0x0000006c: .cfi_restore_extended: r76
+// 0x0000006c: .cfi_restore_extended: r77
+// 0x0000006c: ldp d14, d15, [sp, #80]
+// 0x00000070: .cfi_restore_extended: r78
+// 0x00000070: .cfi_restore_extended: r79
+// 0x00000070: add sp, sp, #0xc0 (192)
+// 0x00000074: .cfi_def_cfa_offset: 0
+// 0x00000074: ret
+// 0x00000078: .cfi_restore_state
+// 0x00000078: .cfi_def_cfa_offset: 192
 
 static constexpr uint8_t expected_asm_kX86[] = {
     0x57, 0x56, 0x55, 0x83, 0xC4, 0xE4, 0x50, 0x89, 0x4C, 0x24, 0x34, 0xF3,
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index a6caff1..03dccb9 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -183,7 +183,7 @@
   // Jni function is the native function which the java code wants to call.
   // Jni method is the method that compiled by jni compiler.
   // Call chain: managed code(java) --> jni method --> jni function.
-  // Thread register(X18, scratched by aapcs64) is not saved on stack, it is saved in ETR(X21).
+  // The thread register (X19) is saved on the stack.
   return 1 << X19 | 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 |
          1 << X25 | 1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR;
 }
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index b56ca10..ab793a5 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -44,7 +44,7 @@
 };
 static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters);
 
-const vixl::Register tr = vixl::x18;                        // Thread Register
+const vixl::Register tr = vixl::x19;                        // Thread Register
 static const vixl::Register kArtMethodRegister = vixl::w0;  // Method register on invoke.
 
 const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1);
@@ -52,10 +52,10 @@
 
 const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr);
 
-// Callee-saved registers defined by AAPCS64.
+// Callee-saved registers defined by AAPCS64 (without x19, the Thread Register).
 const vixl::CPURegList callee_saved_core_registers(vixl::CPURegister::kRegister,
                                                    vixl::kXRegSize,
-                                                   vixl::x19.code(),
+                                                   vixl::x20.code(),
                                                    vixl::x30.code());
 const vixl::CPURegList callee_saved_fp_registers(vixl::CPURegister::kFPRegister,
                                                  vixl::kDRegSize,
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index 2125f6e..ecb3b0a 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -32,20 +32,20 @@
 // 0x00000012: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kArm64[] = {
-    0xE0, 0x0F, 0x1C, 0xB8, 0xF3, 0xD3, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9,
-    0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF3, 0xD3, 0x42, 0xA9,
+    0xE0, 0x0F, 0x1C, 0xB8, 0xF4, 0xD7, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9,
+    0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0xD7, 0x42, 0xA9,
     0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
 };
 static constexpr uint8_t expected_cfi_kArm64[] = {
-    0x44, 0x0E, 0x40, 0x44, 0x93, 0x06, 0x94, 0x04, 0x44, 0x9E, 0x02, 0x44,
+    0x44, 0x0E, 0x40, 0x44, 0x94, 0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44,
     0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49,
-    0x44, 0xD3, 0xD4, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
+    0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
 };
 // 0x00000000: str w0, [sp, #-64]!
 // 0x00000004: .cfi_def_cfa_offset: 64
-// 0x00000004: stp x19, x20, [sp, #40]
-// 0x00000008: .cfi_offset: r19 at cfa-24
-// 0x00000008: .cfi_offset: r20 at cfa-16
+// 0x00000004: stp x20, x21, [sp, #40]
+// 0x00000008: .cfi_offset: r20 at cfa-24
+// 0x00000008: .cfi_offset: r21 at cfa-16
 // 0x00000008: str lr, [sp, #56]
 // 0x0000000c: .cfi_offset: r30 at cfa-8
 // 0x0000000c: stp d8, d9, [sp, #24]
@@ -55,9 +55,9 @@
 // 0x00000010: ldp d8, d9, [sp, #24]
 // 0x00000014: .cfi_restore_extended: r72
 // 0x00000014: .cfi_restore_extended: r73
-// 0x00000014: ldp x19, x20, [sp, #40]
-// 0x00000018: .cfi_restore: r19
+// 0x00000014: ldp x20, x21, [sp, #40]
 // 0x00000018: .cfi_restore: r20
+// 0x00000018: .cfi_restore: r21
 // 0x00000018: ldr lr, [sp, #56]
 // 0x0000001c: .cfi_restore: r30
 // 0x0000001c: add sp, sp, #0x40 (64)
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 98702a2..f924c71 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -52,11 +52,11 @@
 }
 
 void Arm64Assembler::GetCurrentThread(ManagedRegister tr) {
-  ___ Mov(reg_x(tr.AsArm64().AsXRegister()), reg_x(ETR));
+  ___ Mov(reg_x(tr.AsArm64().AsXRegister()), reg_x(TR));
 }
 
 void Arm64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) {
-  StoreToOffset(ETR, SP, offset.Int32Value());
+  StoreToOffset(TR, SP, offset.Int32Value());
 }
 
 // See Arm64 PCS Section 5.2.2.1.
@@ -168,7 +168,7 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsXRegister()) << scratch;
   LoadImmediate(scratch.AsXRegister(), imm);
-  StoreToOffset(scratch.AsXRegister(), ETR, offs.Int32Value());
+  StoreToOffset(scratch.AsXRegister(), TR, offs.Int32Value());
 }
 
 void Arm64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> tr_offs,
@@ -177,14 +177,14 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsXRegister()) << scratch;
   AddConstant(scratch.AsXRegister(), SP, fr_offs.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), ETR, tr_offs.Int32Value());
+  StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
 }
 
 void Arm64Assembler::StoreStackPointerToThread64(ThreadOffset<8> tr_offs) {
   vixl::UseScratchRegisterScope temps(vixl_masm_);
   vixl::Register temp = temps.AcquireX();
   ___ Mov(temp, reg_x(SP));
-  ___ Str(temp, MEM_OP(reg_x(ETR), tr_offs.Int32Value()));
+  ___ Str(temp, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
 }
 
 void Arm64Assembler::StoreSpanning(FrameOffset dest_off, ManagedRegister m_source,
@@ -285,7 +285,7 @@
 }
 
 void Arm64Assembler::LoadFromThread64(ManagedRegister m_dst, ThreadOffset<8> src, size_t size) {
-  return Load(m_dst.AsArm64(), ETR, src.Int32Value(), size);
+  return Load(m_dst.AsArm64(), TR, src.Int32Value(), size);
 }
 
 void Arm64Assembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) {
@@ -320,7 +320,7 @@
 void Arm64Assembler::LoadRawPtrFromThread64(ManagedRegister m_dst, ThreadOffset<8> offs) {
   Arm64ManagedRegister dst = m_dst.AsArm64();
   CHECK(dst.IsXRegister()) << dst;
-  LoadFromOffset(dst.AsXRegister(), ETR, offs.Int32Value());
+  LoadFromOffset(dst.AsXRegister(), TR, offs.Int32Value());
 }
 
 // Copying routines.
@@ -358,7 +358,7 @@
                                           ManagedRegister m_scratch) {
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsXRegister()) << scratch;
-  LoadFromOffset(scratch.AsXRegister(), ETR, tr_offs.Int32Value());
+  LoadFromOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
   StoreToOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
 }
 
@@ -368,7 +368,7 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsXRegister()) << scratch;
   LoadFromOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), ETR, tr_offs.Int32Value());
+  StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
 }
 
 void Arm64Assembler::CopyRef(FrameOffset dest, FrameOffset src,
@@ -611,7 +611,7 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   Arm64Exception *current_exception = new Arm64Exception(scratch, stack_adjust);
   exception_blocks_.push_back(current_exception);
-  LoadFromOffset(scratch.AsXRegister(), ETR, Thread::ExceptionOffset<8>().Int32Value());
+  LoadFromOffset(scratch.AsXRegister(), TR, Thread::ExceptionOffset<8>().Int32Value());
   ___ Cbnz(reg_x(scratch.AsXRegister()), current_exception->Entry());
 }
 
@@ -628,12 +628,7 @@
   // Pass exception object as argument.
   // Don't care about preserving X0 as this won't return.
   ___ Mov(reg_x(X0), reg_x(exception->scratch_.AsXRegister()));
-  ___ Ldr(temp, MEM_OP(reg_x(ETR), QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value()));
-
-  // Move ETR(Callee saved) back to TR(Caller saved) reg. We use ETR on calls
-  // to external functions that might trash TR. We do not need the original
-  // ETR(X21) saved in BuildFrame().
-  ___ Mov(reg_x(TR), reg_x(ETR));
+  ___ Ldr(temp, MEM_OP(reg_x(TR), QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value()));
 
   ___ Blr(temp);
   // Call should never return.
@@ -714,12 +709,7 @@
   SpillRegisters(core_reg_list, frame_size - core_reg_size);
   SpillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size);
 
-  // Note: This is specific to JNI method frame.
-  // We will need to move TR(Caller saved in AAPCS) to ETR(Callee saved in AAPCS). The original
-  // (ETR)X21 has been saved on stack. In this way, we can restore TR later.
-  DCHECK(!core_reg_list.IncludesAliasOf(reg_x(TR)));
-  DCHECK(core_reg_list.IncludesAliasOf(reg_x(ETR)));
-  ___ Mov(reg_x(ETR), reg_x(TR));
+  DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR)));
 
   // Write StackReference<Method>.
   DCHECK(X0 == method_reg.AsArm64().AsXRegister());
@@ -772,11 +762,7 @@
   DCHECK_GE(frame_size, core_reg_size + fp_reg_size + sizeof(StackReference<mirror::ArtMethod>));
   DCHECK_ALIGNED(frame_size, kStackAlignment);
 
-  // Note: This is specific to JNI method frame.
-  // Restore TR(Caller saved in AAPCS) from ETR(Callee saved in AAPCS).
-  DCHECK(!core_reg_list.IncludesAliasOf(reg_x(TR)));
-  DCHECK(core_reg_list.IncludesAliasOf(reg_x(ETR)));
-  ___ Mov(reg_x(TR), reg_x(ETR));
+  DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR)));
 
   cfi_.RememberState();
 
diff --git a/compiler/utils/arm64/managed_register_arm64_test.cc b/compiler/utils/arm64/managed_register_arm64_test.cc
index 32c2e62..e27115d 100644
--- a/compiler/utils/arm64/managed_register_arm64_test.cc
+++ b/compiler/utils/arm64/managed_register_arm64_test.cc
@@ -623,7 +623,7 @@
   EXPECT_TRUE(vixl::x29.Is(Arm64Assembler::reg_x(X29)));
   EXPECT_TRUE(vixl::x30.Is(Arm64Assembler::reg_x(X30)));
 
-  EXPECT_TRUE(vixl::x18.Is(Arm64Assembler::reg_x(TR)));
+  EXPECT_TRUE(vixl::x19.Is(Arm64Assembler::reg_x(TR)));
   EXPECT_TRUE(vixl::ip0.Is(Arm64Assembler::reg_x(IP0)));
   EXPECT_TRUE(vixl::ip1.Is(Arm64Assembler::reg_x(IP1)));
   EXPECT_TRUE(vixl::x29.Is(Arm64Assembler::reg_x(FP)));
diff --git a/disassembler/disassembler_arm64.cc b/disassembler/disassembler_arm64.cc
index d195efc..348b2a5 100644
--- a/disassembler/disassembler_arm64.cc
+++ b/disassembler/disassembler_arm64.cc
@@ -31,8 +31,7 @@
 // runtime/arch/arm64/registers_arm64.h. We do not include that file to
 // avoid a dependency on libart.
 enum {
-  TR  = 18,
-  ETR = 21,
+  TR  = 19,
   IP0 = 16,
   IP1 = 17,
   FP  = 29,
diff --git a/runtime/arch/arm64/asm_support_arm64.S b/runtime/arch/arm64/asm_support_arm64.S
index 39a8aa5..051f40b 100644
--- a/runtime/arch/arm64/asm_support_arm64.S
+++ b/runtime/arch/arm64/asm_support_arm64.S
@@ -22,9 +22,7 @@
 // Define special registers.
 
 // Register holding Thread::Current().
-#define xSELF x18
-// x18 is not preserved by aapcs64, save it on xETR(External Thread reg) for restore and later use.
-#define xETR x21
+#define xSELF x19
 // Frame Pointer
 #define xFP   x29
 // Link Register
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h
index 998f567..989ecc6 100644
--- a/runtime/arch/arm64/asm_support_arm64.h
+++ b/runtime/arch/arm64/asm_support_arm64.h
@@ -20,7 +20,7 @@
 #include "asm_support.h"
 
 #define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 176
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 112
+#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 96
 #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 224
 
 #endif  // ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_H_
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index ec9c122..2e93c1d 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -103,6 +103,7 @@
   gprs_[X13] = nullptr;
   gprs_[X14] = nullptr;
   gprs_[X15] = nullptr;
+  gprs_[X18] = nullptr;
 
   // d0-d7, d16-d31 are caller-saved; d8-d15 are callee-saved.
 
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 8c8f8d5..2ce2a29 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -27,16 +27,9 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t art_quick_assignable_from_code(const mirror::Class* klass,
+extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
 
-// Single-precision FP arithmetics.
-extern "C" float art_quick_fmodf(float a, float b);          // REM_FLOAT[_2ADDR]
-
-// Double-precision FP arithmetics.
-extern "C" double art_quick_fmod(double a, double b);        // REM_DOUBLE[_2ADDR]
-
-
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      QuickEntryPoints* qpoints) {
   // Interpreter
@@ -50,7 +43,7 @@
   ResetQuickAllocEntryPoints(qpoints);
 
   // Cast
-  qpoints->pInstanceofNonTrivial = art_quick_assignable_from_code;
+  qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
   qpoints->pCheckCast = art_quick_check_cast;
 
   // DexCache
@@ -110,9 +103,9 @@
   qpoints->pCmpgFloat = nullptr;
   qpoints->pCmplDouble = nullptr;
   qpoints->pCmplFloat = nullptr;
-  qpoints->pFmod = art_quick_fmod;
+  qpoints->pFmod = fmod;
   qpoints->pL2d = nullptr;
-  qpoints->pFmodf = art_quick_fmodf;
+  qpoints->pFmodf = fmodf;
   qpoints->pL2f = nullptr;
   qpoints->pD2iz = nullptr;
   qpoints->pF2iz = nullptr;
@@ -129,7 +122,7 @@
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
   qpoints->pStringCompareTo = art_quick_string_compareto;
-  qpoints->pMemcpy = art_quick_memcpy;
+  qpoints->pMemcpy = memcpy;
 
   // Invocation
   qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 6b16a2e5..991d29f 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -32,6 +32,8 @@
 
     // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
     THIS_LOAD_REQUIRES_READ_BARRIER
+
+    // Loads appropriate callee-save-method.
     ldr wIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET ]
 
     sub sp, sp, #176
@@ -42,43 +44,40 @@
 #error "SAVE_ALL_CALLEE_SAVE_FRAME(ARM64) size not as expected."
 #endif
 
-    // FP callee-saves
-    stp d8, d9,   [sp, #8]
-    stp d10, d11, [sp, #24]
-    stp d12, d13, [sp, #40]
-    stp d14, d15, [sp, #56]
+    // Stack alignment filler [sp, #8].
+    // FP callee-saves.
+    stp d8, d9,   [sp, #16]
+    stp d10, d11, [sp, #32]
+    stp d12, d13, [sp, #48]
+    stp d14, d15, [sp, #64]
 
-    // Thread register and x19 (callee-save)
-    stp xSELF, x19, [sp, #72]
-    .cfi_rel_offset x18, 72
+    // GP callee-saves
+    stp x19, x20, [sp, #80]
     .cfi_rel_offset x19, 80
-
-    // callee-saves
-    stp x20, x21, [sp, #88]
     .cfi_rel_offset x20, 88
+
+    stp x21, x22, [sp, #96]
     .cfi_rel_offset x21, 96
-
-    stp x22, x23, [sp, #104]
     .cfi_rel_offset x22, 104
+
+    stp x23, x24, [sp, #112]
     .cfi_rel_offset x23, 112
-
-    stp x24, x25, [sp, #120]
     .cfi_rel_offset x24, 120
+
+    stp x25, x26, [sp, #128]
     .cfi_rel_offset x25, 128
-
-    stp x26, x27, [sp, #136]
     .cfi_rel_offset x26, 136
+
+    stp x27, x28, [sp, #144]
     .cfi_rel_offset x27, 144
-
-    stp x28, x29, [sp, #152]
     .cfi_rel_offset x28, 152
-    .cfi_rel_offset x29, 160
 
-    str xLR, [sp, #168]
+    stp x29, xLR, [sp, #160]
+    .cfi_rel_offset x29, 160
     .cfi_rel_offset x30, 168
 
-    // Loads appropriate callee-save-method
-    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs]
+    // Store ArtMethod* Runtime::callee_save_methods_[kSaveAll].
+    str xIP0, [sp]
     // Place sp in Thread::Current()->top_quick_frame.
     mov xIP0, sp
     str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
@@ -95,49 +94,46 @@
     // Our registers aren't intermixed - just spill in order.
     ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
 
-    // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
+    // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefOnly]  .
     THIS_LOAD_REQUIRES_READ_BARRIER
+
+    // Loads appropriate callee-save-method.
     ldr wIP0, [xIP0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET ]
 
-    sub sp, sp, #112
-    .cfi_adjust_cfa_offset 112
+    sub sp, sp, #96
+    .cfi_adjust_cfa_offset 96
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 112)
+#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 96)
 #error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM64) size not as expected."
 #endif
 
-    // Callee-saves
-    stp x19, x20,  [sp, #16]
-    .cfi_rel_offset x19, 16
-    .cfi_rel_offset x20, 24
+    // GP callee-saves.
+    // x20 paired with ArtMethod* - see below.
+    stp x21, x22, [sp, #16]
+    .cfi_rel_offset x21, 16
+    .cfi_rel_offset x22, 24
 
-    stp x21, x22, [sp, #32]
-    .cfi_rel_offset x21, 32
-    .cfi_rel_offset x22, 40
+    stp x23, x24, [sp, #32]
+    .cfi_rel_offset x23, 32
+    .cfi_rel_offset x24, 40
 
-    stp x23, x24, [sp, #48]
-    .cfi_rel_offset x23, 48
-    .cfi_rel_offset x24, 56
+    stp x25, x26, [sp, #48]
+    .cfi_rel_offset x25, 48
+    .cfi_rel_offset x26, 56
 
-    stp x25, x26, [sp, #64]
-    .cfi_rel_offset x25, 64
-    .cfi_rel_offset x26, 72
+    stp x27, x28, [sp, #64]
+    .cfi_rel_offset x27, 64
+    .cfi_rel_offset x28, 72
 
-    stp x27, x28, [sp, #80]
-    .cfi_rel_offset x27, 80
-    .cfi_rel_offset x28, 88
+    stp x29, xLR, [sp, #80]
+    .cfi_rel_offset x29, 80
+    .cfi_rel_offset x30, 88
 
-    // x29(callee-save) and LR
-    stp x29, xLR, [sp, #96]
-    .cfi_rel_offset x29, 96
-    .cfi_rel_offset x30, 104
+    // Store ArtMethod* Runtime::callee_save_methods_[kRefsOnly].
+    stp xIP0, x20, [sp]
+    .cfi_rel_offset x20, 8
 
-    // Save xSELF to xETR.
-    mov xETR, xSELF
-
-    // Loads appropriate callee-save-method
-    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kRefsOnly]
     // Place sp in Thread::Current()->top_quick_frame.
     mov xIP0, sp
     str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
@@ -145,48 +141,37 @@
 
 // TODO: Probably no need to restore registers preserved by aapcs64.
 .macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
-    // Restore xSELF.
-    mov xSELF, xETR
-
-    // Callee-saves
-    ldp x19, x20,  [sp, #16]
-    .cfi_restore x19
+    // Callee-saves.
+    ldr x20, [sp, #8]
     .cfi_restore x20
 
-    ldp x21, x22, [sp, #32]
+    ldp x21, x22, [sp, #16]
     .cfi_restore x21
     .cfi_restore x22
 
-    ldp x23, x24, [sp, #48]
+    ldp x23, x24, [sp, #32]
     .cfi_restore x23
     .cfi_restore x24
 
-    ldp x25, x26, [sp, #64]
+    ldp x25, x26, [sp, #48]
     .cfi_restore x25
     .cfi_restore x26
 
-    ldp x27, x28, [sp, #80]
+    ldp x27, x28, [sp, #64]
     .cfi_restore x27
     .cfi_restore x28
 
-    // x29(callee-save) and LR
-    ldp x29, xLR, [sp, #96]
+    ldp x29, xLR, [sp, #80]
     .cfi_restore x29
     .cfi_restore x30
 
-    add sp, sp, #112
-    .cfi_adjust_cfa_offset -112
+    add sp, sp, #96
+    .cfi_adjust_cfa_offset -96
 .endm
 
 .macro POP_REFS_ONLY_CALLEE_SAVE_FRAME
-    // Restore xSELF as it might be scratched.
-    mov xSELF, xETR
-    // ETR
-    ldr xETR, [sp, #32]
-    .cfi_restore x21
-
-    add sp, sp, #112
-    .cfi_adjust_cfa_offset -112
+    add sp, sp, #96
+    .cfi_adjust_cfa_offset - 96
 .endm
 
 .macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
@@ -204,31 +189,29 @@
 #error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM64) size not as expected."
 #endif
 
+    // Stack alignment filler [sp, #8].
     // FP args.
-    stp d0, d1, [sp, #8]
-    stp d2, d3, [sp, #24]
-    stp d4, d5, [sp, #40]
-    stp d6, d7, [sp, #56]
+    stp d0, d1, [sp, #16]
+    stp d2, d3, [sp, #32]
+    stp d4, d5, [sp, #48]
+    stp d6, d7, [sp, #64]
 
     // Core args.
-    str x1, [sp, 72]
-    .cfi_rel_offset x1, 72
+    stp x1, x2, [sp, #80]
+    .cfi_rel_offset x1, 80
+    .cfi_rel_offset x2, 88
 
-    stp x2,  x3, [sp, #80]
-    .cfi_rel_offset x2, 80
-    .cfi_rel_offset x3, 88
+    stp x3, x4, [sp, #96]
+    .cfi_rel_offset x3, 96
+    .cfi_rel_offset x4, 104
 
-    stp x4,  x5, [sp, #96]
-    .cfi_rel_offset x4, 96
-    .cfi_rel_offset x5, 104
+    stp x5, x6, [sp, #112]
+    .cfi_rel_offset x5, 112
+    .cfi_rel_offset x6, 120
 
-    stp x6,  x7, [sp, #112]
-    .cfi_rel_offset x6, 112
-    .cfi_rel_offset x7, 120
-
-    // Callee-saves.
-    stp x19, x20, [sp, #128]
-    .cfi_rel_offset x19, 128
+    // x7 (last core arg) paired with x20, then the remaining callee-saves.
+    stp x7, x20, [sp, #128]
+    .cfi_rel_offset x7, 128
     .cfi_rel_offset x20, 136
 
     stp x21, x22, [sp, #144]
@@ -247,13 +230,11 @@
     .cfi_rel_offset x27, 192
     .cfi_rel_offset x28, 200
 
-    // x29(callee-save) and LR
+    // x29(callee-save) and LR.
     stp x29, xLR, [sp, #208]
     .cfi_rel_offset x29, 208
     .cfi_rel_offset x30, 216
 
-    // Save xSELF to xETR.
-    mov xETR, xSELF
 .endm
 
     /*
@@ -291,34 +272,28 @@
 
 // TODO: Probably no need to restore registers preserved by aapcs64.
 .macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
-    // Restore xSELF.
-    mov xSELF, xETR
-
     // FP args.
-    ldp d0, d1, [sp, #8]
-    ldp d2, d3, [sp, #24]
-    ldp d4, d5, [sp, #40]
-    ldp d6, d7, [sp, #56]
+    ldp d0, d1, [sp, #16]
+    ldp d2, d3, [sp, #32]
+    ldp d4, d5, [sp, #48]
+    ldp d6, d7, [sp, #64]
 
     // Core args.
-    ldr x1, [sp, 72]
+    ldp x1, x2, [sp, #80]
     .cfi_restore x1
-
-    ldp x2,  x3, [sp, #80]
     .cfi_restore x2
+
+    ldp x3, x4, [sp, #96]
     .cfi_restore x3
-
-    ldp x4,  x5, [sp, #96]
     .cfi_restore x4
+
+    ldp x5, x6, [sp, #112]
     .cfi_restore x5
-
-    ldp x6,  x7, [sp, #112]
     .cfi_restore x6
-    .cfi_restore x7
 
-    // Callee-saves.
-    ldp x19, x20, [sp, #128]
-    .cfi_restore x19
+    // x7 (last core arg) paired with x20, then the remaining callee-saves.
+    ldp x7, x20, [sp, #128]
+    .cfi_restore x7
     .cfi_restore x20
 
     ldp x21, x22, [sp, #144]
@@ -337,7 +312,7 @@
     .cfi_restore x27
     .cfi_restore x28
 
-    // x29(callee-save) and LR
+    // x29(callee-save) and LR.
     ldp x29, xLR, [sp, #208]
     .cfi_restore x29
     .cfi_restore x30
@@ -1106,13 +1081,12 @@
     .extern artThrowClassCastException
 ENTRY art_quick_check_cast
     // Store arguments and link register
-    sub sp, sp, #32                     // Stack needs to be 16b aligned on calls
+    // Stack needs to be 16B aligned on calls.
+    stp x0, x1, [sp,#-32]!
     .cfi_adjust_cfa_offset 32
-    stp x0, x1, [sp]
     .cfi_rel_offset x0, 0
     .cfi_rel_offset x1, 8
-    stp xSELF, xLR, [sp, #16]
-    .cfi_rel_offset x18, 16
+    str xLR, [sp, #24]
     .cfi_rel_offset x30, 24
 
     // Call runtime code
@@ -1122,25 +1096,21 @@
     cbz x0, .Lthrow_class_cast_exception
 
     // Restore and return
-    ldp x0, x1, [sp]
+    ldr xLR, [sp, #24]
+    .cfi_restore x30
+    ldp x0, x1, [sp], #32
     .cfi_restore x0
     .cfi_restore x1
-    ldp xSELF, xLR, [sp, #16]
-    .cfi_restore x18
-    .cfi_restore x30
-    add sp, sp, #32
     .cfi_adjust_cfa_offset -32
     ret
 
 .Lthrow_class_cast_exception:
     // Restore
-    ldp x0, x1, [sp]
+    ldr xLR, [sp, #24]
+    .cfi_restore x30
+    ldp x0, x1, [sp], #32
     .cfi_restore x0
     .cfi_restore x1
-    ldp xSELF, xLR, [sp, #16]
-    .cfi_restore x18
-    .cfi_restore x30
-    add sp, sp, #32
     .cfi_adjust_cfa_offset -32
 
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
@@ -1201,16 +1171,13 @@
     ret
 .Lcheck_assignability:
     // Store arguments and link register
-    sub sp, sp, #48                     // Stack needs to be 16b aligned on calls
-    .cfi_adjust_cfa_offset 48
-    stp x0, x1, [sp]
+    stp x0, x1, [sp,#-32]!
+    .cfi_adjust_cfa_offset 32
     .cfi_rel_offset x0, 0
     .cfi_rel_offset x1, 8
-    stp x2, xSELF, [sp, #16]
+    stp x2, xLR, [sp, #16]
     .cfi_rel_offset x2, 16
-    .cfi_rel_offset x18, 24
-    str xLR, [sp, #32]
-    .cfi_rel_offset x30, 32
+    .cfi_rel_offset x30, 24
 
     // Call runtime code
     mov x0, x3              // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
@@ -1221,16 +1188,13 @@
     cbz x0, .Lthrow_array_store_exception
 
     // Restore
-    ldp x0, x1, [sp]
+    ldp x2, xLR, [sp, #16]
+    .cfi_restore x2
+    .cfi_restore x30
+    ldp x0, x1, [sp], #32
     .cfi_restore x0
     .cfi_restore x1
-    ldp x2, xSELF, [sp, #16]
-    .cfi_restore x2
-    .cfi_restore x18
-    ldr xLR, [sp, #32]
-    .cfi_restore x30
-    add sp, sp, #48
-    .cfi_adjust_cfa_offset -48
+    .cfi_adjust_cfa_offset -32
 
     add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
                                                           // "Compress" = do nothing
@@ -1240,16 +1204,13 @@
     strb w3, [x3, x0]
     ret
 .Lthrow_array_store_exception:
-    ldp x0, x1, [sp]
+    ldp x2, xLR, [sp, #16]
+    .cfi_restore x2
+    .cfi_restore x30
+    ldp x0, x1, [sp], #32
     .cfi_restore x0
     .cfi_restore x1
-    ldp x2, xSELF, [sp, #16]
-    .cfi_restore x2
-    .cfi_restore x18
-    ldr xLR, [sp, #32]
-    .cfi_restore x30
-    add sp, sp, #48
-    .cfi_adjust_cfa_offset -48
+    .cfi_adjust_cfa_offset -32
 
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     mov x1, x2                    // Pass value.
@@ -1450,8 +1411,7 @@
     mov     x2, xSELF                   // pass Thread::Current
     mov     x3, sp                      // pass SP
     bl      artQuickProxyInvokeHandler  // (Method* proxy method, receiver, Thread*, SP)
-    // Use xETR as xSELF might be scratched by native function above.
-    ldr     x2, [xETR, THREAD_EXCEPTION_OFFSET]
+    ldr     x2, [xSELF, THREAD_EXCEPTION_OFFSET]
     cbnz    x2, .Lexception_in_proxy    // success if no exception is pending
     RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME // Restore frame
     fmov    d0, x0                      // Store result in d0 in case it was float or double
@@ -1601,15 +1561,14 @@
     // prepare for artQuickGenericJniEndTrampoline call
     // (Thread*, result, result_f)
     //    x0       x1       x2        <= C calling convention
-    mov x1, x0      // Result (from saved)
-    mov x0, xETR    // Thread register, original xSELF might be scratched by native code.
+    mov x1, x0      // Result (from saved).
+    mov x0, xSELF   // Thread register.
     fmov x2, d0     // d0 will contain floating point result, but needs to go into x2
 
     bl artQuickGenericJniEndTrampoline
 
     // Pending exceptions possible.
-    // Use xETR as xSELF might be scratched by native code
-    ldr x2, [xETR, THREAD_EXCEPTION_OFFSET]
+    ldr x2, [xSELF, THREAD_EXCEPTION_OFFSET]
     cbnz x2, .Lexception_in_native
 
     // Tear down the alloca.
@@ -1624,8 +1583,6 @@
     ret
 
 .Lexception_in_native:
-    // Restore xSELF. It might have been scratched by native code.
-    mov xSELF, xETR
     // Move to x1 then sp to please assembler.
     ldr x1, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
     mov sp, x1
@@ -1921,21 +1878,3 @@
     csel x0, x0, x14, ne         // x0 := x0 != 0 ? x14(prev x0=length diff) : x1.
     ret
 END art_quick_string_compareto
-
-// Macro to facilitate adding new entrypoints which call to native function directly.
-// Currently, xSELF is the only thing we need to take care of between managed code and AAPCS.
-// But we might introduce more differences.
-.macro NATIVE_DOWNCALL name, entrypoint
-    .extern \entrypoint
-ENTRY \name
-    stp    xSELF, xLR, [sp, #-16]!
-    bl     \entrypoint
-    ldp    xSELF, xLR, [sp], #16
-    ret
-END \name
-.endm
-
-NATIVE_DOWNCALL art_quick_fmod fmod
-NATIVE_DOWNCALL art_quick_fmodf fmodf
-NATIVE_DOWNCALL art_quick_memcpy memcpy
-NATIVE_DOWNCALL art_quick_assignable_from_code artIsAssignableFromCode
diff --git a/runtime/arch/arm64/quick_method_frame_info_arm64.h b/runtime/arch/arm64/quick_method_frame_info_arm64.h
index 61b4dff..bf1a92d 100644
--- a/runtime/arch/arm64/quick_method_frame_info_arm64.h
+++ b/runtime/arch/arm64/quick_method_frame_info_arm64.h
@@ -33,18 +33,17 @@
     (1 << art::arm64::LR);
 // Callee saved registers
 static constexpr uint32_t kArm64CalleeSaveRefSpills =
-    (1 << art::arm64::X19) | (1 << art::arm64::X20) | (1 << art::arm64::X21) |
-    (1 << art::arm64::X22) | (1 << art::arm64::X23) | (1 << art::arm64::X24) |
-    (1 << art::arm64::X25) | (1 << art::arm64::X26) | (1 << art::arm64::X27) |
-    (1 << art::arm64::X28) | (1 << art::arm64::X29);
+    (1 << art::arm64::X20) | (1 << art::arm64::X21) | (1 << art::arm64::X22) |
+    (1 << art::arm64::X23) | (1 << art::arm64::X24) | (1 << art::arm64::X25) |
+    (1 << art::arm64::X26) | (1 << art::arm64::X27) | (1 << art::arm64::X28) |
+    (1 << art::arm64::X29);  // X19 is listed in kArm64CalleeSaveAllSpills instead.
 // X0 is the method pointer. Not saved.
 static constexpr uint32_t kArm64CalleeSaveArgSpills =
     (1 << art::arm64::X1) | (1 << art::arm64::X2) | (1 << art::arm64::X3) |
     (1 << art::arm64::X4) | (1 << art::arm64::X5) | (1 << art::arm64::X6) |
     (1 << art::arm64::X7);
 static constexpr uint32_t kArm64CalleeSaveAllSpills =
-    // Thread register.
-    (1 << art::arm64::X18);
+    (1 << art::arm64::X19);  // Thread register (TR / xSELF).
 
 static constexpr uint32_t kArm64CalleeSaveFpAlwaysSpills = 0;
 static constexpr uint32_t kArm64CalleeSaveFpRefSpills = 0;
diff --git a/runtime/arch/arm64/registers_arm64.h b/runtime/arch/arm64/registers_arm64.h
index 51ae184..4683fc3 100644
--- a/runtime/arch/arm64/registers_arm64.h
+++ b/runtime/arch/arm64/registers_arm64.h
@@ -60,8 +60,7 @@
                  // different enum value to distinguish between the two.
   kNumberOfXRegisters = 33,
   // Aliases.
-  TR  = X18,     // ART Thread Register - Managed Runtime (Caller Saved Reg)
-  ETR = X21,     // ART Thread Register - External Calls  (Callee Saved Reg)
+  TR  = X19,     // ART Thread Register - Managed Runtime (Callee Saved Reg)
   IP0 = X16,     // Used as scratch by VIXL.
   IP1 = X17,     // Used as scratch by ART JNI Assembler.
   FP  = X29,
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index a7d24b8..23b7cfa 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -173,7 +173,7 @@
         // Load call params into the right registers.
         "ldp x0, x1, [sp]\n\t"
         "ldp x2, x3, [sp, #16]\n\t"
-        "ldr x18, [sp, #32]\n\t"
+        "ldr x19, [sp, #32]\n\t"
         "add sp, sp, #48\n\t"
         ".cfi_adjust_cfa_offset -48\n\t"
 
@@ -526,7 +526,7 @@
         // Load call params into the right registers.
         "ldp x0, x1, [sp]\n\t"
         "ldp x2, x3, [sp, #16]\n\t"
-        "ldp x18, x17, [sp, #32]\n\t"
+        "ldp x19, x17, [sp, #32]\n\t"
         "add sp, sp, #48\n\t"
         ".cfi_adjust_cfa_offset -48\n\t"
 
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 345b0ad..838427f 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -89,7 +89,7 @@
   // | LR         |
   // | X29        |
   // |  :         |
-  // | X19        |
+  // | X20        |
   // | X7         |
   // | :          |
   // | X1         |