AArch64: Clean up CalleeSaveMethod frame and the use of temp registers.

CalleeSaveMethod frame size changes:
SaveAll : 368 -> 176
RefOnly : 176 -> 96
RefsAndArgs : 304 -> 224

JNI register spill size changes:
160 -> 88

In the transition assembly, use registers according to the following rules:
1. x0-x7 are temp/argument registers.
2. IP0 and IP1 are scratch registers.
3. After the correct type of callee-save frame has been set up, all
registers are scratchable (probably except xSELF and xSUSPEND).
4. When restoring the callee-save frame, IP0 and IP1 should be left untouched.
5. When going from C to managed code, we assume that all AAPCS callee-save
registers will be restored by managed code, except x19 (SUSPEND).

In the quick compiler:
1. Use IP0 and IP1 as scratch registers.
2. Use IP1 as the hidden argument register (IP0 will be clobbered by the
 trampoline).

Change-Id: I05ed9d418b01b9e87218a7608536f57e7a286e4c
diff --git a/runtime/arch/arm64/quick_method_frame_info_arm64.h b/runtime/arch/arm64/quick_method_frame_info_arm64.h
index cb830ac..15c6c07 100644
--- a/runtime/arch/arm64/quick_method_frame_info_arm64.h
+++ b/runtime/arch/arm64/quick_method_frame_info_arm64.h
@@ -20,53 +20,53 @@
 #include "quick/quick_method_frame_info.h"
 #include "registers_arm64.h"
 #include "runtime.h"  // for Runtime::CalleeSaveType.
+#include "utils.h"  // for POPCOUNT
 
 namespace art {
 namespace arm64 {
 
+// Registers that need to be restored but are not preserved by aapcs64.
+static constexpr uint32_t kArm64CalleeSaveAlwaysSpills =
+    // Note: ArtMethod::GetReturnPcOffsetInBytes() relies on the assumption that
+    // LR is always saved at the top of the frame for all targets.
+    // That is, lr = *(sp + frame_size - pointer_size).
+    (1 << art::arm64::LR);
 // Callee saved registers
 static constexpr uint32_t kArm64CalleeSaveRefSpills =
-    (1 << art::arm64::X19) | (1 << art::arm64::X20) | (1 << art::arm64::X21) |
-    (1 << art::arm64::X22) | (1 << art::arm64::X23) | (1 << art::arm64::X24) |
-    (1 << art::arm64::X25) | (1 << art::arm64::X26) | (1 << art::arm64::X27) |
-    (1 << art::arm64::X28);
+    (1 << art::arm64::X20) | (1 << art::arm64::X21) | (1 << art::arm64::X22) |
+    (1 << art::arm64::X23) | (1 << art::arm64::X24) | (1 << art::arm64::X25) |
+    (1 << art::arm64::X26) | (1 << art::arm64::X27) | (1 << art::arm64::X28) |
+    (1 << art::arm64::X29);
 // X0 is the method pointer. Not saved.
 static constexpr uint32_t kArm64CalleeSaveArgSpills =
     (1 << art::arm64::X1) | (1 << art::arm64::X2) | (1 << art::arm64::X3) |
     (1 << art::arm64::X4) | (1 << art::arm64::X5) | (1 << art::arm64::X6) |
     (1 << art::arm64::X7);
-// TODO  This is conservative. Only ALL should include the thread register.
-// The thread register is not preserved by the aapcs64.
-// LR is always saved.
-static constexpr uint32_t kArm64CalleeSaveAllSpills =  0;  // (1 << art::arm64::LR);
+static constexpr uint32_t kArm64CalleeSaveAllSpills =
+    // Thread register.
+    (1 << art::arm64::X18) |
+    // Suspend register.
+    1 << art::arm64::X19;
 
-// Save callee-saved floating point registers. Rest are scratch/parameters.
+static constexpr uint32_t kArm64CalleeSaveFpAlwaysSpills = 0;
+static constexpr uint32_t kArm64CalleeSaveFpRefSpills = 0;
 static constexpr uint32_t kArm64CalleeSaveFpArgSpills =
     (1 << art::arm64::D0) | (1 << art::arm64::D1) | (1 << art::arm64::D2) |
     (1 << art::arm64::D3) | (1 << art::arm64::D4) | (1 << art::arm64::D5) |
     (1 << art::arm64::D6) | (1 << art::arm64::D7);
-static constexpr uint32_t kArm64CalleeSaveFpRefSpills =
+static constexpr uint32_t kArm64FpAllSpills =
     (1 << art::arm64::D8)  | (1 << art::arm64::D9)  | (1 << art::arm64::D10) |
     (1 << art::arm64::D11)  | (1 << art::arm64::D12)  | (1 << art::arm64::D13) |
     (1 << art::arm64::D14)  | (1 << art::arm64::D15);
-static constexpr uint32_t kArm64FpAllSpills =
-    kArm64CalleeSaveFpArgSpills |
-    (1 << art::arm64::D16)  | (1 << art::arm64::D17) | (1 << art::arm64::D18) |
-    (1 << art::arm64::D19)  | (1 << art::arm64::D20) | (1 << art::arm64::D21) |
-    (1 << art::arm64::D22)  | (1 << art::arm64::D23) | (1 << art::arm64::D24) |
-    (1 << art::arm64::D25)  | (1 << art::arm64::D26) | (1 << art::arm64::D27) |
-    (1 << art::arm64::D28)  | (1 << art::arm64::D29) | (1 << art::arm64::D30) |
-    (1 << art::arm64::D31);
 
 constexpr uint32_t Arm64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
-  return kArm64CalleeSaveRefSpills |
+  return kArm64CalleeSaveAlwaysSpills | kArm64CalleeSaveRefSpills |
       (type == Runtime::kRefsAndArgs ? kArm64CalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kArm64CalleeSaveAllSpills : 0) | (1 << art::arm64::FP) |
-      (1 << art::arm64::X18) | (1 << art::arm64::LR);
+      (type == Runtime::kSaveAll ? kArm64CalleeSaveAllSpills : 0);
 }
 
 constexpr uint32_t Arm64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
-  return kArm64CalleeSaveFpRefSpills |
+  return kArm64CalleeSaveFpAlwaysSpills | kArm64CalleeSaveFpRefSpills |
       (type == Runtime::kRefsAndArgs ? kArm64CalleeSaveFpArgSpills: 0) |
       (type == Runtime::kSaveAll ? kArm64FpAllSpills : 0);
 }
@@ -83,6 +83,22 @@
                               Arm64CalleeSaveFpSpills(type));
 }
 
+constexpr size_t Arm64CalleeSaveFpr1Offset(Runtime::CalleeSaveType type) {
+  return Arm64CalleeSaveFrameSize(type) -
+         (POPCOUNT(Arm64CalleeSaveCoreSpills(type)) +
+          POPCOUNT(Arm64CalleeSaveFpSpills(type))) * kArm64PointerSize;
+}
+
+constexpr size_t Arm64CalleeSaveGpr1Offset(Runtime::CalleeSaveType type) {
+  return Arm64CalleeSaveFrameSize(type) -
+         POPCOUNT(Arm64CalleeSaveCoreSpills(type)) * kArm64PointerSize;
+}
+
+constexpr size_t Arm64CalleeSaveLrOffset(Runtime::CalleeSaveType type) {
+  return Arm64CalleeSaveFrameSize(type) -
+      POPCOUNT(Arm64CalleeSaveCoreSpills(type) & (-(1 << LR))) * kArm64PointerSize;
+}
+
 }  // namespace arm64
 }  // namespace art