AArch64: Add transition assembly to wrap native functions.

There is a slight difference between the managed code ABI and AAPCS:
managed code keeps the current Thread* in the dedicated register
xSELF, which native code following AAPCS is free to clobber. We could
make the managed code ABI match AAPCS, but we are likely to introduce
more differences later for performance, so it is better to have a
wrapper that handles the differences.
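
Each wrapper simply preserves the managed-only state across the
AAPCS call. For example, art_quick_fmod (an expansion of the
NATIVE_DOWNCALL macro added to quick_entrypoints_arm64.S below)
is just:

    sub    sp, sp, #16       // keep sp 16-byte aligned
    stp    xSELF, xLR, [sp]  // save Thread* register and return address
    bl     fmod              // ordinary AAPCS call into libc
    ldp    xSELF, xLR, [sp]  // restore them
    add    sp, sp, #16
    ret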

Change-Id: I46ced072e9e3a83f713d2bf86fa478fc6144ee81
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index 265e8d2..9814cb4 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -45,7 +45,6 @@
     case Instruction::REM_FLOAT_2ADDR:
     case Instruction::REM_FLOAT:
       FlushAllRegs();   // Send everything to home location
-      // TODO: Fix xSELF.
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2,
                                               false);
       rl_result = GetReturn(kFPReg);
@@ -89,7 +88,6 @@
     case Instruction::REM_DOUBLE_2ADDR:
     case Instruction::REM_DOUBLE:
       FlushAllRegs();   // Send everything to home location
-      // TODO: Fix xSELF.
       {
         ThreadOffset<8> helper_offset = QUICK_ENTRYPOINT_OFFSET(8, pFmod);
         RegStorage r_tgt = CallHelperSetup(helper_offset);
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 84ee778..cbb2c27 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -72,17 +72,14 @@
 extern "C" void art_quick_lock_object(void*);
 extern "C" void art_quick_unlock_object(void*);
 
-// Math entrypoints.
-extern int32_t CmpgDouble(double a, double b);
-extern int32_t CmplDouble(double a, double b);
-extern int32_t CmpgFloat(float a, float b);
-extern int32_t CmplFloat(float a, float b);
-
 // Single-precision FP arithmetics.
-extern "C" float fmodf(float a, float b);          // REM_FLOAT[_2ADDR]
+extern "C" float art_quick_fmodf(float a, float b);          // REM_FLOAT[_2ADDR]
 
 // Double-precision FP arithmetics.
-extern "C" double fmod(double a, double b);         // REM_DOUBLE[_2ADDR]
+extern "C" double art_quick_fmod(double a, double b);         // REM_DOUBLE[_2ADDR]
+
+// Memcpy.
+extern "C" void* art_quick_memcpy(void* __restrict, const void* __restrict, size_t);
 
 // Intrinsic entrypoints.
 extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
@@ -175,31 +172,31 @@
   qpoints->pUnlockObject = art_quick_unlock_object;
 
   // Math
-  // TODO NULL entrypoints not needed for ARM64 - generate inline.
-  qpoints->pCmpgDouble = CmpgDouble;
-  qpoints->pCmpgFloat = CmpgFloat;
-  qpoints->pCmplDouble = CmplDouble;
-  qpoints->pCmplFloat = CmplFloat;
-  qpoints->pFmod = fmod;
-  qpoints->pL2d = NULL;
-  qpoints->pFmodf = fmodf;
-  qpoints->pL2f = NULL;
-  qpoints->pD2iz = NULL;
-  qpoints->pF2iz = NULL;
-  qpoints->pIdivmod = NULL;
-  qpoints->pD2l = NULL;
-  qpoints->pF2l = NULL;
-  qpoints->pLdiv = NULL;
-  qpoints->pLmod = NULL;
-  qpoints->pLmul = NULL;
-  qpoints->pShlLong = NULL;
-  qpoints->pShrLong = NULL;
-  qpoints->pUshrLong = NULL;
+  // TODO nullptr entrypoints not needed for ARM64 - generate inline.
+  qpoints->pCmpgDouble = nullptr;
+  qpoints->pCmpgFloat = nullptr;
+  qpoints->pCmplDouble = nullptr;
+  qpoints->pCmplFloat = nullptr;
+  qpoints->pFmod = art_quick_fmod;
+  qpoints->pL2d = nullptr;
+  qpoints->pFmodf = art_quick_fmodf;
+  qpoints->pL2f = nullptr;
+  qpoints->pD2iz = nullptr;
+  qpoints->pF2iz = nullptr;
+  qpoints->pIdivmod = nullptr;
+  qpoints->pD2l = nullptr;
+  qpoints->pF2l = nullptr;
+  qpoints->pLdiv = nullptr;
+  qpoints->pLmod = nullptr;
+  qpoints->pLmul = nullptr;
+  qpoints->pShlLong = nullptr;
+  qpoints->pShrLong = nullptr;
+  qpoints->pUshrLong = nullptr;
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
   qpoints->pStringCompareTo = art_quick_string_compareto;
-  qpoints->pMemcpy = memcpy;
+  qpoints->pMemcpy = art_quick_memcpy;
 
   // Invocation
   qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index d704788..4ede453 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1862,3 +1862,22 @@
     csel x0, x0, x1, ne          // x0 := x0 != 0 ? x0 : x1
     ret
 END art_quick_string_compareto
+
+// Macro to facilitate adding new entrypoints that call directly into a native function.
+// Currently, xSELF is the only thing we need to take care of between managed code and AAPCS;
+// more differences may be introduced later.
+.macro NATIVE_DOWNCALL name, entrypoint
+    .extern \entrypoint
+ENTRY \name
+    sub    sp, sp, #16           // Reserve a 16-byte aligned frame.
+    stp    xSELF, xLR, [sp]      // Save the Thread* register and the return address.
+    bl     \entrypoint           // Plain AAPCS call; the callee may clobber xSELF.
+    ldp    xSELF, xLR, [sp]      // Restore xSELF and xLR.
+    add    sp, sp, #16
+    ret
+END \name
+.endm
+
+NATIVE_DOWNCALL art_quick_fmod fmod
+NATIVE_DOWNCALL art_quick_fmodf fmodf
+NATIVE_DOWNCALL art_quick_memcpy memcpy