Merge "Basic block combine pass" into dalvik-dev
diff --git a/src/oat/runtime/arm/oat_support_entrypoints_arm.cc b/src/oat/runtime/arm/oat_support_entrypoints_arm.cc
index 305f359..5aedff7 100644
--- a/src/oat/runtime/arm/oat_support_entrypoints_arm.cc
+++ b/src/oat/runtime/arm/oat_support_entrypoints_arm.cc
@@ -111,7 +111,7 @@
// Long long arithmetics - REM_LONG[_2ADDR] and DIV_LONG[_2ADDR]
extern "C" int64_t __aeabi_ldivmod(int64_t, int64_t);
-extern "C" int64_t __aeabi_lmul(int64_t, int64_t);
+extern "C" int64_t art_mul_long(int64_t, int64_t);
extern "C" uint64_t art_shl_long(uint64_t, uint32_t);
extern "C" uint64_t art_shr_long(uint64_t, uint32_t);
extern "C" uint64_t art_ushr_long(uint64_t, uint32_t);
@@ -230,7 +230,7 @@
points->pF2l = art_f2l;
points->pLdiv = __aeabi_ldivmod;
points->pLdivmod = __aeabi_ldivmod; // result returned in r2:r3
- points->pLmul = __aeabi_lmul;
+ points->pLmul = art_mul_long;
points->pShlLong = art_shl_long;
points->pShrLong = art_shr_long;
points->pUshrLong = art_ushr_long;
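
For context, a minimal C sketch of the function-pointer swap this hunk performs. EntryPoints, InitEntryPoints, and the C body of art_mul_long here are illustrative stand-ins, not the real ART definitions (the real table lives in the runtime and the real routine is the assembly added below); they only make the wiring concrete:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative stand-in for the ART entrypoint table; only the
       field this hunk touches is modeled, and the names are hypothetical. */
    typedef struct {
      int64_t (*pLmul)(int64_t, int64_t);
    } EntryPoints;

    /* C stand-in for the assembly routine added below in
       runtime_support_arm.S; long multiply wraps modulo 2^64. */
    static int64_t art_mul_long(int64_t a, int64_t b) {
      return (int64_t)((uint64_t)a * (uint64_t)b);
    }

    static void InitEntryPoints(EntryPoints* points) {
      points->pLmul = art_mul_long;  /* was: __aeabi_lmul */
    }

    int main(void) {
      EntryPoints points;
      InitEntryPoints(&points);
      printf("%lld\n", (long long)points.pLmul(6700417LL, 641LL));
      return 0;
    }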
diff --git a/src/oat/runtime/arm/runtime_support_arm.S b/src/oat/runtime/arm/runtime_support_arm.S
index 5b4cd1b..eefaed0 100644
--- a/src/oat/runtime/arm/runtime_support_arm.S
+++ b/src/oat/runtime/arm/runtime_support_arm.S
@@ -794,6 +794,38 @@
pop {r0, r1} @ restore return value
bx lr @ return
+    .global art_mul_long
+    /*
+     * Signed 64-bit integer multiply.
+     *
+     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
+     *        WX
+     *      x YZ
+     *  --------
+     *     ZW ZX
+     *  YW YX
+     *
+     * The low word of the result holds ZX, the high word holds
+     * (ZW+YX) + (the high overflow from ZX). YW doesn't matter because
+     * it doesn't fit in the low 64 bits.
+     *
+     * Unlike most ARM math operations, multiply instructions have
+     * restrictions on using the same register more than once (Rd and Rm
+     * cannot be the same).
+     */
+    /* mul-long vAA, vBB, vCC */
+    ALIGN_FUNCTION_ENTRY
+art_mul_long:
+    push  {r9 - r10}        @ save callee-saved r9/r10
+    mul   ip, r2, r1        @ ip <- ZxW
+    umull r9, r10, r2, r0   @ r9/r10 <- ZxX
+    mla   r2, r0, r3, ip    @ r2 <- YxX + (ZxW)
+    add   r10, r2, r10      @ r10 <- r10 + low(ZxW + (YxX))
+    mov   r0, r9            @ r0 <- low word of result
+    mov   r1, r10           @ r1 <- high word of result
+    pop   {r9 - r10}        @ restore r9/r10
+    bx    lr                @ return to caller
+
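To check that the instruction sequence above computes the right partial-product sum, here is a small C model of the same decomposition; mul_long_model is a made-up name and this is a sketch for verifying the math, not part of the change:

    #include <assert.h>
    #include <stdint.h>

    /* C model of the decomposition above: result low word = low(ZxX),
       result high word = high(ZxX) + ZxW + YxX, all modulo 2^32. */
    static int64_t mul_long_model(int64_t a, int64_t b) {
      uint32_t x = (uint32_t)a;                    /* X: low word of a  (r0) */
      uint32_t w = (uint32_t)((uint64_t)a >> 32);  /* W: high word of a (r1) */
      uint32_t z = (uint32_t)b;                    /* Z: low word of b  (r2) */
      uint32_t y = (uint32_t)((uint64_t)b >> 32);  /* Y: high word of b (r3) */
      uint64_t zx = (uint64_t)z * x;               /* umull: full 64-bit ZxX */
      uint32_t hi = (uint32_t)(zx >> 32) + z * w + y * x;  /* mla, then add */
      return (int64_t)(((uint64_t)hi << 32) | (uint32_t)zx);
    }

    int main(void) {
      int64_t a = -123456789LL, b = 987654321LL;
      /* Compare against the native multiply, in unsigned to avoid overflow UB. */
      assert((uint64_t)mul_long_model(a, b) == (uint64_t)a * (uint64_t)b);
      return 0;
    }

The YxW term never appears in the code because it only contributes at bit 64 and above, which is exactly what the comment block above argues.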
.global art_shl_long
/*
* Long integer shift. This is different from the generic 32/64-bit