Move 64-bit multiplication to helper

We're right on the edge for supporting inline 64-bit arithmetic
with our current temp register pool allocation.  Move 64-bit multiplication
out of line to sidestep the problem, and add some temp frees to
3-operand long ops.  In the latter case there was a potential problem
if the result long was located in a part of the frame not in the range
of a single base+displacement store (the store may then need a temp).

Change-Id: I6f8e0a11b440ed35e08f2e3457de6cbea89cfccc
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index 272cb08..0fb6636 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -697,15 +697,19 @@
  */
 bool oatCompileMethod(Method* method, art::InstructionSet insnSet)
 {
-    if (!method->IsStatic() ||
-        (method->GetName()->ToModifiedUtf8().find("foo") != std::string::npos) ||
-        (method->GetName()->ToModifiedUtf8().find("init>") != std::string::npos) ||
-        (method->GetName()->ToModifiedUtf8().find("longOperTest") != std::string::npos) ||
+    bool compiling = true;
+    if (!method->IsStatic()) {
+        compiling = false;
+    } else if ( (method->GetName()->ToModifiedUtf8().find("foo") != std::string::npos) ||
         (method->GetName()->ToModifiedUtf8().find("main") != std::string::npos)) {
+        compiling = false;
+    }
+
+    if (compiling) {
+        LOG(INFO) << "Compiling " << PrettyMethod(method, true);
+    } else {
         LOG(INFO) << "not compiling " << PrettyMethod(method, true);
         return false;
-    } else {
-        LOG(INFO) << "Compiling " << PrettyMethod(method, true);
     }
 
     CompilationUnit cUnit;
diff --git a/src/compiler/codegen/arm/CalloutHelper.h b/src/compiler/codegen/arm/CalloutHelper.h
deleted file mode 100644
index 933f686..0000000
--- a/src/compiler/codegen/arm/CalloutHelper.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_SRC_COMPILER_CODEGEN_ARM_CALLOUT_HELPER_H_
-#define ART_SRC_COMPILER_CODEGEN_ARM_CALLOUT_HELPER_H_
-
-#include "../../Dalvik.h"
-
-/* Helper functions used at runtime by compiled code */
-
-/* Conversions */
-extern "C" float __aeabi_i2f(int op1);             // OP_INT_TO_FLOAT
-extern "C" int __aeabi_f2iz(float op1);            // OP_FLOAT_TO_INT
-extern "C" float __aeabi_d2f(double op1);          // OP_DOUBLE_TO_FLOAT
-extern "C" double __aeabi_f2d(float op1);          // OP_FLOAT_TO_DOUBLE
-extern "C" double __aeabi_i2d(int op1);            // OP_INT_TO_DOUBLE
-extern "C" int __aeabi_d2iz(double op1);           // OP_DOUBLE_TO_INT
-extern "C" float __aeabi_l2f(long op1);            // OP_LONG_TO_FLOAT
-extern "C" double __aeabi_l2d(long op1);           // OP_LONG_TO_DOUBLE
-s8 artF2L(float op1);                // OP_FLOAT_TO_LONG
-s8 artD2L(double op1);               // OP_DOUBLE_TO_LONG
-
-/* Single-precision FP arithmetics */
-extern "C" float __aeabi_fadd(float a, float b);   // OP_ADD_FLOAT[_2ADDR]
-extern "C" float __aeabi_fsub(float a, float b);   // OP_SUB_FLOAT[_2ADDR]
-extern "C" float __aeabi_fdiv(float a, float b);   // OP_DIV_FLOAT[_2ADDR]
-extern "C" float __aeabi_fmul(float a, float b);   // OP_MUL_FLOAT[_2ADDR]
-extern "C" float fmodf(float a, float b);          // OP_REM_FLOAT[_2ADDR]
-
-/* Double-precision FP arithmetics */
-extern "C" double __aeabi_dadd(double a, double b); // OP_ADD_DOUBLE[_2ADDR]
-extern "C" double __aeabi_dsub(double a, double b); // OP_SUB_DOUBLE[_2ADDR]
-extern "C" double __aeabi_ddiv(double a, double b); // OP_DIV_DOUBLE[_2ADDR]
-extern "C" double __aeabi_dmul(double a, double b); // OP_MUL_DOUBLE[_2ADDR]
-extern "C" double fmod(double a, double b);         // OP_REM_DOUBLE[_2ADDR]
-
-/* Integer arithmetics */
-extern "C" int __aeabi_idivmod(int op1, int op2);  // OP_REM_INT[_2ADDR|_LIT8|_LIT16]
-extern "C" int __aeabi_idiv(int op1, int op2);     // OP_DIV_INT[_2ADDR|_LIT8|_LIT16]
-
-/* Long long arithmetics - OP_REM_LONG[_2ADDR] & OP_DIV_LONG[_2ADDR] */
-extern "C" long long __aeabi_ldivmod(long long op1, long long op2);
-
-/* Originally declared in Sync.h */
-bool dvmUnlockObject(struct Thread* self, struct Object* obj); //OP_MONITOR_EXIT
-void dvmLockObject(struct Thread* self, struct Object* obj); //OP_MONITOR_ENTER
-
-/* Originally declared in oo/TypeCheck.h */
-bool dvmCanPutArrayElement(const ClassObject* elemClass,   // OP_APUT_OBJECT
-                           const ClassObject* arrayClass);
-int dvmInstanceofNonTrivial(const ClassObject* instance,   // OP_CHECK_CAST &&
-                            const ClassObject* clazz);     // OP_INSTANCE_OF
-
-/* Originally declared in oo/Array.h */
-ArrayObject* dvmAllocArrayByClass(ClassObject* arrayClass, // OP_NEW_ARRAY
-                                  size_t length, int allocFlags);
-/* Originally declared in alloc/Alloc.h */
-Object* dvmAllocObject(ClassObject* clazz, int flags);  // OP_NEW_INSTANCE
-
-/*
- * The following functions are invoked through the compiler templates (declared
- * in compiler/template/armv5te/footer.S:
- *
- *      __aeabi_cdcmple         // CMPG_DOUBLE
- *      __aeabi_cfcmple         // CMPG_FLOAT
- *      dvmLockObject           // MONITOR_ENTER
- */
-
-/* from mterp/common/FindInterface.h */
-Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass,
-    u4 methodIdx, const Method* method, DvmDex* methodClassDex);
-
-/* from interp/Interp.cpp */
-bool dvmInterpHandleFillArrayData(ArrayObject* arrayObj, const u2* arrayData);
-
-#endif  // ART_SRC_COMPILER_CODEGEN_ARM_CALLOUT_HELPER_H_
diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc
index 1cf3766..4106e1d 100644
--- a/src/compiler/codegen/arm/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc
@@ -25,6 +25,11 @@
                         RegLocation rlSrc)
 {
     oatFlushAllRegs(cUnit);  /* All temps to home location */
+    UNIMPLEMENTED(WARNING) << "Need to handle unresolved";
+    /*
+     * Need new routine that passes Method*, type index, length.
+     * This is unconditional - always go this way.
+     */
     Class* classPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
         GetResolvedType(mir->dalvikInsn.vC);
     if (classPtr == NULL) {
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index 992eca6..2bdeee6 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -556,6 +556,10 @@
     Class* classPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
         GetResolvedType(mir->dalvikInsn.vB);
 
+    /*
+     * Need new routine that passes Method*, type index.
+     * Call unconditionally.
+     */
     if (classPtr == NULL) {
         /* Shouldn't happen */
         LOG(FATAL) << "Unexpected null class pointer";
@@ -678,32 +682,13 @@
     storeValueWide(cUnit, rlDest, rlResult);
 }
 
-/*
- * To avoid possible conflicts, we use a lot of temps here.  Note that
- * our usage of Thumb2 instruction forms avoids the problems with register
- * reuse for multiply instructions prior to arm6.
- */
-static void genMulLong(CompilationUnit* cUnit, RegLocation rlDest,
-                       RegLocation rlSrc1, RegLocation rlSrc2)
+static void freeRegLocTemps(CompilationUnit* cUnit, RegLocation rlKeep,
+                        RegLocation rlFree)
 {
-    RegLocation rlResult;
-    int resLo = oatAllocTemp(cUnit);
-    int resHi = oatAllocTemp(cUnit);
-    int tmp1 = oatAllocTemp(cUnit);
-
-    rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
-    rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
-
-    newLIR3(cUnit, kThumb2MulRRR, tmp1, rlSrc2.lowReg, rlSrc1.highReg);
-    newLIR4(cUnit, kThumb2Umull, resLo, resHi, rlSrc2.lowReg, rlSrc1.lowReg);
-    newLIR4(cUnit, kThumb2Mla, tmp1, rlSrc1.lowReg, rlSrc2.highReg, tmp1);
-    newLIR4(cUnit, kThumb2AddRRR, resHi, tmp1, resHi, 0);
-    oatFreeTemp(cUnit, tmp1);
-
-    rlResult = oatGetReturnWide(cUnit);
-    rlResult.lowReg = resLo;
-    rlResult.highReg = resHi;
-    storeValueWide(cUnit, rlDest, rlResult);
+    if ((rlFree.lowReg != rlKeep.lowReg) && (rlFree.lowReg != rlKeep.highReg))
+        oatFreeTemp(cUnit, rlFree.lowReg);   // low reg not shared with rlKeep
+    if ((rlFree.highReg != rlKeep.lowReg) && (rlFree.highReg != rlKeep.highReg))
+        oatFreeTemp(cUnit, rlFree.highReg);  // high reg not shared with rlKeep
 }
 
 static void genLong3Addr(CompilationUnit* cUnit, MIR* mir, OpKind firstOp,
@@ -727,6 +712,15 @@
     opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
     opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg,
                 rlSrc2.highReg);
+    /*
+     * NOTE: If rlDest refers to a frame variable in a large frame, the
+     * following storeValueWide might need to allocate a temp register.
+     * To further work around the lack of a spill capability, explicitly
+     * free any temps from rlSrc1 & rlSrc2 that aren't still live in rlResult.
+     * Remove when spill is functional.
+     */
+    freeRegLocTemps(cUnit, rlResult, rlSrc1);
+    freeRegLocTemps(cUnit, rlResult, rlSrc2);
     storeValueWide(cUnit, rlDest, rlResult);
     oatClobber(cUnit, rLR);
     oatUnmarkTemp(cUnit, rLR);  // Remove lr from the temp pool
@@ -1398,8 +1392,10 @@
             break;
         case OP_MUL_LONG:
         case OP_MUL_LONG_2ADDR:
-            genMulLong(cUnit, rlDest, rlSrc1, rlSrc2);
-            return false;
+            callOut = true;
+            retReg = r0;
+            funcOffset = OFFSETOF_MEMBER(Thread, pLmul);
+            break;
         case OP_DIV_LONG:
         case OP_DIV_LONG_2ADDR:
             callOut = true;
diff --git a/src/compiler_test.cc b/src/compiler_test.cc
index 2a47936..6a2f81e 100644
--- a/src/compiler_test.cc
+++ b/src/compiler_test.cc
@@ -172,7 +172,7 @@
 TEST_F(CompilerTest, LongOperTest) {
   CompileDex(kIntMathDex, "kIntMathDex");
   AssertStaticIntMethod("IntMath", "longOperTest", "(JJ)I", 0,
-                        70000000000LL, 3);
+                        70000000000LL, -3LL);
 }
 
 TEST_F(CompilerTest, LongShiftTest) {
@@ -235,7 +235,7 @@
                         19, 20LL, 21LL, 22, 23, 24, 25, 26);
 }
 
-#if 0 // White-list needs some work, must allow some virtual methods through
+#if 0 // Need bdc's new Alloc(Method*, type_idx, [length])
 TEST_F(CompilerTest, VirtualCall) {
   CompileDex(kIntMathDex, "kIntMathDex");
   AssertStaticIntMethod("IntMath", "staticCall", "(I)I", 6,
diff --git a/src/runtime_support.h b/src/runtime_support.h
index 8a7c3ba..b586042 100644
--- a/src/runtime_support.h
+++ b/src/runtime_support.h
@@ -41,6 +41,7 @@
 
 /* Long long arithmetics - OP_REM_LONG[_2ADDR] & OP_DIV_LONG[_2ADDR] */
 extern "C" long long __aeabi_ldivmod(long long op1, long long op2);
+extern "C" long long __aeabi_lmul(long long op1, long long op2);
 
 #endif
 
diff --git a/src/thread.cc b/src/thread.cc
index ed902ab..5fdff78 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -59,6 +59,7 @@
   pArtF2l = artF2L;
   pArtD2l = artD2L;
   pLdivmod = __aeabi_ldivmod;
+  pLmul = __aeabi_lmul;
 #endif
   pArtAllocArrayByClass = Array::Alloc;
   pMemcpy = memcpy;
diff --git a/src/thread.h b/src/thread.h
index bb3f63b..b7f639e 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -196,6 +196,7 @@
   double (*pFmod)(double, double);
   int (*pIdivmod)(int, int);
   int (*pIdiv)(int, int);
+  long long (*pLmul)(long long, long long);
   long long (*pLdivmod)(long long, long long);
   bool (*pArtUnlockObject)(struct Thread*, struct Object*);
   bool (*pArtCanPutArrayElementNoThrow)(const struct ClassObject*,