Various optimization fixes

Multiple problems surfaced when register promotion was enabled.  This
CL takes care of a few, but more remain.  The main problems dealt with
here are related to not having data types handy on invokes.  This is
solved by interpreting the shorty of the target and updating the operand
names appropriately.

The other problem was a little nastier.  The codegen infrastructure
wasn't expecting wide results to overlap source operands, for example:

    add-long (v0,v1) = (v1,v2) + (v3,v4)

In the old world, the result pair would start with a fresh name
and temps.  In the new world, though, the register promotion mechanism
retains the mappings.  Not a difficult problem to solve, but I'll
need to very carefully examine the existing long op generators to
make sure they can handle overlaps.

Change-Id: I019607620f7a78cadc9e7c57f247806d0a68243d
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index 43b8ddc..c247fe7 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -828,9 +828,22 @@
     rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
     rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
     rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-    opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
-    opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg,
-                rlSrc2.highReg);
+    // The longs may overlap - use intermediate temp if so
+    if (rlResult.lowReg == rlSrc1.highReg) {
+        //FIXME: review all long arithmetic ops - there may be more of these
+        int tReg = oatAllocTemp(cUnit);
+        genRegCopy(cUnit, tReg, rlSrc1.highReg);
+        opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg,
+                    rlSrc2.lowReg);
+        opRegRegReg(cUnit, secondOp, rlResult.highReg, tReg,
+                    rlSrc2.highReg);
+        oatFreeTemp(cUnit, tReg);
+    } else {
+        opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg,
+                    rlSrc2.lowReg);
+        opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg,
+                    rlSrc2.highReg);
+    }
     /*
      * NOTE: If rlDest refers to a frame variable in a large frame, the
      * following storeValueWide might need to allocate a temp register.
@@ -864,6 +877,10 @@
     oatInitPool(pool->FPRegs, fpRegs, pool->numFPRegs);
     // Keep special registers from being allocated
     for (int i = 0; i < numReserved; i++) {
+        if (NO_SUSPEND && (reservedRegs[i] == rSUSPEND)) {
+            //To measure cost of suspend check
+            continue;
+        }
         oatMarkInUse(cUnit, reservedRegs[i]);
     }
     // Mark temp regs - all others not in use can be used for promotion
@@ -873,6 +890,22 @@
     for (int i = 0; i < numFPTemps; i++) {
         oatMarkTemp(cUnit, fpTemps[i]);
     }
+    // Construct the alias map.
+    cUnit->phiAliasMap = (int*)oatNew(cUnit->numSSARegs *
+                                      sizeof(cUnit->phiAliasMap[0]), false);
+    for (int i = 0; i < cUnit->numSSARegs; i++) {
+        cUnit->phiAliasMap[i] = i;
+    }
+    for (MIR* phi = cUnit->phiList; phi; phi = phi->meta.phiNext) {
+        int defReg = phi->ssaRep->defs[0];
+        for (int i = 0; i < phi->ssaRep->numUses; i++) {
+           for (int j = 0; j < cUnit->numSSARegs; j++) {
+               if (cUnit->phiAliasMap[j] == phi->ssaRep->uses[i]) {
+                   cUnit->phiAliasMap[j] = defReg;
+               }
+           }
+        }
+    }
 }
 
 /*
@@ -1674,7 +1707,7 @@
 /* Check if we need to check for pending suspend request */
 STATIC void genSuspendTest(CompilationUnit* cUnit, MIR* mir)
 {
-    if (mir->optimizationFlags & MIR_IGNORE_SUSPEND_CHECK) {
+    if (NO_SUSPEND || mir->optimizationFlags & MIR_IGNORE_SUSPEND_CHECK) {
         return;
     }
     newLIR2(cUnit, kThumbSubRI8, rSUSPEND, 1);
@@ -1693,7 +1726,7 @@
 /* Check for pending suspend request.  */
 STATIC void genSuspendPoll(CompilationUnit* cUnit, MIR* mir)
 {
-    if (mir->optimizationFlags & MIR_IGNORE_SUSPEND_CHECK) {
+    if (NO_SUSPEND || mir->optimizationFlags & MIR_IGNORE_SUSPEND_CHECK) {
         return;
     }
     oatLockCallTemps(cUnit);   // Explicit register usage