Merge "Optimization fixes" into dalvik-dev
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h
index 934139b..2d4f83e 100644
--- a/src/compiler/CompilerIR.h
+++ b/src/compiler/CompilerIR.h
@@ -266,7 +266,7 @@
     int numIns;
     int numOuts;
     int numRegs;            // Unlike struct Method, does not include ins
-    int numSpills;          // NOTE: includes numFPSpills
+    int numCoreSpills;
     int numFPSpills;
     int numPadding;         // # of 4-byte padding cells
     int regsOffset;         // sp-relative offset to beginning of Dalvik regs
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index eef6888..f43bf64 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -702,12 +702,12 @@
     cUnit.printMe = compiler.IsVerbose();
     cUnit.printMeVerbose = compiler.IsVerbose();
     cUnit.disableOpt = 0 |
-         (1 << kTrackLiveTemps) |
-         (1 << kLoadStoreElimination) |
-         (1 << kLoadHoisting) |
-         (1 << kSuppressLoads) |
-         (1 << kNullCheckElimination) |
-         (1 << kPromoteRegs) |
+         //(1 << kTrackLiveTemps) |
+         //(1 << kLoadStoreElimination) |
+         //(1 << kLoadHoisting) |
+         //(1 << kSuppressLoads) |
+         //(1 << kNullCheckElimination) |
+         //(1 << kPromoteRegs) |
          0;
 #endif
 
diff --git a/src/compiler/Ralloc.cc b/src/compiler/Ralloc.cc
index e7844b6..aaf9b97 100644
--- a/src/compiler/Ralloc.cc
+++ b/src/compiler/Ralloc.cc
@@ -233,7 +233,7 @@
 
     cUnit->coreSpillMask = 0;
     cUnit->fpSpillMask = 0;
-    cUnit->numSpills = 0;
+    cUnit->numCoreSpills = 0;
 
     oatDoPromotion(cUnit);
 
@@ -247,9 +247,10 @@
     cUnit->numRegs = cUnit->method->NumRegisters() - cUnit->numIns;
     cUnit->numOuts = cUnit->method->NumOuts();
     cUnit->numPadding = (STACK_ALIGN_WORDS -
-        (cUnit->numSpills + cUnit->numRegs +
+        (cUnit->numCoreSpills + cUnit->numFPSpills + cUnit->numRegs +
          cUnit->numOuts + 2)) & (STACK_ALIGN_WORDS-1);
-    cUnit->frameSize = (cUnit->numSpills + cUnit->numRegs + cUnit->numOuts +
+    cUnit->frameSize = (cUnit->numCoreSpills + cUnit->numFPSpills +
+                        cUnit->numRegs + cUnit->numOuts +
                         cUnit->numPadding + 2) * 4;
     cUnit->insOffset = cUnit->frameSize + 4;
     cUnit->regsOffset = (cUnit->numOuts + cUnit->numPadding + 1) * 4;
diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc
index 9df80bd..9690287 100644
--- a/src/compiler/codegen/RallocUtil.cc
+++ b/src/compiler/codegen/RallocUtil.cc
@@ -227,7 +227,7 @@
             coreRegs[i].inUse = true;
             cUnit->coreSpillMask |= (1 << res);
             cUnit->coreVmapTable.push_back(sReg);
-            cUnit->numSpills++;
+            cUnit->numCoreSpills++;
             cUnit->regLocation[sReg].location = kLocPhysReg;
             cUnit->regLocation[sReg].lowReg = res;
             cUnit->regLocation[sReg].home = true;
@@ -238,6 +238,28 @@
 }
 
 /*
+ * Mark callee-save fp single "reg" as promoted for Dalvik sReg.
+ * vpush/vpop use contiguous register lists, so the save set spans
+ * the first callee save through the highest promoted single; holes
+ * map to INVALID_REG in fpVmapTable.  Updates numFPSpills/fpSpillMask.
+ */
+STATIC void markPreservedSingle(CompilationUnit* cUnit, int sReg, int reg)
+{
+    // Singles carry FP_REG_OFFSET; reject any below the first callee save
+    DCHECK_GE(reg, FP_REG_OFFSET + FP_CALLEE_SAVE_BASE);
+    reg = (reg & FP_REG_MASK) - FP_CALLEE_SAVE_BASE;
+    // Pad fpVmapTable with holes up to and including this slot
+    for (int i = cUnit->fpVmapTable.size(); i <= reg; i++) {
+        cUnit->fpVmapTable.push_back(INVALID_REG);
+    }
+    // Add the current mapping
+    cUnit->fpVmapTable[reg] = sReg;
+    // Size of fpVmapTable is high-water mark, use to set mask
+    cUnit->numFPSpills = cUnit->fpVmapTable.size();
+    cUnit->fpSpillMask = ((1 << cUnit->numFPSpills) - 1) << FP_CALLEE_SAVE_BASE;
+}
+
+/*
  * Reserve a callee-save fp single register.  Try to fullfill request for
  * even/odd  allocation, but go ahead and allocate anything if not
  * available.  If nothing's available, return -1.
@@ -251,10 +273,7 @@
             ((FPRegs[i].reg & 0x1) == 0) == even) {
             res = FPRegs[i].reg;
             FPRegs[i].inUse = true;
-            cUnit->fpSpillMask |= (1 << (res & FP_REG_MASK));
-            cUnit->fpVmapTable.push_back(sReg);
-            cUnit->numSpills++;
-            cUnit->numFPSpills++;
+            markPreservedSingle(cUnit, sReg, res);
             cUnit->regLocation[sReg].fpLocation = kLocPhysReg;
             cUnit->regLocation[sReg].fpLowReg = res;
             cUnit->regLocation[sReg].home = true;
@@ -292,10 +311,7 @@
         res = p->reg;
         p->inUse = true;
         DCHECK_EQ((res & 1), 0);
-        cUnit->fpSpillMask |= (1 << (res & FP_REG_MASK));
-        cUnit->fpVmapTable.push_back(sReg);
-        cUnit->numSpills++;
-        cUnit->numFPSpills ++;
+        markPreservedSingle(cUnit, sReg, res);
     } else {
         RegisterInfo* FPRegs = cUnit->regPool->FPRegs;
         for (int i = 0; i < cUnit->regPool->numFPRegs; i++) {
@@ -306,13 +322,10 @@
                 (FPRegs[i].reg + 1) == FPRegs[i+1].reg) {
                 res = FPRegs[i].reg;
                 FPRegs[i].inUse = true;
-                cUnit->fpSpillMask |= (1 << (res & FP_REG_MASK));
-                cUnit->fpVmapTable.push_back(sReg);
+                markPreservedSingle(cUnit, sReg, res);
                 FPRegs[i+1].inUse = true;
-                cUnit->fpSpillMask |= (1 << ((res+1) & FP_REG_MASK));
-                cUnit->fpVmapTable.push_back(sReg);
-                cUnit->numSpills += 2;
-                cUnit->numFPSpills += 2;
+                DCHECK_EQ(res + 1, FPRegs[i+1].reg);
+                markPreservedSingle(cUnit, sReg+1, res+1);
                 break;
             }
         }
diff --git a/src/compiler/codegen/arm/ArchUtility.cc b/src/compiler/codegen/arm/ArchUtility.cc
index 3ceffae..edce114 100644
--- a/src/compiler/codegen/arm/ArchUtility.cc
+++ b/src/compiler/codegen/arm/ArchUtility.cc
@@ -404,7 +404,8 @@
     LOG(INFO) << "Regs (excluding ins) : " << cUnit->numRegs;
     LOG(INFO) << "Ins                  : " << cUnit->numIns;
     LOG(INFO) << "Outs                 : " << cUnit->numOuts;
-    LOG(INFO) << "Spills               : " << cUnit->numSpills;
+    LOG(INFO) << "CoreSpills           : " << cUnit->numCoreSpills;
+    LOG(INFO) << "FPSpills             : " << cUnit->numFPSpills;
     LOG(INFO) << "Padding              : " << cUnit->numPadding;
     LOG(INFO) << "Frame size           : " << cUnit->frameSize;
     LOG(INFO) << "Start of ins         : " << cUnit->insOffset;
diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h
index 3b2e986..99b22e7 100644
--- a/src/compiler/codegen/arm/ArmLIR.h
+++ b/src/compiler/codegen/arm/ArmLIR.h
@@ -99,6 +99,8 @@
 #define FP_REG_OFFSET 32
 /* Offset to distinguish DP FP regs */
 #define FP_DOUBLE 64
+/* First FP callee save */
+#define FP_CALLEE_SAVE_BASE 16
 /* Reg types */
 #define REGTYPE(x) (x & (FP_REG_OFFSET | FP_DOUBLE))
 #define FPREG(x) ((x & FP_REG_OFFSET) == FP_REG_OFFSET)
diff --git a/src/compiler/codegen/arm/ArmRallocUtil.cc b/src/compiler/codegen/arm/ArmRallocUtil.cc
index 84c3792..9a7c642 100644
--- a/src/compiler/codegen/arm/ArmRallocUtil.cc
+++ b/src/compiler/codegen/arm/ArmRallocUtil.cc
@@ -136,7 +136,7 @@
      * machinery is in place, always spill lr.
      */
     cUnit->coreSpillMask |= (1 << rLR);
-    cUnit->numSpills++;
+    cUnit->numCoreSpills++;
     /*
      * Simple hack for testing register allocation.  Just do a static
      * count of the uses of Dalvik registers.  Note that we examine
diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc
index 80118d8..a4e211b 100644
--- a/src/compiler/codegen/arm/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc
@@ -1853,6 +1853,7 @@
 
     ArmLIR* headLIR = NULL;
 
+    int spillCount = cUnit->numCoreSpills + cUnit->numFPSpills;
     if (bb->blockType == kEntryBlock) {
         /*
          * On entry, r0, r1, r2 & r3 are live.  Let the register allocation
@@ -1882,17 +1883,22 @@
         newLIR1(cUnit, kThumb2Push, cUnit->coreSpillMask);
         /* Need to spill any FP regs? */
         if (cUnit->numFPSpills) {
+            /*
+             * NOTE: fp spills are a little different from core spills in that
+             * they are pushed as a contiguous block.  When promoting from
+             * the fp set, we must allocate all singles from s16..highest-promoted.
+             */
             newLIR1(cUnit, kThumb2VPushCS, cUnit->numFPSpills);
         }
         if (!skipOverflowCheck) {
             opRegRegImm(cUnit, kOpSub, rLR, rSP,
-                        cUnit->frameSize - (cUnit->numSpills * 4));
+                        cUnit->frameSize - (spillCount * 4));
             genRegRegCheck(cUnit, kArmCondCc, rLR, r12, NULL,
                            kArmThrowStackOverflow);
             genRegCopy(cUnit, rSP, rLR);         // Establish stack
         } else {
             opRegImm(cUnit, kOpSub, rSP,
-                     cUnit->frameSize - (cUnit->numSpills * 4));
+                     cUnit->frameSize - (spillCount * 4));
         }
         storeBaseDisp(cUnit, rSP, 0, r0, kWord);
         flushIns(cUnit);
@@ -1902,7 +1908,7 @@
         oatFreeTemp(cUnit, r3);
     } else if (bb->blockType == kExitBlock) {
         newLIR0(cUnit, kArmPseudoMethodExit);
-        opRegImm(cUnit, kOpAdd, rSP, cUnit->frameSize - (cUnit->numSpills * 4));
+        opRegImm(cUnit, kOpAdd, rSP, cUnit->frameSize - (spillCount * 4));
         /* Need to restore any FP callee saves? */
         if (cUnit->numFPSpills) {
             newLIR1(cUnit, kThumb2VPopCS, cUnit->numFPSpills);
@@ -2121,7 +2127,8 @@
                 funcOffset =
                     OFFSETOF_MEMBER(Thread, pThrowStackOverflowFromCode);
                 // Restore stack alignment
-                opRegImm(cUnit, kOpAdd, rSP, cUnit->numSpills * 4);
+                opRegImm(cUnit, kOpAdd, rSP,
+                         (cUnit->numCoreSpills + cUnit->numFPSpills) * 4);
                 break;
             default:
                 LOG(FATAL) << "Unexpected throw kind: " << lab->operands[0];
diff --git a/src/compiler/codegen/arm/Thumb2/Factory.cc b/src/compiler/codegen/arm/Thumb2/Factory.cc
index 3ee23ea..45c7377 100644
--- a/src/compiler/codegen/arm/Thumb2/Factory.cc
+++ b/src/compiler/codegen/arm/Thumb2/Factory.cc
@@ -75,7 +75,7 @@
     loadPcRel->operands[1] = r15pc;
     setupResourceMasks(loadPcRel);
     setMemRefType(loadPcRel, true, kLiteral);
-    loadPcRel->aliasInfo = dataTarget->operands[0];
+    loadPcRel->aliasInfo = (intptr_t)dataTarget;
     oatAppendLIR(cUnit, (LIR* ) loadPcRel);
     return loadPcRel;
 }
@@ -183,7 +183,7 @@
     loadPcRel->operands[0] = rDest;
     setupResourceMasks(loadPcRel);
     setMemRefType(loadPcRel, true, kLiteral);
-    loadPcRel->aliasInfo = dataTarget->operands[0];
+    loadPcRel->aliasInfo = (intptr_t)dataTarget;
     res = loadPcRel;
     oatAppendLIR(cUnit, (LIR* ) loadPcRel);
 
@@ -661,7 +661,7 @@
             loadPcRel->operands[1] = r15pc;
             setupResourceMasks(loadPcRel);
             setMemRefType(loadPcRel, true, kLiteral);
-            loadPcRel->aliasInfo = dataTarget->operands[0];
+            loadPcRel->aliasInfo = (intptr_t)dataTarget;
             oatAppendLIR(cUnit, (LIR* ) loadPcRel);
             res = loadPcRel;
         }