Optimization fixes

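Two primary fixes.  First, the save/restore mechanism for FP callee saves
was broken if there were any holes in the save mask: the ARM load/store
multiple instructions for floating point use a start + count mechanism,
rather than the bit-mask mechanism used for core registers.  FP spills
are therefore pushed as a contiguous block, and promotion from the FP set
must allocate all singles from s16 up to the highest promoted register.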

The second fix corrects a problem introduced by the recent enhancements
to loading floating point literals.  The load->copy optimization mechanism
for literal loads used the value of the loaded literal to identify
redundant loads.  However, it used only the first 32 bits of the
literal.  That worked previously because 64-bit literal loads were
treated as a pair of 32-bit loads, but it can no longer distinguish two
64-bit literals that share the same first 32 bits.  The fix is to record
the label of the literal, rather than its value, in the aliasInfo, which
works for literals of all sizes.

Change-Id: Ic4779adf73b2c7d80059a988b0ecdef39921a81f
diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc
index 80118d8..a4e211b 100644
--- a/src/compiler/codegen/arm/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc
@@ -1853,6 +1853,7 @@
 
     ArmLIR* headLIR = NULL;
 
+    int spillCount = cUnit->numCoreSpills + cUnit->numFPSpills;
     if (bb->blockType == kEntryBlock) {
         /*
          * On entry, r0, r1, r2 & r3 are live.  Let the register allocation
@@ -1882,17 +1883,22 @@
         newLIR1(cUnit, kThumb2Push, cUnit->coreSpillMask);
         /* Need to spill any FP regs? */
         if (cUnit->numFPSpills) {
+            /*
+             * NOTE: fp spills are a little different from core spills in that
+             * they are pushed as a contiguous block.  When promoting from
+             * the fp set, we must allocate all singles from s16..highest-promoted
+             */
             newLIR1(cUnit, kThumb2VPushCS, cUnit->numFPSpills);
         }
         if (!skipOverflowCheck) {
             opRegRegImm(cUnit, kOpSub, rLR, rSP,
-                        cUnit->frameSize - (cUnit->numSpills * 4));
+                        cUnit->frameSize - (spillCount * 4));
             genRegRegCheck(cUnit, kArmCondCc, rLR, r12, NULL,
                            kArmThrowStackOverflow);
             genRegCopy(cUnit, rSP, rLR);         // Establish stack
         } else {
             opRegImm(cUnit, kOpSub, rSP,
-                     cUnit->frameSize - (cUnit->numSpills * 4));
+                     cUnit->frameSize - (spillCount * 4));
         }
         storeBaseDisp(cUnit, rSP, 0, r0, kWord);
         flushIns(cUnit);
@@ -1902,7 +1908,7 @@
         oatFreeTemp(cUnit, r3);
     } else if (bb->blockType == kExitBlock) {
         newLIR0(cUnit, kArmPseudoMethodExit);
-        opRegImm(cUnit, kOpAdd, rSP, cUnit->frameSize - (cUnit->numSpills * 4));
+        opRegImm(cUnit, kOpAdd, rSP, cUnit->frameSize - (spillCount * 4));
         /* Need to restore any FP callee saves? */
         if (cUnit->numFPSpills) {
             newLIR1(cUnit, kThumb2VPopCS, cUnit->numFPSpills);
@@ -2121,7 +2127,8 @@
                 funcOffset =
                     OFFSETOF_MEMBER(Thread, pThrowStackOverflowFromCode);
                 // Restore stack alignment
-                opRegImm(cUnit, kOpAdd, rSP, cUnit->numSpills * 4);
+                opRegImm(cUnit, kOpAdd, rSP,
+                         (cUnit->numCoreSpills + cUnit->numFPSpills) * 4);
                 break;
             default:
                 LOG(FATAL) << "Unexpected throw kind: " << lab->operands[0];