Implemented a new scheduler and FP register allocator.

Improved performance by 50% over existing JIT for some FP benchmarks.
diff --git a/vm/compiler/codegen/arm/LocalOptimizations.c b/vm/compiler/codegen/arm/LocalOptimizations.c
index fbc0e26..71fc014 100644
--- a/vm/compiler/codegen/arm/LocalOptimizations.c
+++ b/vm/compiler/codegen/arm/LocalOptimizations.c
@@ -18,49 +18,40 @@
 #include "vm/compiler/CompilerInternals.h"
 #include "ArmLIR.h"
 
+#define DEBUG_OPT(X)
+
 ArmLIR* dvmCompilerGenCopy(CompilationUnit *cUnit, int rDest, int rSrc);
 
 /* Is this a Dalvik register access? */
 static inline bool isDalvikLoad(ArmLIR *lir)
 {
-    return ((lir->operands[1] == rFP) &&
-            ((lir->opCode == THUMB_LDR_RRI5) ||
-             (lir->opCode == THUMB2_LDR_RRI12) ||
-             (lir->opCode == THUMB2_VLDRS) ||
-             (lir->opCode == THUMB2_VLDRD)));
+    return (lir->useMask != ~0ULL) && (lir->useMask & ENCODE_DALVIK_REG);
 }
 
 static inline bool isDalvikStore(ArmLIR *lir)
 {
-    return ((lir->operands[1] == rFP) &&
-            ((lir->opCode == THUMB_STR_RRI5) ||
-             (lir->opCode == THUMB2_STR_RRI12) ||
-             (lir->opCode == THUMB2_VSTRS) ||
-             (lir->opCode == THUMB2_VSTRD)));
+    return (lir->defMask != ~0ULL) && (lir->defMask & ENCODE_DALVIK_REG);
 }
 
-/* Double regs overlap float regs.  Return true if collision  */
-static bool regClobber(int reg1, int reg2)
+static inline bool isDalvikRegisterPartiallyClobbered(ArmLIR *lir1,
+                                                      ArmLIR *lir2)
 {
-    int reg1a, reg1b;
-    int reg2a, reg2b;
-    if (!FPREG(reg1) || !FPREG(reg2))
-        return (reg1 == reg2);
-    if (DOUBLEREG(reg1)) {
-        reg1a = reg1 & FP_REG_MASK;
-        reg1b = reg1a + 1;
-    } else {
-        reg1a = reg1b = reg1 & FP_REG_MASK;
-    }
-    if (DOUBLEREG(reg2)) {
-        reg2a = reg2 & FP_REG_MASK;
-        reg2b = reg2a + 1;
-    } else {
-        reg2a = reg2b = reg2 & FP_REG_MASK;
-    }
-    return (reg1a == reg2a) || (reg1a == reg2b) ||
-           (reg1b == reg2a) || (reg1b == reg2b);
+  int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->aliasInfo);
+  int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->aliasInfo);
+  int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->aliasInfo);
+  int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->aliasInfo);
+  /* Any shared vreg word conflicts, incl. two wide accesses w/ same base */
+  return (reg1Lo == reg2Lo) || (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo);
 }
+
+static void dumpDependentInsnPair(ArmLIR *thisLIR, ArmLIR *checkLIR,
+                                  const char *optimization)
+{
+    LOGD("************ %s ************", optimization);
+    dvmDumpLIRInsn((LIR *) thisLIR, 0);
+    dvmDumpLIRInsn((LIR *) checkLIR, 0);
+}
+
 /*
  * Perform a pass of top-down walk to
  * 1) Eliminate redundant loads and stores
@@ -81,15 +72,18 @@
             continue;
         }
         if (isDalvikStore(thisLIR)) {
-            int dRegId = thisLIR->operands[2];
+            int dRegId = DECODE_ALIAS_INFO_REG(thisLIR->aliasInfo);
+            int dRegIdHi = dRegId + DECODE_ALIAS_INFO_WIDE(thisLIR->aliasInfo);
             int nativeRegId = thisLIR->operands[0];
             ArmLIR *checkLIR;
             int sinkDistance = 0;
             /*
              * Add r15 (pc) to the mask to prevent this instruction
-             * from sinking past branch instructions.
+             * from sinking past branch instructions. Unset the Dalvik register
+             * bit when checking with native resource constraints.
              */
-            u8 stopMask = ENCODE_GP_REG(rpc) | thisLIR->useMask;
+            u8 stopMask = (ENCODE_REG_PC | thisLIR->useMask) &
+                          ~ENCODE_DALVIK_REG;
 
             for (checkLIR = NEXT_LIR(thisLIR);
                  checkLIR != tailLIR;
@@ -97,10 +91,8 @@
 
                 /* Check if a Dalvik register load is redundant */
                 if (isDalvikLoad(checkLIR) &&
-                    checkLIR->operands[2] == dRegId ) {
-                    if (FPREG(nativeRegId) != FPREG(checkLIR->operands[0])) {
-                        break;  // TODO: handle gen<=>float copies
-                    }
+                    (checkLIR->aliasInfo == thisLIR->aliasInfo) &&
+                    (REGTYPE(checkLIR->operands[0]) == REGTYPE(nativeRegId))) {
                     /* Insert a move to replace the load */
                     if (checkLIR->operands[0] != nativeRegId) {
                         ArmLIR *moveLIR;
@@ -117,39 +109,34 @@
                     checkLIR->isNop = true;
                     continue;
 
-                /* Found a true output dependency - nuke the previous store */
+                /*
+                 * Found a true output dependency - nuke the previous store.
+                 * The register type doesn't matter here.
+                 */
                 } else if (isDalvikStore(checkLIR) &&
-                           checkLIR->operands[2] == dRegId) {
+                           (checkLIR->aliasInfo == thisLIR->aliasInfo)) {
                     thisLIR->isNop = true;
                     break;
                 /* Find out the latest slot that the store can be sunk into */
                 } else {
-                    bool stopHere = false;
-
                     /* Last instruction reached */
-                    stopHere |= NEXT_LIR(checkLIR) == tailLIR;
-
-                    /*
-                     * Conservatively assume there is a memory dependency
-                     * for st/ld multiples and reg+reg address mode
-                     */
-                    stopHere |= checkLIR->opCode == THUMB_STMIA ||
-                                checkLIR->opCode == THUMB_LDMIA ||
-                                checkLIR->opCode == THUMB_STR_RRR ||
-                                checkLIR->opCode == THUMB_LDR_RRR ||
-                                checkLIR->opCode == THUMB2_STR_RRR ||
-                                checkLIR->opCode == THUMB2_LDR_RRR ||
-                                checkLIR->opCode == THUMB2_STMIA ||
-                                checkLIR->opCode == THUMB2_LDMIA ||
-                                checkLIR->opCode == THUMB2_VLDRD ||
-                                checkLIR->opCode == THUMB2_VSTRD;
-
+                    bool stopHere = (NEXT_LIR(checkLIR) == tailLIR);
 
                     /* Store data is clobbered */
-                    stopHere |= (stopMask & checkLIR->defMask) != 0;
+                    stopHere |= ((stopMask & checkLIR->defMask) != 0);
+
+                    /* Store data partially clobbers the Dalvik register */
+                    if (stopHere == false &&
+                        ((checkLIR->useMask | checkLIR->defMask) &
+                         ENCODE_DALVIK_REG)) {
+                        stopHere = isDalvikRegisterPartiallyClobbered(thisLIR,
+                                                                      checkLIR);
+                    }
 
                     /* Found a new place to put the store - move it here */
                     if (stopHere == true) {
+                        DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR,
+                                                        "SINK STORE"));
                         /* The store can be sunk for at least one cycle */
                         if (sinkDistance != 0) {
                             ArmLIR *newStoreLIR =
@@ -195,25 +182,73 @@
             continue;
         }
         if (isDalvikLoad(thisLIR)) {
-            int dRegId = thisLIR->operands[2];
+            int dRegId = DECODE_ALIAS_INFO_REG(thisLIR->aliasInfo);
+            int dRegIdHi = dRegId + DECODE_ALIAS_INFO_WIDE(thisLIR->aliasInfo);
             int nativeRegId = thisLIR->operands[0];
             ArmLIR *checkLIR;
             int hoistDistance = 0;
-            u8 stopUseMask = ENCODE_GP_REG(rpc) | thisLIR->useMask;
-            u8 stopDefMask = thisLIR->defMask;
+            u8 stopUseMask = (ENCODE_REG_PC | thisLIR->useMask) &
+                             ~ENCODE_DALVIK_REG;
+            u8 stopDefMask = thisLIR->defMask & ~ENCODE_DALVIK_REG;
 
+            /* First check if the load can be completely eliminated */
             for (checkLIR = PREV_LIR(thisLIR);
                  checkLIR != headLIR;
                  checkLIR = PREV_LIR(checkLIR)) {
 
                 if (checkLIR->isNop) continue;
 
+                /*
+                 * Check if the Dalvik register is previously accessed
+                 * with exactly the same type.
+                 */
+                if ((isDalvikLoad(checkLIR) || isDalvikStore(checkLIR)) &&
+                    (checkLIR->aliasInfo == thisLIR->aliasInfo) &&
+                    (checkLIR->operands[0] == nativeRegId)) {
+                    /*
+                     * If it is previously accessed but with a different type,
+                     * the search will terminate later at the point checking
+                     * for partially overlapping stores.
+                     */
+                    thisLIR->isNop = true;
+                    break;
+                }
+
+                /*
+                 * No earlier use/def can reach this load if:
+                 * 1) Head instruction is reached
+                 * 2) load target register is clobbered
+                 * 3) A branch is seen (stopUseMask has the PC bit set).
+                 */
+                if ((checkLIR == headLIR) ||
+                    (stopUseMask | stopDefMask) & checkLIR->defMask) {
+                    break;
+                }
+
+                /* Store data partially clobbers the Dalvik register */
+                if (isDalvikStore(checkLIR) &&
+                    isDalvikRegisterPartiallyClobbered(thisLIR, checkLIR)) {
+                    break;
+                }
+            }
+
+            /* The load has been eliminated */
+            if (thisLIR->isNop) continue;
+
+            /*
+             * The load cannot be eliminated. See if it can be hoisted to an
+             * earlier spot.
+             */
+            for (checkLIR = PREV_LIR(thisLIR);
+                 /* empty by intention */;
+                 checkLIR = PREV_LIR(checkLIR)) {
+
+                if (checkLIR->isNop) continue;
+
                 /* Check if the current load is redundant */
                 if ((isDalvikLoad(checkLIR) || isDalvikStore(checkLIR)) &&
-                    checkLIR->operands[2] == dRegId ) {
-                    if (FPREG(nativeRegId) != FPREG(checkLIR->operands[0])) {
-                        break;  // TODO: handle gen<=>float copies
-                    }
+                    (checkLIR->aliasInfo == thisLIR->aliasInfo) &&
+                    (REGTYPE(checkLIR->operands[0]) == REGTYPE(nativeRegId))) {
                     /* Insert a move to replace the load */
                     if (checkLIR->operands[0] != nativeRegId) {
                         ArmLIR *moveLIR;
@@ -226,41 +261,54 @@
                         dvmCompilerInsertLIRAfter((LIR *) checkLIR,
                                                   (LIR *) moveLIR);
                     }
-                    cUnit->printMe = true;
                     thisLIR->isNop = true;
                     break;
 
                 /* Find out if the load can be yanked past the checkLIR */
                 } else {
-                    bool stopHere = false;
-
                     /* Last instruction reached */
-                    stopHere |= PREV_LIR(checkLIR) == headLIR;
-
-                    /*
-                     * Conservatively assume there is a memory dependency
-                     * for st/ld multiples and reg+reg address mode
-                     */
-                    stopHere |= checkLIR->opCode == THUMB_STMIA ||
-                                checkLIR->opCode == THUMB_LDMIA ||
-                                checkLIR->opCode == THUMB_STR_RRR ||
-                                checkLIR->opCode == THUMB_LDR_RRR ||
-                                checkLIR->opCode == THUMB2_STR_RRR ||
-                                checkLIR->opCode == THUMB2_LDR_RRR ||
-                                checkLIR->opCode == THUMB2_STMIA ||
-                                checkLIR->opCode == THUMB2_LDMIA ||
-                                checkLIR->opCode == THUMB2_VLDRD ||
-                                checkLIR->opCode == THUMB2_VSTRD;
+                    bool stopHere = (checkLIR == headLIR);
 
                     /* Base address is clobbered by checkLIR */
-                    stopHere |= (stopUseMask & checkLIR->defMask) != 0;
+                    stopHere |= ((stopUseMask & checkLIR->defMask) != 0);
 
                     /* Load target clobbers use/def in checkLIR */
-                    stopHere |= (stopDefMask &
-                                 (checkLIR->useMask | checkLIR->defMask)) != 0;
+                    stopHere |= ((stopDefMask &
+                                 (checkLIR->useMask | checkLIR->defMask)) != 0);
+
+                    /* Store data partially clobbers the Dalvik register */
+                    if (stopHere == false &&
+                        (checkLIR->defMask & ENCODE_DALVIK_REG)) {
+                        stopHere = isDalvikRegisterPartiallyClobbered(thisLIR,
+                                                                      checkLIR);
+                    }
+
+                    /*
+                     * Stop at an earlier Dalvik load if the offset of checkLIR
+                     * is not greater than that of thisLIR
+                     *
+                     * Experiments show that doing
+                     *
+                     * ldr     r1, [r5, #16]
+                     * ldr     r0, [r5, #20]
+                     *
+                     * is much faster than
+                     *
+                     * ldr     r0, [r5, #20]
+                     * ldr     r1, [r5, #16]
+                     */
+                    if (isDalvikLoad(checkLIR)) {
+                        int dRegId2 =
+                            DECODE_ALIAS_INFO_REG(checkLIR->aliasInfo);
+                        if (dRegId2 <= dRegId) {
+                            stopHere = true;
+                        }
+                    }
 
                     /* Found a new place to put the load - move it here */
                     if (stopHere == true) {
+                        DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR,
+                                                        "HOIST LOAD"));
                         /* The store can be hoisted for at least one cycle */
                         if (hoistDistance != 0) {
                             ArmLIR *newLoadLIR =
@@ -274,13 +322,13 @@
                             dvmCompilerInsertLIRAfter((LIR *) checkLIR,
                                                       (LIR *) newLoadLIR);
                             thisLIR->isNop = true;
-                            cUnit->printMe = true;
                         }
                         break;
                     }
 
                     /*
-                     * Saw a real instruction that the store can be sunk after
+                     * Saw a real instruction, so hoisting the load above it
+                     * is beneficial
                      */
                     if (!isPseudoOpCode(checkLIR->opCode)) {
                         hoistDistance++;