Implemented a new scheduler and FP register allocator.

Improved performance by 50% over existing JIT for some FP benchmarks.
diff --git a/vm/compiler/codegen/arm/Thumb2Util.c b/vm/compiler/codegen/arm/Thumb2Util.c
index b9d1a23..b40656d 100644
--- a/vm/compiler/codegen/arm/Thumb2Util.c
+++ b/vm/compiler/codegen/arm/Thumb2Util.c
@@ -107,11 +107,16 @@
 static inline void resetRegisterScoreboard(CompilationUnit *cUnit)
 {
     RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard;
+    int i;
 
     dvmClearAllBits(registerScoreboard->nullCheckedRegs);
     registerScoreboard->liveDalvikReg = vNone;
     registerScoreboard->nativeReg = vNone;
     registerScoreboard->nativeRegHi = vNone;
+    for (i = 0; i < 32; i++) {
+        registerScoreboard->fp[i] = vNone;
+    }
+    registerScoreboard->nextFP = 0;
 }
 
 /* Kill the corresponding bit in the null-checked register list */
@@ -168,17 +173,6 @@
     } else {
         return (registerScoreboard->nativeReg + 1) & 3;
     }
-
-}
-
-/*
- * Generate a ARM_PSEUDO_IT_BOTTOM marker to indicate the end of an IT block
- */
-static void genITBottom(CompilationUnit *cUnit)
-{
-    ArmLIR *itBottom = newLIR0(cUnit, ARM_PSEUDO_IT_BOTTOM);
-    /* Mark all resources as being clobbered */
-    itBottom->defMask = -1;
 }
 
 /*
@@ -457,42 +451,116 @@
 {
     ArmLIR *res;
     if (vSrcDest > 255) {
-        res = opRegRegImm(cUnit, OP_ADD, r7, rFP, vSrcDest * 4, rNone);
-        newLIR3(cUnit, opCode, rSrcDest, r7, 0);
+        opRegRegImm(cUnit, OP_ADD, r7, rFP, vSrcDest * 4, rNone);
+        res = newLIR3(cUnit, opCode, rSrcDest, r7, 0);
     } else {
         res = newLIR3(cUnit, opCode, rSrcDest, rFP, vSrcDest);
     }
     return res;
 }
+
+static int nextFPReg(CompilationUnit *cUnit, int dalvikReg, bool isDouble)
+{
+    RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard;
+    int reg;
+
+    if (isDouble) {
+        reg = ((registerScoreboard->nextFP + 1) & ~1) % 32;
+        registerScoreboard->nextFP = reg + 2;
+        registerScoreboard->nextFP %= 32;
+        registerScoreboard->fp[reg] = dalvikReg;
+        return dr0 + reg;
+    }
+    else {
+        reg = registerScoreboard->nextFP++;
+        registerScoreboard->nextFP %= 32;
+        registerScoreboard->fp[reg] = dalvikReg;
+        return fr0 + reg;
+    }
+}
+
+/*
+ * Select a SFP register for the dalvikReg
+ */
+static int selectSFPReg(CompilationUnit *cUnit, int dalvikReg)
+{
+    RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard;
+    int i;
+
+    if (dalvikReg == vNone) {
+        return nextFPReg(cUnit, dalvikReg, false);;
+    }
+
+    for (i = 0; i < 32; i++) {
+        if (registerScoreboard->fp[i] == dalvikReg) {
+            return fr0 + i;
+        }
+    }
+    return nextFPReg(cUnit, dalvikReg, false);;
+}
+
+/*
+ * Select a DFP register for the dalvikReg
+ */
+static int selectDFPReg(CompilationUnit *cUnit, int dalvikReg)
+{
+    RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard;
+    int i;
+
+    if (dalvikReg == vNone) {
+        return nextFPReg(cUnit, dalvikReg, true);;
+    }
+
+    for (i = 0; i < 32; i += 2) {
+        if (registerScoreboard->fp[i] == dalvikReg) {
+            return dr0 + i;
+        }
+    }
+    return nextFPReg(cUnit, dalvikReg, true);
+}
+
 static ArmLIR *loadFloat(CompilationUnit *cUnit, int vSrc, int rDest)
 {
     assert(SINGLEREG(rDest));
-    return fpVarAccess(cUnit, vSrc, rDest, THUMB2_VLDRS);
+    ArmLIR *lir = fpVarAccess(cUnit, vSrc, rDest, THUMB2_VLDRS);
+    annotateDalvikRegAccess(lir, vSrc, true /* isLoad */);
+    return lir;
 }
 
 /* Store a float to a Dalvik register */
-static ArmLIR *storeFloat(CompilationUnit *cUnit, int rSrc, int vDest,
-                          int rScratch)
+static ArmLIR *storeFloat(CompilationUnit *cUnit, int rSrc, int vDest)
 {
+    RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard;
+
     assert(SINGLEREG(rSrc));
-    return fpVarAccess(cUnit, vDest, rSrc, THUMB2_VSTRS);
+    registerScoreboard->fp[rSrc % 32] = vDest;
+
+    ArmLIR *lir = fpVarAccess(cUnit, vDest, rSrc, THUMB2_VSTRS);
+    annotateDalvikRegAccess(lir, vDest, false /* isLoad */);
+    return lir;
 }
 
 /* Load a double from a Dalvik register */
 static ArmLIR *loadDouble(CompilationUnit *cUnit, int vSrc, int rDest)
 {
     assert(DOUBLEREG(rDest));
-    return fpVarAccess(cUnit, vSrc, rDest, THUMB2_VLDRD);
+    ArmLIR *lir = fpVarAccess(cUnit, vSrc, rDest, THUMB2_VLDRD);
+    annotateDalvikRegAccess(lir, vSrc, true /* isLoad */);
+    return lir;
 }
 
 /* Store a double to a Dalvik register */
-static ArmLIR *storeDouble(CompilationUnit *cUnit, int rSrc, int vDest,
-                           int rScratch)
+static ArmLIR *storeDouble(CompilationUnit *cUnit, int rSrc, int vDest)
 {
-    assert(DOUBLEREG(rSrc));
-    return fpVarAccess(cUnit, vDest, rSrc, THUMB2_VSTRD);
-}
+    RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard;
 
+    assert(DOUBLEREG(rSrc));
+    registerScoreboard->fp[rSrc % 32] = vDest;
+
+    ArmLIR *lir = fpVarAccess(cUnit, vDest, rSrc, THUMB2_VSTRD);
+    annotateDalvikRegAccess(lir, vDest, false /* isLoad */);
+    return lir;
+}
 
 /*
  * Load value from base + displacement.  Optionally perform null check
@@ -507,28 +575,30 @@
                             bool nullCheck, int vReg)
 {
     ArmLIR *first = NULL;
-    ArmLIR *res;
+    ArmLIR *res, *load;
     ArmOpCode opCode = THUMB_BKPT;
     bool shortForm = false;
     bool thumb2Form = (displacement < 4092 && displacement >= 0);
     int shortMax = 128;
     bool allLowRegs = (LOWREG(rBase) && LOWREG(rDest));
+    int encodedDisp = displacement;
+
     switch (size) {
         case WORD:
             if (LOWREG(rDest) && (rBase == rpc) &&
                 (displacement <= 1020) && (displacement >= 0)) {
                 shortForm = true;
-                displacement >>= 2;
+                encodedDisp >>= 2;
                 opCode = THUMB_LDR_PC_REL;
             } else if (LOWREG(rDest) && (rBase == r13) &&
                       (displacement <= 1020) && (displacement >= 0)) {
                 shortForm = true;
-                displacement >>= 2;
+                encodedDisp >>= 2;
                 opCode = THUMB_LDR_SP_REL;
             } else if (allLowRegs && displacement < 128 && displacement >= 0) {
                 assert((displacement & 0x3) == 0);
                 shortForm = true;
-                displacement >>= 2;
+                encodedDisp >>= 2;
                 opCode = THUMB_LDR_RRI5;
             } else if (thumb2Form) {
                 shortForm = true;
@@ -539,7 +609,7 @@
             if (allLowRegs && displacement < 64 && displacement >= 0) {
                 assert((displacement & 0x1) == 0);
                 shortForm = true;
-                displacement >>= 1;
+                encodedDisp >>= 1;
                 opCode = THUMB_LDRH_RRI5;
             } else if (displacement < 4092 && displacement >= 0) {
                 shortForm = true;
@@ -573,11 +643,15 @@
     if (nullCheck)
         first = genNullCheck(cUnit, vReg, rBase, mir->offset, NULL);
     if (shortForm) {
-        res = newLIR3(cUnit, opCode, rDest, rBase, displacement);
+        load = res = newLIR3(cUnit, opCode, rDest, rBase, encodedDisp);
     } else {
         assert(rBase != rDest);
-        res = loadConstant(cUnit, rDest, displacement);
-        loadBaseIndexed(cUnit, rBase, rDest, rDest, 0, size);
+        res = loadConstant(cUnit, rDest, encodedDisp);
+        load = loadBaseIndexed(cUnit, rBase, rDest, rDest, 0, size);
+    }
+
+    if (rBase == rFP) {
+        annotateDalvikRegAccess(load, displacement >> 2, true /* isLoad */);
     }
     return (first) ? first : res;
 }
@@ -586,12 +660,14 @@
                              int displacement, int rSrc, OpSize size,
                              int rScratch)
 {
-    ArmLIR *res;
+    ArmLIR *res, *store;
     ArmOpCode opCode = THUMB_BKPT;
     bool shortForm = false;
     bool thumb2Form = (displacement < 4092 && displacement >= 0);
     int shortMax = 128;
     bool allLowRegs = (LOWREG(rBase) && LOWREG(rSrc));
+    int encodedDisp = displacement;
+
     if (rScratch != -1)
         allLowRegs &= LOWREG(rScratch);
     switch (size) {
@@ -599,7 +675,7 @@
             if (allLowRegs && displacement < 128 && displacement >= 0) {
                 assert((displacement & 0x3) == 0);
                 shortForm = true;
-                displacement >>= 2;
+                encodedDisp >>= 2;
                 opCode = THUMB_STR_RRI5;
             } else if (thumb2Form) {
                 shortForm = true;
@@ -611,7 +687,7 @@
             if (displacement < 64 && displacement >= 0) {
                 assert((displacement & 0x1) == 0);
                 shortForm = true;
-                displacement >>= 1;
+                encodedDisp >>= 1;
                 opCode = THUMB_STRH_RRI5;
             } else if (thumb2Form) {
                 shortForm = true;
@@ -632,11 +708,15 @@
             assert(0);
     }
     if (shortForm) {
-        res = newLIR3(cUnit, opCode, rSrc, rBase, displacement);
+        store = res = newLIR3(cUnit, opCode, rSrc, rBase, encodedDisp);
     } else {
         assert(rScratch != -1);
-        res = loadConstant(cUnit, rScratch, displacement);
-        storeBaseIndexed(cUnit, rBase, rScratch, rSrc, 0, size);
+        res = loadConstant(cUnit, rScratch, encodedDisp);
+        store = storeBaseIndexed(cUnit, rBase, rScratch, rSrc, 0, size);
+    }
+
+    if (rBase == rFP) {
+        annotateDalvikRegAccess(store, displacement >> 2, false /* isLoad */);
     }
     return res;
 }
@@ -1139,7 +1219,7 @@
     branch1->generic.target = (LIR *) genIT(cUnit, ARM_COND_HI, "E");
     newLIR2(cUnit, THUMB2_MOV_IMM_SHIFT, r7, modifiedImmediate(-1));
     newLIR2(cUnit, THUMB_MOV_IMM, r7, 1);
-    genITBottom(cUnit);
+    genBarrier(cUnit);
 
     branch2->generic.target = (LIR *) opRegReg(cUnit, OP_NEG, r7, r7);
     branch1->generic.target = (LIR *) storeValue(cUnit, r7, vDest, r4PC);
@@ -1279,7 +1359,7 @@
     //TODO: need assertion mechanism to validate IT region size
     genIT(cUnit, (isMin) ? ARM_COND_GT : ARM_COND_LT, "");
     opRegReg(cUnit, OP_MOV, reg0, reg1);
-    genITBottom(cUnit);
+    genBarrier(cUnit);
     if (vDest >= 0)
         storeValue(cUnit, reg0, vDest, reg1);
     else