Loop detection, improved reg allocation

Detect loops and loop nesting depth, and use the latter to
weight register uses (which are then used to determine which
registers to promote).

Also:

   o Fixed typo that prevented squashing of useless fp reg copies

   o Rescheduled array access checks to hide latency of limit load.

   o Add basic-block optimization pass to remove duplicate range
     checks.

   o Fixed bug that prevented recognition of redundant null
     checks following iput-wide and aput-wide.

Change-Id: Icfbae39e89b1d14b8703ad6bbb0b29c0635fed1e
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index bfc5639..34d9fb9 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -1341,22 +1341,23 @@
         opRegCopy(cUnit, regPtr, rlArray.lowReg);
     }
 
-    if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
-        int regLen = oatAllocTemp(cUnit);
+    bool needsRangeCheck = (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK));
+    int regLen = INVALID_REG;
+    if (needsRangeCheck) {
+        regLen = oatAllocTemp(cUnit);
         //NOTE: max live temps(4) here.
         /* Get len */
         loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
-        /* regPtr -> array data */
-        opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
+    }
+    /* regPtr -> array data */
+    opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
+    /* at this point, regPtr points to array, 2 live temps */
+    rlSrc = loadValue(cUnit, rlSrc, regClass);
+    if (needsRangeCheck) {
         genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
                        kThrowArrayBounds);
         oatFreeTemp(cUnit, regLen);
-    } else {
-        /* regPtr -> array data */
-        opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
     }
-    /* at this point, regPtr points to array, 2 live temps */
-    rlSrc = loadValue(cUnit, rlSrc, regClass);
     storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
                      scale, kWord);
 }
@@ -1406,21 +1407,15 @@
     }
 #else
     int regPtr = oatAllocTemp(cUnit);
-    if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
-        int regLen = oatAllocTemp(cUnit);
+    bool needsRangeCheck = (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK));
+    int regLen = INVALID_REG;
+    if (needsRangeCheck) {
+        regLen = oatAllocTemp(cUnit);
         /* Get len */
         loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
-        /* regPtr -> array data */
-        opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
-        // TODO: change kCondCS to a more meaningful name, is the sense of
-        // carry-set/clear flipped?
-        genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
-                       kThrowArrayBounds);
-        oatFreeTemp(cUnit, regLen);
-    } else {
-        /* regPtr -> array data */
-        opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
     }
+    /* regPtr -> array data */
+    opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
     oatFreeTemp(cUnit, rlArray.lowReg);
     if ((size == kLong) || (size == kDouble)) {
         if (scale) {
@@ -1434,6 +1429,13 @@
         oatFreeTemp(cUnit, rlIndex.lowReg);
         rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
 
+        if (needsRangeCheck) {
+            // TODO: change kCondCS to a more meaningful name, is the sense of
+            // carry-set/clear flipped?
+            genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
+                           kThrowArrayBounds);
+            oatFreeTemp(cUnit, regLen);
+        }
         loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
 
         oatFreeTemp(cUnit, regPtr);
@@ -1441,6 +1443,13 @@
     } else {
         rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
 
+        if (needsRangeCheck) {
+            // TODO: change kCondCS to a more meaningful name, is the sense of
+            // carry-set/clear flipped?
+            genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
+                           kThrowArrayBounds);
+            oatFreeTemp(cUnit, regLen);
+        }
         loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg,
                         scale, size);
 
@@ -1483,20 +1492,16 @@
     /* null object? */
     genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg, mir);
 
-    if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
-        int regLen = oatAllocTemp(cUnit);
+    bool needsRangeCheck = (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK));
+    int regLen = INVALID_REG;
+    if (needsRangeCheck) {
+        regLen = oatAllocTemp(cUnit);
         //NOTE: max live temps(4) here.
         /* Get len */
         loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
-        /* regPtr -> array data */
-        opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
-        genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
-                       kThrowArrayBounds);
-        oatFreeTemp(cUnit, regLen);
-    } else {
-        /* regPtr -> array data */
-        opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
     }
+    /* regPtr -> array data */
+    opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
     /* at this point, regPtr points to array, 2 live temps */
     if ((size == kLong) || (size == kDouble)) {
         //TUNING: specific wide routine that can handle fp regs
@@ -1510,12 +1515,22 @@
         }
         rlSrc = loadValueWide(cUnit, rlSrc, regClass);
 
+        if (needsRangeCheck) {
+            genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
+                           kThrowArrayBounds);
+            oatFreeTemp(cUnit, regLen);
+        }
+
         storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
 
         oatFreeTemp(cUnit, regPtr);
     } else {
         rlSrc = loadValue(cUnit, rlSrc, regClass);
-
+        if (needsRangeCheck) {
+            genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
+                           kThrowArrayBounds);
+            oatFreeTemp(cUnit, regLen);
+        }
         storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
                          scale, size);
     }