Add support for suspend polling on backward branches in JIT'ed code.

The polling is expensive for now, as it takes three instructions:
ld/ld/branch.
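
Conceptually each poll is equivalent to the C sketch below; the names
are hypothetical stand-ins for the real Dalvik fields and helpers, and
the three comments mark the three generated instructions.

    #include <stdint.h>

    extern void handleSuspendRequest(void);      /* hypothetical handler */

    /* selfFlagField: hypothetical per-thread slot holding the address
     * of the shared suspend-check word. */
    static inline void suspendPoll(int32_t **selfFlagField)
    {
        int32_t *flagAddr = *selfFlagField;      /* ld #1 */
        int32_t flag = *flagAddr;                /* ld #2 */
        if (flag != 0)                           /* branch */
            handleSuspendRequest();
    }

To mitigate the extra overhead, a bunch of bonus optimizations have
been implemented: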

- Cleaned up resource flags for memory disambiguation.
- Rewrote the load/store elimination and scheduler routines to hide
  the ld/ld latency for the GC flag, separating the dependency checking
  into a memory disambiguation part and a resource conflict part (see
  the sketch after this list).
- Allowed Dalvik/constant/non-aliasing loads to be hoisted above
  branches for null/range checks.
- Created extended basic blocks following goto instructions so that
  longer instruction streams can be optimized as a whole.
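
The dependency split in the second item can be sketched as below. This
is a minimal illustration, assuming ENCODE_MEM is the union of all the
memory-class bits (literal/Dalvik/heap/must-not-alias) and that the
remaining mask bits describe registers and condition codes; it is not
the actual scheduler code.

    #include <stdbool.h>
    #include <stdint.h>

    typedef uint64_t u8;             /* Dalvik's 64-bit unsigned type */
    extern const u8 ENCODE_MEM;      /* union of all memory-class bits */

    /*
     * Decide whether a later LIR can be hoisted above an earlier one.
     * memProvenDisjoint stands in for the real disambiguation query,
     * e.g. two Dalvik-reg accesses at different frame offsets.
     */
    static bool canHoist(u8 earlierDefMask, u8 laterUseMask,
                         bool memProvenDisjoint)
    {
        u8 conflict = earlierDefMask & laterUseMask;

        /* Resource conflict part: any non-memory overlap blocks motion. */
        if (conflict & ~ENCODE_MEM)
            return false;

        /* Memory disambiguation part: memory overlap is tolerated only
         * when the two accesses are proven not to alias. */
        if (conflict & ENCODE_MEM)
            return memProvenDisjoint;

        return true;
    }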

Without the bonus optimizations, performance dropped by roughly 5-10%
on some benchmarks because there is too little headroom to hide the
polling latency in tight loops. With them, the performance delta is
within +/-5% even with polling code generated. With the optimizations
in place but polling disabled, they provide consistent performance
improvements:

CaffeineMark  3.6%
Linpack      11.1%
Scimark       9.7%
Sieve        33.0%
Checkers      6.0%

As a result, GC polling is disabled by default but can be turned on
through the -Xjitsuspendpoll flag for experimental purposes.
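
For example, a run with polling enabled might look like the following;
the invocation shape is hypothetical and only the -Xjitsuspendpoll flag
itself comes from this change:

    dalvikvm -Xjitsuspendpoll -cp /path/to/app.jar MainClass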

Change-Id: Ia81fc85de3e2b70e6cc93bc37c2b845892003cdb
diff --git a/vm/compiler/codegen/arm/CodegenCommon.c b/vm/compiler/codegen/arm/CodegenCommon.c
index f4ca95c..75134bf 100644
--- a/vm/compiler/codegen/arm/CodegenCommon.c
+++ b/vm/compiler/codegen/arm/CodegenCommon.c
@@ -35,14 +35,12 @@
 static void setMemRefType(ArmLIR *lir, bool isLoad, int memType)
 {
     u8 *maskPtr;
-    u8 mask;
-    assert( EncodingMap[lir->opcode].flags & (IS_LOAD | IS_STORE));
+    u8 mask = ENCODE_MEM;
+    assert(EncodingMap[lir->opcode].flags & (IS_LOAD | IS_STORE));
     if (isLoad) {
         maskPtr = &lir->useMask;
-        mask = ENCODE_MEM_USE;
     } else {
         maskPtr = &lir->defMask;
-        mask = ENCODE_MEM_DEF;
     }
     /* Clear out the memref flags */
     *maskPtr &= ~mask;
@@ -50,14 +48,19 @@
     switch(memType) {
         case kLiteral:
             assert(isLoad);
-            *maskPtr |= (ENCODE_LITERAL | ENCODE_LITPOOL_REF);
+            *maskPtr |= ENCODE_LITERAL;
             break;
         case kDalvikReg:
-            *maskPtr |= (ENCODE_DALVIK_REG | ENCODE_FRAME_REF);
+            *maskPtr |= ENCODE_DALVIK_REG;
             break;
         case kHeapRef:
             *maskPtr |= ENCODE_HEAP_REF;
             break;
+        case kMustNotAlias:
+            /* Currently only loads can be marked as kMustNotAlias */
+            assert(!(EncodingMap[lir->opcode].flags & IS_STORE));
+            *maskPtr |= ENCODE_MUST_NOT_ALIAS;
+            break;
         default:
             LOGE("Jit: invalid memref kind - %d", memType);
             assert(0);  // Bail if debug build, set worst-case in the field
@@ -138,9 +141,13 @@
         setMemRefType(lir, flags & IS_LOAD, kHeapRef);
     }
 
+    /*
+     * Conservatively assume the branch here will call out to a function that in
+     * turn will trash everything.
+     */
     if (flags & IS_BRANCH) {
-        lir->defMask |= ENCODE_REG_PC;
-        lir->useMask |= ENCODE_REG_PC;
+        lir->defMask = lir->useMask = ENCODE_ALL;
+        return;
     }
 
     if (flags & REG_DEF0) {
@@ -176,11 +183,6 @@
         lir->defMask = ENCODE_ALL;
     }
 
-    /* Set up the mask for resources that are used */
-    if (flags & IS_BRANCH) {
-        lir->useMask |= ENCODE_REG_PC;
-    }
-
     if (flags & (REG_USE0 | REG_USE1 | REG_USE2 | REG_USE3)) {
         int i;
 
@@ -225,6 +227,37 @@
 }
 
 /*
+ * Set up the accurate resource mask for branch instructions
+ */
+static void relaxBranchMasks(ArmLIR *lir)
+{
+    int flags = EncodingMap[lir->opcode].flags;
+
+    /* Make sure only branch instructions are passed here */
+    assert(flags & IS_BRANCH);
+
+    lir->useMask = lir->defMask = ENCODE_REG_PC;
+
+    if (flags & REG_DEF_LR) {
+        lir->defMask |= ENCODE_REG_LR;
+    }
+
+    if (flags & (REG_USE0 | REG_USE1 | REG_USE2 | REG_USE3)) {
+        int i;
+
+        for (i = 0; i < 4; i++) {
+            if (flags & (1 << (kRegUse0 + i))) {
+                setupRegMask(&lir->useMask, lir->operands[i]);
+            }
+        }
+    }
+
+    if (flags & USES_CCODES) {
+        lir->useMask |= ENCODE_CCODE;
+    }
+}
+
+/*
  * The following are building blocks to construct low-level IRs with 0 - 4
  * operands.
  */
@@ -407,5 +440,9 @@
     }
     /* Branch to the PC reconstruction code */
     branch->generic.target = (LIR *) pcrLabel;
+
+    /* Clear the conservative flags for branches that punt to the interpreter */
+    relaxBranchMasks(branch);
+
     return pcrLabel;
 }