Implement SSA-based loop optimizations.

For traces of simple natural loops (i.e. no invokes/side exits), null and range
checks will be hoisted into the entry block.

For acyclic traces, the SSA representation will be formed but no optimizations
are applied (for now).

SSA representation will be printed with the normal verbose output. For example:

D/dalvikvm( 1248): Dumping LIR insns
D/dalvikvm( 1248): installed code is at 0x428559d4
D/dalvikvm( 1248): total size is 324 bytes
D/dalvikvm( 1248): 0x428559d4 (0000): data    0x012c(300)
D/dalvikvm( 1248): -------- entry offset: 0x002b
D/dalvikvm( 1248): -------- MIR_OP_NULL_N_RANGE_UP_CHECK
D/dalvikvm( 1248): 0x428559d6 (0002): ldr     r0, [r5, #36]
D/dalvikvm( 1248): 0x428559d8 (0004): ldr     r1, [r5, #12]
D/dalvikvm( 1248): 0x428559da (0006): cbz     r0,0x42855a06
D/dalvikvm( 1248): 0x428559dc (0008): ldr     r0, [r0, #8]
D/dalvikvm( 1248): 0x428559de (000a): subs    r1, #1
D/dalvikvm( 1248): 0x428559e0 (000c): cmp     r1, r0
D/dalvikvm( 1248): 0x428559e2 (000e): bge     0x42855a06
D/dalvikvm( 1248): -------- MIR_OP_NULL_N_RANGE_UP_CHECK
D/dalvikvm( 1248): 0x428559e4 (0010): ldr     r0, [r5, #40]
D/dalvikvm( 1248): 0x428559e6 (0012): ldr     r1, [r5, #12]
D/dalvikvm( 1248): 0x428559e8 (0014): cbz     r0,0x42855a06
D/dalvikvm( 1248): 0x428559ea (0016): ldr     r0, [r0, #8]
D/dalvikvm( 1248): 0x428559ec (0018): subs    r1, #1
D/dalvikvm( 1248): 0x428559ee (001a): cmp     r1, r0
D/dalvikvm( 1248): 0x428559f0 (001c): bge     0x42855a06
D/dalvikvm( 1248): -------- MIR_OP_NULL_N_RANGE_UP_CHECK
D/dalvikvm( 1248): 0x428559f2 (001e): ldr     r0, [r5, #32]
D/dalvikvm( 1248): 0x428559f4 (0020): ldr     r1, [r5, #12]
D/dalvikvm( 1248): 0x428559f6 (0022): cbz     r0,0x42855a06
D/dalvikvm( 1248): 0x428559f8 (0024): ldr     r0, [r0, #8]
D/dalvikvm( 1248): 0x428559fa (0026): cmp     r1, r0
D/dalvikvm( 1248): 0x428559fc (0028): bge     0x42855a06
D/dalvikvm( 1248): -------- MIR_OP_LOWER_BOUND_CHECK
D/dalvikvm( 1248): 0x428559fe (002a): ldr     r0, [r5, #44]
D/dalvikvm( 1248): 0x42855a00 (002c): cmp     r0, #1
D/dalvikvm( 1248): 0x42855a02 (002e): blt     0x42855a06
D/dalvikvm( 1248): 0x42855a04 (0030): b       0x42855a08
D/dalvikvm( 1248): 0x42855a06 (0032): b       0x42855af0
D/dalvikvm( 1248): L0x002b:
D/dalvikvm( 1248): -------- MIR_OP_PHI
D/dalvikvm( 1248): -------- s20(v11_1) <- s11(v11_0) s46(v11_2)
D/dalvikvm( 1248): -------- dalvik offset: 0x002b @ aget-wide
D/dalvikvm( 1248): -------- s21(v12_1) s22(v13_1) <- s9(v9_0) s20(v11_1)
D/dalvikvm( 1248): 0x42855a08 (0034): ldr     r2, [r5, #36]
D/dalvikvm( 1248): 0x42855a0a (0036): ldr     r3, [r5, #44]
D/dalvikvm( 1248): 0x42855a0c (0038): adds    r2, r2, #16
D/dalvikvm( 1248): 0x42855a0e (003a): lsls    r3, r3, #3
D/dalvikvm( 1248): 0x42855a10 (003c): ldr     r0, [r2, r3]
D/dalvikvm( 1248): 0x42855a12 (003e): adds    r2, r2, #4
D/dalvikvm( 1248): 0x42855a14 (0040): ldr     r1, [r2, r3]
D/dalvikvm( 1248): -------- dalvik offset: 0x002d @ aget-wide
D/dalvikvm( 1248): -------- s23(v14_1) s24(v15_1) <- s10(v10_0) s20(v11_1)
D/dalvikvm( 1248): 0x42855a16 (0042): ldr     r3, [r5, #40]
D/dalvikvm( 1248): 0x42855a18 (0044): str     r0, [r5, #48]
D/dalvikvm( 1248): 0x42855a1a (0046): ldr     r0, [r5, #44]
D/dalvikvm( 1248): 0x42855a1c (0048): adds    r3, r3, #16
D/dalvikvm( 1248): 0x42855a1e (004a): lsls    r0, r0, #3
D/dalvikvm( 1248): 0x42855a20 (004c): str     r1, [r5, #52]
D/dalvikvm( 1248): 0x42855a22 (004e): ldr     r1, [r3, r0]
D/dalvikvm( 1248): 0x42855a24 (0050): adds    r3, r3, #4
D/dalvikvm( 1248): 0x42855a26 (0052): ldr     r2, [r3, r0]
D/dalvikvm( 1248): -------- dalvik offset: 0x002f @ add-double/2addr
D/dalvikvm( 1248): -------- s25(v12_2) s26(v13_2) <- s21(v12_1) s22(v13_1) s23(v14_1) s24(v15_1)
D/dalvikvm( 1248): 0x42855a28 (0054): str     r1, [r5, #56]
D/dalvikvm( 1248): 0x42855a2a (0056): str     r2, [r5, #60]
D/dalvikvm( 1248): 0x42855a2c (0058): vldr    d1, [r5, #48]
D/dalvikvm( 1248): 0x42855a30 (005c): vldr    d2, [r5, #56]
D/dalvikvm( 1248): 0x42855a34 (0060): vadd    d0, d1, d2
D/dalvikvm( 1248): -------- dalvik offset: 0x0030 @ const/4
D/dalvikvm( 1248): -------- s27(v14_2) <-
D/dalvikvm( 1248): 0x42855a38 (0064): movs    r2, #1
D/dalvikvm( 1248): -------- dalvik offset: 0x0031 @ sub-int
D/dalvikvm( 1248): -------- s28(v14_3) <- s20(v11_1) s27(v14_2)
D/dalvikvm( 1248): 0x42855a3a (0066): ldr     r3, [r5, #44]
D/dalvikvm( 1248): 0x42855a3c (0068): subs    r0, r3, r2
D/dalvikvm( 1248): -------- dalvik offset: 0x0033 @ aget-wide
D/dalvikvm( 1248): -------- s29(v14_4) s30(v15_2) <- s8(v8_0) s28(v14_3)
D/dalvikvm( 1248): 0x42855a3e (006a): ldr     r3, [r5, #32]
D/dalvikvm( 1248): 0x42855a40 (006c): adds    r3, r3, #16
D/dalvikvm( 1248): 0x42855a42 (006e): str     r0, [r5, #56]
D/dalvikvm( 1248): 0x42855a44 (0070): lsls    r0, r0, #3
D/dalvikvm( 1248): 0x42855a46 (0072): vstr    d0, [r5, #48]
D/dalvikvm( 1248): 0x42855a4a (0076): ldr     r1, [r3, r0]
D/dalvikvm( 1248): 0x42855a4c (0078): adds    r3, r3, #4
D/dalvikvm( 1248): 0x42855a4e (007a): ldr     r2, [r3, r0]
D/dalvikvm( 1248): -------- dalvik offset: 0x0035 @ add-double/2addr
D/dalvikvm( 1248): -------- s31(v12_3) s32(v13_3) <- s25(v12_2) s26(v13_2) s29(v14_4) s30(v15_2)
D/dalvikvm( 1248): 0x42855a50 (007c): str     r1, [r5, #56]
D/dalvikvm( 1248): 0x42855a52 (007e): str     r2, [r5, #60]
D/dalvikvm( 1248): 0x42855a54 (0080): vldr    d1, [r5, #48]
D/dalvikvm( 1248): 0x42855a58 (0084): vldr    d2, [r5, #56]
D/dalvikvm( 1248): 0x42855a5c (0088): vadd    d0, d1, d2
D/dalvikvm( 1248): -------- dalvik offset: 0x0036 @ add-int/lit8
D/dalvikvm( 1248): -------- s33(v14_5) <- s20(v11_1)
D/dalvikvm( 1248): 0x42855a60 (008c): ldr     r2, [r5, #44]
D/dalvikvm( 1248): 0x42855a62 (008e): adds    r2, r2, #1
D/dalvikvm( 1248): -------- dalvik offset: 0x0038 @ aget-wide
D/dalvikvm( 1248): -------- s34(v14_6) s35(v15_3) <- s8(v8_0) s33(v14_5)
D/dalvikvm( 1248): 0x42855a64 (0090): ldr     r1, [r5, #32]
D/dalvikvm( 1248): 0x42855a66 (0092): adds    r1, r1, #16
D/dalvikvm( 1248): 0x42855a68 (0094): str     r2, [r5, #56]
D/dalvikvm( 1248): 0x42855a6a (0096): lsls    r2, r2, #3
D/dalvikvm( 1248): 0x42855a6c (0098): vstr    d0, [r5, #48]
D/dalvikvm( 1248): 0x42855a70 (009c): ldr     r3, [r1, r2]
D/dalvikvm( 1248): 0x42855a72 (009e): adds    r1, r1, #4
D/dalvikvm( 1248): 0x42855a74 (00a0): ldr     r0, [r1, r2]
D/dalvikvm( 1248): -------- dalvik offset: 0x003a @ add-double/2addr
D/dalvikvm( 1248): -------- s36(v12_4) s37(v13_4) <- s31(v12_3) s32(v13_3) s34(v14_6) s35(v15_3)
D/dalvikvm( 1248): 0x42855a76 (00a2): str     r3, [r5, #56]
D/dalvikvm( 1248): 0x42855a78 (00a4): str     r0, [r5, #60]
D/dalvikvm( 1248): 0x42855a7a (00a6): vldr    d1, [r5, #48]
D/dalvikvm( 1248): 0x42855a7e (00aa): vldr    d2, [r5, #56]
D/dalvikvm( 1248): 0x42855a82 (00ae): vadd    d0, d1, d2
D/dalvikvm( 1248): 0x42855a86 (00b2): vstr    d0, [r5, #48]
D/dalvikvm( 1248): -------- dalvik offset: 0x003b @ mul-double/2addr
D/dalvikvm( 1248): -------- s38(v12_5) s39(v13_5) <- s36(v12_4) s37(v13_4) s4(v4_0) s5(v5_0)
D/dalvikvm( 1248): 0x42855a8a (00b6): vmov.f64 s2, s0
D/dalvikvm( 1248): 0x42855a8e (00ba): vldr    d2, [r5, #16]
D/dalvikvm( 1248): 0x42855a92 (00be): vmuld   d0, d1, d2
D/dalvikvm( 1248): -------- dalvik offset: 0x003c @ aget-wide
D/dalvikvm( 1248): -------- s40(v14_7) s41(v15_4) <- s8(v8_0) s20(v11_1)
D/dalvikvm( 1248): 0x42855a96 (00c2): ldr     r2, [r5, #32]
D/dalvikvm( 1248): 0x42855a98 (00c4): ldr     r3, [r5, #44]
D/dalvikvm( 1248): 0x42855a9a (00c6): adds    r2, r2, #16
D/dalvikvm( 1248): 0x42855a9c (00c8): lsls    r3, r3, #3
D/dalvikvm( 1248): 0x42855a9e (00ca): vstr    d0, [r5, #48]
D/dalvikvm( 1248): 0x42855aa2 (00ce): ldr     r0, [r2, r3]
D/dalvikvm( 1248): 0x42855aa4 (00d0): adds    r2, r2, #4
D/dalvikvm( 1248): 0x42855aa6 (00d2): ldr     r1, [r2, r3]
D/dalvikvm( 1248): 0x42855aa8 (00d4): str     r0, [r5, #56]
D/dalvikvm( 1248): 0x42855aaa (00d6): str     r1, [r5, #60]
D/dalvikvm( 1248): -------- dalvik offset: 0x003e @ mul-double
D/dalvikvm( 1248): -------- s42(v14_8) s43(v15_5) <- s40(v14_7) s41(v15_4) s16(v16_0) s17(v17_0)
D/dalvikvm( 1248): 0x42855aac (00d8): vldr    d1, [r5, #56]
D/dalvikvm( 1248): 0x42855ab0 (00dc): vldr    d2, [r5, #64]
D/dalvikvm( 1248): 0x42855ab4 (00e0): vmuld   d0, d1, d2
D/dalvikvm( 1248): 0x42855ab8 (00e4): vstr    d0, [r5, #56]
D/dalvikvm( 1248): -------- dalvik offset: 0x0040 @ add-double/2addr
D/dalvikvm( 1248): -------- s44(v12_6) s45(v13_6) <- s38(v12_5) s39(v13_5) s42(v14_8) s43(v15_5)
D/dalvikvm( 1248): 0x42855abc (00e8): vldr    d1, [r5, #48]
D/dalvikvm( 1248): 0x42855ac0 (00ec): vldr    d2, [r5, #56]
D/dalvikvm( 1248): 0x42855ac4 (00f0): vadd    d0, d1, d2
D/dalvikvm( 1248): 0x42855ac8 (00f4): vstr    d0, [r5, #48]
D/dalvikvm( 1248): -------- dalvik offset: 0x0041 @ aput-wide
D/dalvikvm( 1248): -------- s44(v12_6) s45(v13_6) s8(v8_0) s20(v11_1)
D/dalvikvm( 1248): 0x42855acc (00f8): ldr     r3, [r5, #32]
D/dalvikvm( 1248): 0x42855ace (00fa): ldr     r0, [r5, #44]
D/dalvikvm( 1248): 0x42855ad0 (00fc): adds    r3, r3, #16
D/dalvikvm( 1248): 0x42855ad2 (00fe): ldr     r1, [r5, #48]
D/dalvikvm( 1248): 0x42855ad4 (0100): ldr     r2, [r5, #52]
D/dalvikvm( 1248): 0x42855ad6 (0102): lsls    r0, r0, #3
D/dalvikvm( 1248): 0x42855ad8 (0104): str     r1, [r3, r0]
D/dalvikvm( 1248): 0x42855ada (0106): adds    r3, r3, #4
D/dalvikvm( 1248): 0x42855adc (0108): str     r2, [r3, r0]
D/dalvikvm( 1248): -------- dalvik offset: 0x0043 @ add-int/lit8
D/dalvikvm( 1248): -------- s46(v11_2) <- s20(v11_1)
D/dalvikvm( 1248): 0x42855ade (010a): ldr     r2, [r5, #44]
D/dalvikvm( 1248): 0x42855ae0 (010c): adds    r2, r2, #1
D/dalvikvm( 1248): 0x42855ae2 (010e): str     r2, [r5, #44]
D/dalvikvm( 1248): -------- dalvik offset: 0x0045 @ goto
D/dalvikvm( 1248): --------
D/dalvikvm( 1248): L0x0029:
D/dalvikvm( 1248): -------- dalvik offset: 0x0029 @ if-ge
D/dalvikvm( 1248): -------- s46(v11_2) s3(v3_0)
D/dalvikvm( 1248): 0x42855ae4 (0110): ldr     r0, [r5, #44]
D/dalvikvm( 1248): 0x42855ae6 (0112): ldr     r1, [r5, #12]
D/dalvikvm( 1248): 0x42855ae8 (0114): cmp     r0, r1
D/dalvikvm( 1248): 0x42855aea (0116): bge     0x42855aee
D/dalvikvm( 1248): 0x42855aec (0118): b       0x42855a08
D/dalvikvm( 1248): -------- exit offset: 0x0046
D/dalvikvm( 1248): 0x42855aee (011a): b       0x42855af8
D/dalvikvm( 1248): -------- reconstruct dalvik PC : 0x42a644d6 @ +0x002b
D/dalvikvm( 1248): 0x42855af0 (011c): ldr     r0, [pc, #32]
D/dalvikvm( 1248): Exception_Handling:
D/dalvikvm( 1248): 0x42855af2 (011e): ldr     r1, [r6, #84]
D/dalvikvm( 1248): 0x42855af4 (0120): blx     r1
D/dalvikvm( 1248): 0x42855af6 (0122): .align4
D/dalvikvm( 1248): -------- chaining cell (normal): 0x0046
D/dalvikvm( 1248): 0x42855af8 (0124): ldr     r0, [r6, #76]
D/dalvikvm( 1248): 0x42855afa (0126): blx     r0
D/dalvikvm( 1248): 0x42855afc (0128): data    0x450c(17676)
D/dalvikvm( 1248): 0x42855afe (012a): data    0x42a6(17062)
D/dalvikvm( 1248): 0x42855b14 (0140): .word (0x42a644d6)
D/dalvikvm( 1248): End Ljnt/scimark2/SOR;execute, 18 Dalvik instructions
diff --git a/vm/compiler/codegen/CompilerCodegen.h b/vm/compiler/codegen/CompilerCodegen.h
index c9e6bd6..28e13a5 100644
--- a/vm/compiler/codegen/CompilerCodegen.h
+++ b/vm/compiler/codegen/CompilerCodegen.h
@@ -14,11 +14,11 @@
  * limitations under the License.
  */
 
-#include "../CompilerIR.h"
-
 #ifndef _DALVIK_VM_COMPILERCODEGEN_H_
 #define _DALVIK_VM_COMPILERCODEGEN_H_
 
+#include "compiler/CompilerIR.h"
+
 /* Work unit is architecture dependent */
 bool dvmCompilerDoWork(CompilerWorkOrder *work);
 
diff --git a/vm/compiler/codegen/Optimizer.h b/vm/compiler/codegen/Optimizer.h
index 1a891b1..432b368 100644
--- a/vm/compiler/codegen/Optimizer.h
+++ b/vm/compiler/codegen/Optimizer.h
@@ -14,12 +14,11 @@
  * limitations under the License.
  */
 
-#include "Dalvik.h"
-#include "compiler/CompilerInternals.h"
-
 #ifndef _DALVIK_VM_COMPILER_OPTIMIZATION_H
 #define _DALVIK_VM_COMPILER_OPTIMIZATION_H
 
+#include "Dalvik.h"
+
 /* Forward declarations */
 struct CompilationUnit;
 struct LIR;
diff --git a/vm/compiler/codegen/arm/ArchUtility.c b/vm/compiler/codegen/arm/ArchUtility.c
index 3d55abd..ab46b44 100644
--- a/vm/compiler/codegen/arm/ArchUtility.c
+++ b/vm/compiler/codegen/arm/ArchUtility.c
@@ -204,8 +204,16 @@
     u2 *cPtr = (u2*)baseAddr;
     /* Handle pseudo-ops individually, and all regular insns as a group */
     switch(lir->opCode) {
+        case ARM_PSEUDO_EXTENDED_MIR:
+            /* intentional fallthrough */
+        case ARM_PSEUDO_SSA_REP:
+            LOGD("-------- %s\n", (char *) dest);
+            break;
         case ARM_PSEUDO_TARGET_LABEL:
             break;
+        case ARM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH:
+            LOGD("-------- chaining cell (backward branch): 0x%04x\n", dest);
+            break;
         case ARM_PSEUDO_CHAINING_CELL_NORMAL:
             LOGD("-------- chaining cell (normal): 0x%04x\n", dest);
             break;
@@ -220,13 +228,16 @@
                  ((Method *)dest)->name,
                  ((Method *)dest)->insns);
             break;
-        case ARM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH:
-            LOGD("-------- chaining cell (backward branch): 0x%04x\n", dest);
+        case ARM_PSEUDO_ENTRY_BLOCK:
+            LOGD("-------- entry offset: 0x%04x\n", dest);
             break;
         case ARM_PSEUDO_DALVIK_BYTECODE_BOUNDARY:
             LOGD("-------- dalvik offset: 0x%04x @ %s\n", dest,
                    getOpcodeName(lir->operands[1]));
             break;
+        case ARM_PSEUDO_EXIT_BLOCK:
+            LOGD("-------- exit offset: 0x%04x\n", dest);
+            break;
         case ARM_PSEUDO_ALIGN4:
             LOGD("%p (%04x): .align4\n", baseAddr + offset, offset);
             break;
diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h
index 001486d..87978d8 100644
--- a/vm/compiler/codegen/arm/ArmLIR.h
+++ b/vm/compiler/codegen/arm/ArmLIR.h
@@ -220,6 +220,10 @@
  * Assemble.c.
  */
 typedef enum ArmOpCode {
+    ARM_PSEUDO_EXTENDED_MIR = -16,
+    ARM_PSEUDO_SSA_REP = -15,
+    ARM_PSEUDO_ENTRY_BLOCK = -14,
+    ARM_PSEUDO_EXIT_BLOCK = -13,
     ARM_PSEUDO_TARGET_LABEL = -12,
     ARM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH = -11,
     ARM_PSEUDO_CHAINING_CELL_HOT = -10,
diff --git a/vm/compiler/codegen/arm/Codegen.c b/vm/compiler/codegen/arm/Codegen.c
index d9a29e8..d142ca0 100644
--- a/vm/compiler/codegen/arm/Codegen.c
+++ b/vm/compiler/codegen/arm/Codegen.c
@@ -24,6 +24,7 @@
  * applicable directory below this one.
  */
 
+#include "compiler/Loop.h"
 
 /* Array holding the entry offset of each template relative to the first one */
 static intptr_t templateEntryOffsets[TEMPLATE_LAST_MARK];
@@ -853,10 +854,22 @@
     loadValue(cUnit, vIndex, reg3);
 
     /* null object? */
-    ArmLIR * pcrLabel = genNullCheck(cUnit, vArray, reg2, mir->offset, NULL);
-    loadWordDisp(cUnit, reg2, lenOffset, reg0);  /* Get len */
-    opRegImm(cUnit, OP_ADD, reg2, dataOffset, rNone); /* reg2 -> array data */
-    genBoundsCheck(cUnit, reg3, reg0, mir->offset, pcrLabel);
+    ArmLIR * pcrLabel = NULL;
+
+    if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
+        pcrLabel = genNullCheck(cUnit, vArray, reg2, mir->offset, NULL);
+    }
+
+    if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+        /* Get len */
+        loadWordDisp(cUnit, reg2, lenOffset, reg0);
+        /* reg2 -> array data */
+        opRegImm(cUnit, OP_ADD, reg2, dataOffset, rNone);
+        genBoundsCheck(cUnit, reg3, reg0, mir->offset, pcrLabel);
+    } else {
+        /* reg2 -> array data */
+        opRegImm(cUnit, OP_ADD, reg2, dataOffset, rNone);
+    }
 #if !defined(WITH_SELF_VERIFICATION)
     if ((size == LONG) || (size == DOUBLE)) {
         //TUNING: redo.  Make specific wide routine, perhaps use ldmia/fp regs
@@ -933,11 +946,23 @@
     loadValue(cUnit, vIndex, reg3);
 
     /* null object? */
-    ArmLIR * pcrLabel = genNullCheck(cUnit, vArray, reg2, mir->offset,
-                                         NULL);
-    loadWordDisp(cUnit, reg2, lenOffset, reg0);  /* Get len */
-    opRegImm(cUnit, OP_ADD, reg2, dataOffset, rNone); /* reg2 -> array data */
-    genBoundsCheck(cUnit, reg3, reg0, mir->offset, pcrLabel);
+    ArmLIR * pcrLabel = NULL;
+
+    if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
+        pcrLabel = genNullCheck(cUnit, vArray, reg2, mir->offset, NULL);
+    }
+
+    if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+        /* Get len */
+        loadWordDisp(cUnit, reg2, lenOffset, reg0);
+        /* reg2 -> array data */
+        opRegImm(cUnit, OP_ADD, reg2, dataOffset, rNone);
+        genBoundsCheck(cUnit, reg3, reg0, mir->offset, pcrLabel);
+    } else {
+        /* reg2 -> array data */
+        opRegImm(cUnit, OP_ADD, reg2, dataOffset, rNone);
+    }
+
     /* at this point, reg2 points to array, reg3 is unscaled index */
 #if !defined(WITH_SELF_VERIFICATION)
     if ((size == LONG) || (size == DOUBLE)) {
@@ -3414,7 +3439,183 @@
     }
 }
 
-/* Entry function to invoke the backend of the JIT compiler */
+static char *extendedMIROpNames[MIR_OP_LAST - MIR_OP_FIRST] = {
+    "MIR_OP_PHI",
+    "MIR_OP_NULL_N_RANGE_UP_CHECK",
+    "MIR_OP_NULL_N_RANGE_DOWN_CHECK",
+    "MIR_OP_LOWER_BOUND_CHECK",
+    "MIR_OP_PUNT",
+};  /* Printable names, indexed by (opCode - MIR_OP_FIRST) */
+
+/*
+ * Emit the hoisted null and upper-bound checks for a count-up loop.
+ * vA = arrayReg; vB = idxReg (not read here); vC = endConditionReg;
+ * arg[0] = maxC (constant added to the end value before the compare)
+ * arg[1] = minC (read into a local here but not otherwise used)
+ * arg[2] = loopBranchConditionCode (OP_IF_GE lowers the bound by one)
+ * Both checks are handed cUnit->loopAnalysis->branchToPCR as punt target.
+ */
+static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir)
+{
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    const int lenOffset = offsetof(ArrayObject, length);
+    const int regArray = 0;
+    const int regIdxEnd = NEXT_REG(regArray);
+    const int regLength = regArray;
+    const int maxC = dInsn->arg[0];
+    const int minC = dInsn->arg[1];
+
+    /* regArray <- arrayRef (vA); regIdxEnd <- loop end value (vC) */
+    loadValue(cUnit, mir->dalvikInsn.vA, regArray);
+    loadValue(cUnit, mir->dalvikInsn.vC, regIdxEnd);
+    genRegImmCheck(cUnit, ARM_COND_EQ, regArray, 0, 0,
+                   (ArmLIR *) cUnit->loopAnalysis->branchToPCR);
+
+    /* regLength <- len(arrayRef); regLength reuses regArray's register */
+    loadWordDisp(cUnit, regArray, lenOffset, regLength);
+
+    int delta = maxC;
+    /*
+     * delta is the constant added to the end value. If the loop end condition
+     * is ">=" instead of ">", the largest index value is "endCondition - 1".
+     */
+    if (dInsn->arg[2] == OP_IF_GE) {
+        delta--;
+    }
+
+    if (delta) {
+        opRegImm(cUnit, OP_ADD, regIdxEnd, delta, regIdxEnd);
+    }
+    /* Punt if "regIdxEnd < len(Array)" is false */
+    insertRegRegCheck(cUnit, ARM_COND_GE, regIdxEnd, regLength, 0,
+                      (ArmLIR *) cUnit->loopAnalysis->branchToPCR);
+}
+
+/*
+ * Emit the hoisted null and upper-bound checks for a count-down loop.
+ * vA = arrayReg; vB = idxReg; vC = endConditionReg (not read here);
+ * arg[0] = maxC (constant added to the index before the compare)
+ * arg[1] = minC (read into a local here but not otherwise used)
+ * arg[2] = loopBranchConditionCode (not read here)
+ * Both checks are handed cUnit->loopAnalysis->branchToPCR as punt target.
+ */
+static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir)
+{
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    const int lenOffset = offsetof(ArrayObject, length);
+    const int regArray = 0;
+    const int regIdxInit = NEXT_REG(regArray);
+    const int regLength = regArray;
+    const int maxC = dInsn->arg[0];
+    const int minC = dInsn->arg[1];
+
+    /* regArray <- arrayRef (vA); regIdxInit <- index value (vB) */
+    loadValue(cUnit, mir->dalvikInsn.vA, regArray);
+    loadValue(cUnit, mir->dalvikInsn.vB, regIdxInit);
+    genRegImmCheck(cUnit, ARM_COND_EQ, regArray, 0, 0,
+                   (ArmLIR *) cUnit->loopAnalysis->branchToPCR);
+
+    /* regLength <- len(arrayRef); regLength reuses regArray's register */
+    loadWordDisp(cUnit, regArray, lenOffset, regLength);
+
+    if (maxC) {
+        opRegImm(cUnit, OP_ADD, regIdxInit, maxC, regIdxInit);
+    }
+
+    /* Punt if "regIdxInit < len(Array)" is false */
+    insertRegRegCheck(cUnit, ARM_COND_GE, regIdxInit, regLength, 0,
+                      (ArmLIR *) cUnit->loopAnalysis->branchToPCR);
+}
+
+/*
+ * Emit the hoisted lower-bound check: vA = idxReg; vB = minC.
+ * The check is handed cUnit->loopAnalysis->branchToPCR as its punt target.
+ */
+static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir)
+{
+    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    const int regIdx = 0;
+    const int minC = dInsn->vB;
+
+    /* regIdx <- initial index value */
+    loadValue(cUnit, mir->dalvikInsn.vA, regIdx);
+
+    /* Punt if "regIdx + minC >= 0" is false, i.e. if regIdx < -minC */
+    genRegImmCheck(cUnit, ARM_COND_LT, regIdx, -minC, 0,
+                   (ArmLIR *) cUnit->loopAnalysis->branchToPCR);
+}
+
+/* Emit LIR for an extended MIR op (opCode >= MIR_OP_FIRST), such as PHI */
+static void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir)
+{
+    int opOffset = mir->dalvikInsn.opCode - MIR_OP_FIRST;
+    char *msg = dvmCompilerNew(strlen(extendedMIROpNames[opOffset]) + 1,
+                               false);
+    strcpy(msg, extendedMIROpNames[opOffset]);
+    newLIR1(cUnit, ARM_PSEUDO_EXTENDED_MIR, (int) msg);
+
+    switch (mir->dalvikInsn.opCode) {
+        case MIR_OP_PHI: {  /* emits only the SSA string pseudo-op */
+            char *ssaString = dvmCompilerGetSSAString(cUnit, mir->ssaRep);
+            newLIR1(cUnit, ARM_PSEUDO_SSA_REP, (int) ssaString);
+            break;
+        }
+        case MIR_OP_NULL_N_RANGE_UP_CHECK: {
+            genHoistedChecksForCountUpLoop(cUnit, mir);
+            break;
+        }
+        case MIR_OP_NULL_N_RANGE_DOWN_CHECK: {
+            genHoistedChecksForCountDownLoop(cUnit, mir);
+            break;
+        }
+        case MIR_OP_LOWER_BOUND_CHECK: {
+            genHoistedLowerBoundCheck(cUnit, mir);
+            break;
+        }
+        case MIR_OP_PUNT: {
+            genUnconditionalBranch(cUnit,
+                                   (ArmLIR *) cUnit->loopAnalysis->branchToPCR);
+            break;
+        }
+        default:
+            break;
+    }
+}
+
+/*
+ * Create a PC-reconstruction (PCR) cell for the starting offset of this trace.
+ * The PCR cell is placed near the end of the compiled code, which is usually
+ * out of range for a conditional branch, so two branches (one over to the loop
+ * body and one layover branch to the actual PCR) go at the end of the entry
+ * block. Here they are only created and stored in cUnit->loopAnalysis.
+ */
+static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry,
+                                ArmLIR *bodyLabel)
+{
+    /* Set up the placeholder used to reconstruct this Dalvik PC */
+    ArmLIR *pcrLabel = dvmCompilerNew(sizeof(ArmLIR), true);
+    pcrLabel->opCode = ARM_PSEUDO_PC_RECONSTRUCTION_CELL;
+    pcrLabel->operands[0] =
+        (int) (cUnit->method->insns + entry->startOffset);
+    pcrLabel->operands[1] = entry->startOffset;
+    /* Insert the placeholder into the PC-reconstruction list */
+    dvmInsertGrowableList(&cUnit->pcReconstructionList, pcrLabel);
+
+    /*
+     * Next, create two unconditional branches - one over to the loop body and
+     * the other to the PCR cell to punt. Neither is appended to the LIR here.
+     */
+    ArmLIR *branchToBody = dvmCompilerNew(sizeof(ArmLIR), true);
+    branchToBody->opCode = THUMB_B_UNCOND;
+    branchToBody->generic.target = (LIR *) bodyLabel;
+    cUnit->loopAnalysis->branchToBody = (LIR *) branchToBody;
+
+    ArmLIR *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true);
+    branchToPCR->opCode = THUMB_B_UNCOND;
+    branchToPCR->generic.target = (LIR *) pcrLabel;
+    cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR;
+}
+
 void dvmCompilerMIR2LIR(CompilationUnit *cUnit)
 {
     /* Used to hold the labels of each block */
@@ -3481,7 +3682,18 @@
             dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]);
         }
 
-        if (blockList[i]->blockType == DALVIK_BYTECODE) {
+        if (blockList[i]->blockType == ENTRY_BLOCK) {
+            labelList[i].opCode = ARM_PSEUDO_ENTRY_BLOCK;
+            if (blockList[i]->firstMIRInsn == NULL) {
+                continue;
+            } else {
+              setupLoopEntryBlock(cUnit, blockList[i],
+                                  &labelList[blockList[i]->fallThrough->id]);
+            }
+        } else if (blockList[i]->blockType == EXIT_BLOCK) {
+            labelList[i].opCode = ARM_PSEUDO_EXIT_BLOCK;
+            goto gen_fallthrough;
+        } else if (blockList[i]->blockType == DALVIK_BYTECODE) {
             labelList[i].opCode = ARM_PSEUDO_NORMAL_BLOCK_LABEL;
             /* Reset the register state */
             resetRegisterScoreboard(cUnit);
@@ -3554,16 +3766,27 @@
         ArmLIR *headLIR = NULL;
 
         for (mir = blockList[i]->firstMIRInsn; mir; mir = mir->next) {
+            if (mir->dalvikInsn.opCode >= MIR_OP_FIRST) {
+                handleExtendedMIR(cUnit, mir);
+                continue;
+            }
+
             OpCode dalvikOpCode = mir->dalvikInsn.opCode;
             InstructionFormat dalvikFormat =
                 dexGetInstrFormat(gDvm.instrFormat, dalvikOpCode);
             ArmLIR *boundaryLIR =
                 newLIR2(cUnit, ARM_PSEUDO_DALVIK_BYTECODE_BOUNDARY,
-                        mir->offset,dalvikOpCode);
+                        mir->offset, dalvikOpCode);
+            if (mir->ssaRep) {
+                char *ssaString = dvmCompilerGetSSAString(cUnit, mir->ssaRep);
+                newLIR1(cUnit, ARM_PSEUDO_SSA_REP, (int) ssaString);
+            }
+
             /* Remember the first LIR for this block */
             if (headLIR == NULL) {
                 headLIR = boundaryLIR;
             }
+
             bool notHandled;
             /*
              * Debugging: screen the opcode first to see if it is in the
@@ -3675,9 +3898,24 @@
                 break;
             }
         }
-        /* Eliminate redundant loads/stores and delay stores into later slots */
-        dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
-                                           cUnit->lastLIRInsn);
+
+        if (blockList[i]->blockType == ENTRY_BLOCK) {
+            dvmCompilerAppendLIR(cUnit,
+                                 (LIR *) cUnit->loopAnalysis->branchToBody);
+            dvmCompilerAppendLIR(cUnit,
+                                 (LIR *) cUnit->loopAnalysis->branchToPCR);
+        }
+
+        if (headLIR) {
+            /*
+             * Eliminate redundant loads/stores and delay stores into later
+             * slots
+             */
+            dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
+                                               cUnit->lastLIRInsn);
+        }
+
+gen_fallthrough:
         /*
          * Check if the block is terminated due to trace length constraint -
          * insert an unconditional branch to the chaining cell.