Generate code for loops formed with the new builder

Adapt the existing counted loop analysis and range/null check
elimination code to work with the new loop building heuristics.
Cleaned up the old ad-hoc loop builder.

Suspend polling is enabled by default for loops. The backward chaining
cell will be used in self-verification and profiling mode.

If the loop includes accesses to unresolved fields/classes, abort code
generation for now and revert to the basic acyclic trace. Added
tests/090-loop-formation to make sure the JIT won't choke on such
instructions.

Change-Id: Idbc57df0a745be3b692f68c1acb6d4861c537f75
diff --git a/vm/compiler/Compiler.h b/vm/compiler/Compiler.h
index 12add75..35d34b3 100644
--- a/vm/compiler/Compiler.h
+++ b/vm/compiler/Compiler.h
@@ -209,6 +209,7 @@
 void dvmInitializeSSAConversion(struct CompilationUnit *cUnit);
 int dvmConvertSSARegToDalvik(const struct CompilationUnit *cUnit, int ssaReg);
 bool dvmCompilerLoopOpt(struct CompilationUnit *cUnit);
+void dvmCompilerInsertBackwardChaining(struct CompilationUnit *cUnit);
 void dvmCompilerNonLoopAnalysis(struct CompilationUnit *cUnit);
 bool dvmCompilerFindLocalLiveIn(struct CompilationUnit *cUnit,
                                 struct BasicBlock *bb);
@@ -239,4 +240,5 @@
 void *dvmCompilerGetInterpretTemplate();
 JitInstructionSetType dvmCompilerGetInterpretTemplateSet();
 u8 dvmGetRegResourceMask(int reg);
+void dvmDumpCFG(struct CompilationUnit *cUnit, const char *dirPrefix);
 #endif /* _DALVIK_VM_COMPILER */
diff --git a/vm/compiler/CompilerIR.h b/vm/compiler/CompilerIR.h
index 7b9987b..bdb69ce 100644
--- a/vm/compiler/CompilerIR.h
+++ b/vm/compiler/CompilerIR.h
@@ -54,11 +54,9 @@
     kChainingCellGap,
     /* Don't insert new fields between Gap and Last */
     kChainingCellLast = kChainingCellGap + 1,
-    kMethodEntryBlock,
-    kTraceEntryBlock,
+    kEntryBlock,
     kDalvikByteCode,
-    kTraceExitBlock,
-    kMethodExitBlock,
+    kExitBlock,
     kPCReconstruction,
     kExceptionHandling,
     kCatchEntry,
@@ -255,12 +253,13 @@
      */
     const u2 *switchOverflowPad;
 
-    /* New fields only for method-based compilation */
     JitMode jitMode;
     int numReachableBlocks;
     int numDalvikRegisters;             // method->registersSize + inlined
     BasicBlock *entryBlock;
     BasicBlock *exitBlock;
+    BasicBlock *puntBlock;              // punting to interp for exceptions
+    BasicBlock *backChainBlock;         // for loop-trace
     BasicBlock *curBlock;
     BasicBlock *nextCodegenBlock;       // for extended trace codegen
     GrowableList dfsOrder;
@@ -272,6 +271,7 @@
     BitVector *tempSSARegisterV;        // numSSARegs
     bool printSSANames;
     void *blockLabelList;
+    bool quitLoopMode;                  // cold path/complex bytecode
 } CompilationUnit;
 
 #if defined(WITH_SELF_VERIFICATION)
diff --git a/vm/compiler/Dataflow.c b/vm/compiler/Dataflow.c
index 26068a1..c3355e9 100644
--- a/vm/compiler/Dataflow.c
+++ b/vm/compiler/Dataflow.c
@@ -2190,8 +2190,7 @@
     GrowableList *ivList = cUnit->loopAnalysis->ivList;
     MIR *mir;
 
-    if (bb->blockType != kDalvikByteCode &&
-        bb->blockType != kTraceEntryBlock) {
+    if (bb->blockType != kDalvikByteCode && bb->blockType != kEntryBlock) {
         return false;
     }
 
@@ -2395,9 +2394,8 @@
         if (bb == NULL) break;
         if (bb->hidden == true) continue;
         if (bb->blockType == kDalvikByteCode ||
-            bb->blockType == kTraceEntryBlock ||
-            bb->blockType == kMethodEntryBlock ||
-            bb->blockType == kMethodExitBlock) {
+            bb->blockType == kEntryBlock ||
+            bb->blockType == kExitBlock) {
             bb->dataFlowInfo = (BasicBlockDataFlow *)
                 dvmCompilerNew(sizeof(BasicBlockDataFlow),
                                true);
diff --git a/vm/compiler/Frontend.c b/vm/compiler/Frontend.c
index 8272fb6..1338acc 100644
--- a/vm/compiler/Frontend.c
+++ b/vm/compiler/Frontend.c
@@ -566,6 +566,7 @@
     /* Handle the fallthrough path */
     bottomBlock->fallThrough = origBlock->fallThrough;
     origBlock->fallThrough = bottomBlock;
+    origBlock->needFallThroughBranch = true;
     dvmCompilerSetBit(bottomBlock->predecessors, origBlock->id);
     if (bottomBlock->fallThrough) {
         dvmCompilerClearBit(bottomBlock->fallThrough->predecessors,
@@ -633,7 +634,7 @@
 }
 
 /* Dump the CFG into a DOT graph */
-void dumpCFG(CompilationUnit *cUnit, const char *dirPrefix)
+void dvmDumpCFG(CompilationUnit *cUnit, const char *dirPrefix)
 {
     const Method *method = cUnit->method;
     FILE *file;
@@ -686,14 +687,16 @@
         BasicBlock *bb = (BasicBlock *) dvmGrowableListGetElement(blockList,
                                                                   blockIdx);
         if (bb == NULL) break;
-        if (bb->blockType == kMethodEntryBlock) {
+        if (bb->blockType == kEntryBlock) {
             fprintf(file, "  entry [shape=Mdiamond];\n");
-        } else if (bb->blockType == kMethodExitBlock) {
+        } else if (bb->blockType == kExitBlock) {
             fprintf(file, "  exit [shape=Mdiamond];\n");
         } else if (bb->blockType == kDalvikByteCode) {
             fprintf(file, "  block%04x [shape=record,label = \"{ \\\n",
                     bb->startOffset);
             const MIR *mir;
+            fprintf(file, "    {block id %d\\l}%s\\\n", bb->id,
+                    bb->firstMIRInsn ? " | " : " ");
             for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
                 fprintf(file, "    {%04x %s\\l}%s\\\n", mir->offset,
                         mir->ssaRep ?
@@ -834,7 +837,7 @@
             char blockName1[BLOCK_NAME_LEN], blockName2[BLOCK_NAME_LEN];
             dvmGetBlockName(bb, blockName1);
             dvmGetBlockName(predBB, blockName2);
-            dumpCFG(cUnit, "/sdcard/cfg/");
+            dvmDumpCFG(cUnit, "/sdcard/cfg/");
             LOGE("Successor %s not found from %s",
                  blockName1, blockName2);
             dvmAbort();
@@ -1186,8 +1189,8 @@
     cUnit.tryBlockAddr = tryBlockAddr;
 
     /* Create the default entry and exit blocks and enter them to the list */
-    BasicBlock *entryBlock = dvmCompilerNewBB(kMethodEntryBlock, numBlocks++);
-    BasicBlock *exitBlock = dvmCompilerNewBB(kMethodExitBlock, numBlocks++);
+    BasicBlock *entryBlock = dvmCompilerNewBB(kEntryBlock, numBlocks++);
+    BasicBlock *exitBlock = dvmCompilerNewBB(kExitBlock, numBlocks++);
 
     cUnit.entryBlock = entryBlock;
     cUnit.exitBlock = exitBlock;
@@ -1308,7 +1311,7 @@
     dvmCompilerMethodMIR2LIR(&cUnit);
 
     // Debugging only
-    //dumpCFG(&cUnit, "/sdcard/cfg/");
+    //dvmDumpCFG(&cUnit, "/sdcard/cfg/");
 
     /* Method is not empty */
     if (cUnit.firstLIRInsn) {
@@ -1348,8 +1351,8 @@
 
     curBlock->visited = true;
 
-    if (curBlock->blockType == kMethodEntryBlock ||
-        curBlock->blockType == kMethodExitBlock) {
+    if (curBlock->blockType == kEntryBlock ||
+        curBlock->blockType == kExitBlock) {
         return false;
     }
 
@@ -1397,6 +1400,34 @@
             break;
         }
         curOffset += width;
+        BasicBlock *nextBlock = findBlock(cUnit, curOffset,
+                                          /* split */
+                                          false,
+                                          /* create */
+                                          false);
+        if (nextBlock) {
+            /*
+             * The next instruction could be the target of a previously parsed
+             * forward branch so a block is already created. If the current
+             * instruction is not an unconditional branch, connect them through
+             * the fall-through link.
+             */
+            assert(curBlock->fallThrough == NULL ||
+                   curBlock->fallThrough == nextBlock ||
+                   curBlock->fallThrough == cUnit->exitBlock);
+
+            if ((curBlock->fallThrough == NULL) &&
+                (flags & kInstrCanContinue)) {
+                curBlock->needFallThroughBranch = true;
+                curBlock->fallThrough = nextBlock;
+                dvmCompilerSetBit(nextBlock->predecessors, curBlock->id);
+            }
+            /* Block has been visited - no more parsing needed */
+            if (nextBlock->visited == true) {
+                return true;
+            }
+            curBlock = nextBlock;
+        }
     }
     return true;
 }
@@ -1410,6 +1441,10 @@
     int numBlocks = 0;
     unsigned int curOffset = startOffset;
     bool changed;
+    BasicBlock *bb;
+#if defined(WITH_JIT_TUNING)
+    CompilerMethodStats *methodStats;
+#endif
 
     cUnit->jitMode = kJitLoop;
 
@@ -1420,9 +1455,9 @@
     dvmInitGrowableList(&cUnit->pcReconstructionList, 8);
 
     /* Create the default entry and exit blocks and enter them to the list */
-    BasicBlock *entryBlock = dvmCompilerNewBB(kMethodEntryBlock, numBlocks++);
+    BasicBlock *entryBlock = dvmCompilerNewBB(kEntryBlock, numBlocks++);
     entryBlock->startOffset = curOffset;
-    BasicBlock *exitBlock = dvmCompilerNewBB(kMethodExitBlock, numBlocks++);
+    BasicBlock *exitBlock = dvmCompilerNewBB(kExitBlock, numBlocks++);
 
     cUnit->entryBlock = entryBlock;
     cUnit->exitBlock = exitBlock;
@@ -1452,6 +1487,20 @@
         changed = exhaustTrace(cUnit, curBlock);
     } while (changed);
 
+    /* Backward chaining block */
+    bb = dvmCompilerNewBB(kChainingCellBackwardBranch, cUnit->numBlocks++);
+    dvmInsertGrowableList(&cUnit->blockList, (intptr_t) bb);
+    cUnit->backChainBlock = bb;
+
+    /* A special block to host PC reconstruction code */
+    bb = dvmCompilerNewBB(kPCReconstruction, cUnit->numBlocks++);
+    dvmInsertGrowableList(&cUnit->blockList, (intptr_t) bb);
+
+    /* And one final block that publishes the PC and raises the exception */
+    bb = dvmCompilerNewBB(kExceptionHandling, cUnit->numBlocks++);
+    dvmInsertGrowableList(&cUnit->blockList, (intptr_t) bb);
+    cUnit->puntBlock = bb;
+
     cUnit->numDalvikRegisters = cUnit->method->registersSize;
 
     /* Verify if all blocks are connected as claimed */
@@ -1465,15 +1514,75 @@
     if (!dvmCompilerBuildLoop(cUnit))
         goto bail;
 
+    dvmCompilerLoopOpt(cUnit);
+
+    /*
+     * Change the backward branch to the backward chaining cell after dataflow
+     * analysis/optimizations are done.
+     */
+    dvmCompilerInsertBackwardChaining(cUnit);
+
     dvmCompilerInitializeRegAlloc(cUnit);
 
     /* Allocate Registers using simple local allocation scheme */
     dvmCompilerLocalRegAlloc(cUnit);
 
-    if (gDvmJit.receivedSIGUSR2) {
-        dumpCFG(cUnit, "/sdcard/cfg/");
+    /* Convert MIR to LIR, etc. */
+    dvmCompilerMIR2LIR(cUnit);
+
+    /* Loop contains never executed blocks / heavy instructions */
+    if (cUnit->quitLoopMode) {
+        if (cUnit->printMe || gDvmJit.receivedSIGUSR2) {
+            LOGD("Loop trace @ offset %04x aborted due to unresolved code info",
+                 cUnit->entryBlock->startOffset);
+        }
+        goto bail;
     }
 
+    /* Convert LIR into machine code. Loop for recoverable retries */
+    do {
+        dvmCompilerAssembleLIR(cUnit, info);
+        cUnit->assemblerRetries++;
+        if (cUnit->printMe && cUnit->assemblerStatus != kSuccess)
+            LOGD("Assembler abort #%d on %d", cUnit->assemblerRetries,
+                  cUnit->assemblerStatus);
+    } while (cUnit->assemblerStatus == kRetryAll);
+
+    /* Loop is too big - bail out */
+    if (cUnit->assemblerStatus == kRetryHalve) {
+        goto bail;
+    }
+
+    if (cUnit->printMe || gDvmJit.receivedSIGUSR2) {
+        LOGD("Loop trace @ offset %04x", cUnit->entryBlock->startOffset);
+        dvmCompilerCodegenDump(cUnit);
+    }
+
+    /*
+     * If this trace uses class objects as constants,
+     * dvmJitInstallClassObjectPointers will switch the thread state
+     * to running and look up the class pointers using the descriptor/loader
+     * tuple stored in the callsite info structure. We need to make this window
+     * as short as possible since it is blocking GC.
+     */
+    if (cUnit->hasClassLiterals && info->codeAddress) {
+        dvmJitInstallClassObjectPointers(cUnit, (char *) info->codeAddress);
+    }
+
+    /*
+     * Since callsiteinfo is allocated from the arena, delay the reset until
+     * class pointers are resolved.
+     */
+    dvmCompilerArenaReset();
+
+    assert(cUnit->assemblerStatus == kSuccess);
+#if defined(WITH_JIT_TUNING)
+    /* Locate the entry to store compilation statistics for this method */
+    methodStats = dvmCompilerAnalyzeMethodBody(desc->method, false);
+    methodStats->nativeSize += cUnit->totalSize;
+#endif
+    return info->codeAddress != NULL;
+
 bail:
     /* Retry the original trace with JIT_OPT_NO_LOOP disabled */
     dvmCompilerArenaReset();
@@ -1539,6 +1648,10 @@
     /* Setup the method */
     cUnit.method = desc->method;
 
+    /* Store the trace descriptor and set the initial mode */
+    cUnit.traceDesc = desc;
+    cUnit.jitMode = kJitTrace;
+
     /* Initialize the PC reconstruction list */
     dvmInitGrowableList(&cUnit.pcReconstructionList, 8);
 
@@ -1620,7 +1733,7 @@
     }
 
     /* Allocate the entry block */
-    curBB = dvmCompilerNewBB(kTraceEntryBlock, numBlocks++);
+    curBB = dvmCompilerNewBB(kEntryBlock, numBlocks++);
     dvmInsertGrowableList(blockList, (intptr_t) curBB);
     curBB->startOffset = curOffset;
 
@@ -1714,7 +1827,6 @@
     for (blockId = 0; blockId < blockList->numUsed; blockId++) {
         curBB = (BasicBlock *) dvmGrowableListGetElement(blockList, blockId);
         MIR *lastInsn = curBB->lastMIRInsn;
-        BasicBlock *backwardCell;
         /* Skip empty blocks */
         if (lastInsn == NULL) {
             continue;
@@ -1743,7 +1855,6 @@
             targetOffset < curOffset &&
             (optHints & JIT_OPT_NO_LOOP) == 0) {
             dvmCompilerArenaReset();
-            /* TODO - constructed loop is abandoned for now */
             return compileLoop(&cUnit, startOffset, desc, numMaxInsts,
                                info, bailPtr, optHints);
         }
@@ -1782,46 +1893,6 @@
         curBB->needFallThroughBranch =
             ((flags & (kInstrCanBranch | kInstrCanSwitch | kInstrCanReturn |
                        kInstrInvoke)) == 0);
-
-        /* Only form a loop if JIT_OPT_NO_LOOP is not set */
-        if (curBB->taken == NULL &&
-            curBB->fallThrough == NULL &&
-            flags == (kInstrCanBranch | kInstrCanContinue) &&
-            fallThroughOffset == entryCodeBB->startOffset &&
-            JIT_OPT_NO_LOOP != (optHints & JIT_OPT_NO_LOOP)) {
-            BasicBlock *loopBranch = curBB;
-            BasicBlock *exitBB;
-            BasicBlock *exitChainingCell;
-
-            if (cUnit.printMe) {
-                LOGD("Natural loop detected!");
-            }
-            exitBB = dvmCompilerNewBB(kTraceExitBlock, numBlocks++);
-            dvmInsertGrowableList(blockList, (intptr_t) exitBB);
-            exitBB->startOffset = targetOffset;
-            exitBB->needFallThroughBranch = true;
-
-            loopBranch->taken = exitBB;
-            dvmCompilerSetBit(exitBB->predecessors, loopBranch->id);
-            backwardCell =
-                dvmCompilerNewBB(kChainingCellBackwardBranch, numBlocks++);
-            dvmInsertGrowableList(blockList, (intptr_t) backwardCell);
-            backwardCell->startOffset = entryCodeBB->startOffset;
-            loopBranch->fallThrough = backwardCell;
-            dvmCompilerSetBit(backwardCell->predecessors, loopBranch->id);
-
-            /* Create the chaining cell as the fallthrough of the exit block */
-            exitChainingCell = dvmCompilerNewBB(kChainingCellNormal,
-                                                numBlocks++);
-            dvmInsertGrowableList(blockList, (intptr_t) exitChainingCell);
-            exitChainingCell->startOffset = targetOffset;
-
-            exitBB->fallThrough = exitChainingCell;
-            dvmCompilerSetBit(exitChainingCell->predecessors, exitBB->id);
-
-            cUnit.hasLoop = true;
-        }
-
         if (lastInsn->dalvikInsn.opcode == OP_PACKED_SWITCH ||
             lastInsn->dalvikInsn.opcode == OP_SPARSE_SWITCH) {
             int i;
@@ -1936,6 +2007,7 @@
     /* And one final block that publishes the PC and raise the exception */
     curBB = dvmCompilerNewBB(kExceptionHandling, numBlocks++);
     dvmInsertGrowableList(blockList, (intptr_t) curBB);
+    cUnit.puntBlock = curBB;
 
     if (cUnit.printMe) {
         char* signature =
@@ -1953,7 +2025,6 @@
         free(signature);
     }
 
-    cUnit.traceDesc = desc;
     cUnit.numBlocks = numBlocks;
 
     /* Set the instruction set to use (NOTE: later components may change it) */
@@ -1969,26 +2040,7 @@
     /* Preparation for SSA conversion */
     dvmInitializeSSAConversion(&cUnit);
 
-    if (cUnit.hasLoop) {
-        /*
-         * Loop is not optimizable (for example lack of a single induction
-         * variable), punt and recompile the trace with loop optimization
-         * disabled.
-         */
-        bool loopOpt = dvmCompilerLoopOpt(&cUnit);
-        if (loopOpt == false) {
-            if (cUnit.printMe) {
-                LOGD("Loop is not optimizable - retry codegen");
-            }
-            /* Reset the compiler resource pool */
-            dvmCompilerArenaReset();
-            return dvmCompileTrace(desc, cUnit.numInsts, info, bailPtr,
-                                   optHints | JIT_OPT_NO_LOOP);
-        }
-    }
-    else {
-        dvmCompilerNonLoopAnalysis(&cUnit);
-    }
+    dvmCompilerNonLoopAnalysis(&cUnit);
 
     dvmCompilerInitializeRegAlloc(&cUnit);  // Needs to happen after SSA naming
 
diff --git a/vm/compiler/Loop.c b/vm/compiler/Loop.c
index b9ad3d3..ba8714c 100644
--- a/vm/compiler/Loop.c
+++ b/vm/compiler/Loop.c
@@ -21,89 +21,16 @@
 
 #define DEBUG_LOOP(X)
 
-/*
- * Given the current simple natural loops, the phi node placement can be
- * determined in the following fashion:
- *                    entry (B0)
- *              +---v   v
- *              |  loop body (B1)
- *              |       v
- *              |  loop back (B2)
- *              +---+   v
- *                     exit (B3)
- *
- *  1) Add live-ins of B1 to B0 as defs
- *  2) The intersect of defs(B0)/defs(B1) and defs(B2)/def(B0) are the variables
- *     that need PHI nodes in B1.
- */
-static void handlePhiPlacement(CompilationUnit *cUnit)
-{
-    BasicBlock *entry =
-        (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList, 0);
-    BasicBlock *loopBody =
-        (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList, 1);
-    BasicBlock *loopBranch =
-        (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList, 2);
-    dvmCopyBitVector(entry->dataFlowInfo->defV,
-                     loopBody->dataFlowInfo->liveInV);
-
-    BitVector *phiV = dvmCompilerAllocBitVector(cUnit->method->registersSize,
-                                                false);
-    BitVector *phi2V = dvmCompilerAllocBitVector(cUnit->method->registersSize,
-                                                 false);
-    dvmIntersectBitVectors(phiV, entry->dataFlowInfo->defV,
-                           loopBody->dataFlowInfo->defV);
-    dvmIntersectBitVectors(phi2V, entry->dataFlowInfo->defV,
-                           loopBranch->dataFlowInfo->defV);
-    dvmUnifyBitVectors(phiV, phiV, phi2V);
-
-    /* Insert the PHI MIRs */
-    int i;
-    for (i = 0; i < cUnit->method->registersSize; i++) {
-        if (!dvmIsBitSet(phiV, i)) {
-            continue;
-        }
-        MIR *phi = (MIR *)dvmCompilerNew(sizeof(MIR), true);
-        phi->dalvikInsn.opcode = kMirOpPhi;
-        phi->dalvikInsn.vA = i;
-        dvmCompilerPrependMIR(loopBody, phi);
-    }
-}
-
-static void fillPhiNodeContents(CompilationUnit *cUnit)
-{
-    BasicBlock *entry =
-        (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList, 0);
-    BasicBlock *loopBody =
-        (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList, 1);
-    BasicBlock *loopBranch =
-        (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList, 2);
-    MIR *mir;
-
-    for (mir = loopBody->firstMIRInsn; mir; mir = mir->next) {
-        if (mir->dalvikInsn.opcode != kMirOpPhi) break;
-        int dalvikReg = mir->dalvikInsn.vA;
-
-        mir->ssaRep->numUses = 2;
-        mir->ssaRep->uses = (int *)dvmCompilerNew(sizeof(int) * 2, false);
-        mir->ssaRep->uses[0] =
-            DECODE_REG(entry->dataFlowInfo->dalvikToSSAMap[dalvikReg]);
-        mir->ssaRep->uses[1] =
-            DECODE_REG(loopBranch->dataFlowInfo->dalvikToSSAMap[dalvikReg]);
-    }
-
-
-}
-
 #if 0
 /* Debugging routines */
 static void dumpConstants(CompilationUnit *cUnit)
 {
     int i;
+    LOGE("LOOP starting offset: %x", cUnit->entryBlock->startOffset);
     for (i = 0; i < cUnit->numSSARegs; i++) {
         if (dvmIsBitSet(cUnit->isConstantV, i)) {
             int subNReg = dvmConvertSSARegToDalvik(cUnit, i);
-            LOGE("s%d(v%d_%d) has %d", i,
+            LOGE("CONST: s%d(v%d_%d) has %d", i,
                  DECODE_REG(subNReg), DECODE_SUB(subNReg),
                  cUnit->constantValues[i]);
         }
@@ -114,24 +41,27 @@
 {
     unsigned int i;
     GrowableList *ivList = cUnit->loopAnalysis->ivList;
-    int *ssaToDalvikMap = (int *) cUnit->ssaToDalvikMap->elemList;
 
     for (i = 0; i < ivList->numUsed; i++) {
-        InductionVariableInfo *ivInfo = ivList->elemList[i];
+        InductionVariableInfo *ivInfo =
+            (InductionVariableInfo *) ivList->elemList[i];
+        int iv = dvmConvertSSARegToDalvik(cUnit, ivInfo->ssaReg);
         /* Basic IV */
         if (ivInfo->ssaReg == ivInfo->basicSSAReg) {
-            LOGE("BIV %d: s%d(v%d) + %d", i,
+            LOGE("BIV %d: s%d(v%d_%d) + %d", i,
                  ivInfo->ssaReg,
-                 ssaToDalvikMap[ivInfo->ssaReg] & 0xffff,
+                 DECODE_REG(iv), DECODE_SUB(iv),
                  ivInfo->inc);
         /* Dependent IV */
         } else {
-            LOGE("DIV %d: s%d(v%d) = %d * s%d(v%d) + %d", i,
+            int biv = dvmConvertSSARegToDalvik(cUnit, ivInfo->basicSSAReg);
+
+            LOGE("DIV %d: s%d(v%d_%d) = %d * s%d(v%d_%d) + %d", i,
                  ivInfo->ssaReg,
-                 ssaToDalvikMap[ivInfo->ssaReg] & 0xffff,
+                 DECODE_REG(iv), DECODE_SUB(iv),
                  ivInfo->m,
                  ivInfo->basicSSAReg,
-                 ssaToDalvikMap[ivInfo->basicSSAReg] & 0xffff,
+                 DECODE_REG(biv), DECODE_SUB(biv),
                  ivInfo->c);
         }
     }
@@ -162,6 +92,32 @@
 
 #endif
 
+static BasicBlock *findPredecessorBlock(const CompilationUnit *cUnit,
+                                        const BasicBlock *bb)
+{
+    int numPred = dvmCountSetBits(bb->predecessors);
+    BitVectorIterator bvIterator;
+    dvmBitVectorIteratorInit(bb->predecessors, &bvIterator);
+
+    if (numPred == 1) {
+        int predIdx = dvmBitVectorIteratorNext(&bvIterator);
+        return (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList,
+                                                        predIdx);
+    /* First loop block */
+    } else if ((numPred == 2) &&
+               dvmIsBitSet(bb->predecessors, cUnit->entryBlock->id)) {
+        while (true) {
+            int predIdx = dvmBitVectorIteratorNext(&bvIterator);
+            if (predIdx == cUnit->entryBlock->id) continue;
+            return (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList,
+                                                            predIdx);
+        }
+    /* Doesn't support other shape of control flow yet */
+    } else {
+        return NULL;
+    }
+}
+
 /*
  * A loop is considered optimizable if:
  * 1) It has one basic induction variable
@@ -171,11 +127,11 @@
  *
  * Return false if the loop is not optimizable.
  */
-static bool isLoopOptimizable(CompilationUnit *cUnit)
+static bool isSimpleCountedLoop(CompilationUnit *cUnit)
 {
     unsigned int i;
-    BasicBlock *loopBranch =
-        (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList, 2);
+    BasicBlock *loopBranch = findPredecessorBlock(cUnit,
+                                  cUnit->entryBlock->fallThrough);
     LoopAnalysis *loopAnalysis = cUnit->loopAnalysis;
 
     if (loopAnalysis->numBasicIV != 1) return false;
@@ -310,8 +266,7 @@
 /* Returns true if the loop body cannot throw any exceptions */
 static bool doLoopBodyCodeMotion(CompilationUnit *cUnit)
 {
-    BasicBlock *loopBody =
-        (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList, 1);
+    BasicBlock *loopBody = cUnit->entryBlock->fallThrough;
     MIR *mir;
     bool loopBodyCanThrow = false;
 
@@ -385,7 +340,7 @@
             if (dvmIsBitSet(cUnit->loopAnalysis->isIndVarV,
                             mir->ssaRep->uses[useIdx])) {
                 mir->OptimizationFlags |=
-                    MIR_IGNORE_RANGE_CHECK |  MIR_IGNORE_NULL_CHECK;
+                    MIR_IGNORE_RANGE_CHECK | MIR_IGNORE_NULL_CHECK;
                 updateRangeCheckInfo(cUnit, mir->ssaRep->uses[refIdx],
                                      mir->ssaRep->uses[useIdx]);
             }
@@ -398,8 +353,7 @@
 static void genHoistedChecks(CompilationUnit *cUnit)
 {
     unsigned int i;
-    BasicBlock *entry =
-        (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList, 0);
+    BasicBlock *entry = cUnit->entryBlock;
     LoopAnalysis *loopAnalysis = cUnit->loopAnalysis;
     int globalMaxC = 0;
     int globalMinC = 0;
@@ -460,14 +414,16 @@
             } else if (loopAnalysis->loopBranchOpcode == OP_IF_LTZ) {
                 /* Array index will fall below 0 */
                 if (globalMinC < 0) {
-                    MIR *boundCheckMIR = (MIR *)dvmCompilerNew(sizeof(MIR), true);
+                    MIR *boundCheckMIR = (MIR *)dvmCompilerNew(sizeof(MIR),
+                                                               true);
                     boundCheckMIR->dalvikInsn.opcode = kMirOpPunt;
                     dvmCompilerAppendMIR(entry, boundCheckMIR);
                 }
             } else if (loopAnalysis->loopBranchOpcode == OP_IF_LEZ) {
                 /* Array index will fall below 0 */
                 if (globalMinC < -1) {
-                    MIR *boundCheckMIR = (MIR *)dvmCompilerNew(sizeof(MIR), true);
+                    MIR *boundCheckMIR = (MIR *)dvmCompilerNew(sizeof(MIR),
+                                                               true);
                     boundCheckMIR->dalvikInsn.opcode = kMirOpPunt;
                     dvmCompilerAppendMIR(entry, boundCheckMIR);
                 }
@@ -480,79 +436,6 @@
     }
 }
 
-/*
- * Main entry point to do loop optimization.
- * Return false if sanity checks for loop formation/optimization failed.
- */
-bool dvmCompilerLoopOpt(CompilationUnit *cUnit)
-{
-    LoopAnalysis *loopAnalysis =
-        (LoopAnalysis *)dvmCompilerNew(sizeof(LoopAnalysis), true);
-
-    assert(((BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList, 0))
-                               ->blockType == kTraceEntryBlock);
-    assert(((BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList, 2))
-                               ->blockType == kDalvikByteCode);
-    assert(((BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList, 3))
-                               ->blockType == kTraceExitBlock);
-
-    cUnit->loopAnalysis = loopAnalysis;
-    /*
-     * Find live-in variables to the loop body so that we can fake their
-     * definitions in the entry block.
-     */
-    dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerFindLocalLiveIn,
-                                          kAllNodes,
-                                          false /* isIterative */);
-
-    /* Insert phi nodes to the loop body */
-    handlePhiPlacement(cUnit);
-
-    dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerDoSSAConversion,
-                                          kAllNodes,
-                                          false /* isIterative */);
-    fillPhiNodeContents(cUnit);
-
-    /* Constant propagation */
-    cUnit->isConstantV = dvmAllocBitVector(cUnit->numSSARegs, false);
-    cUnit->constantValues =
-        (int *)dvmCompilerNew(sizeof(int) * cUnit->numSSARegs,
-                              true);
-    dvmCompilerDataFlowAnalysisDispatcher(cUnit,
-                                          dvmCompilerDoConstantPropagation,
-                                          kAllNodes,
-                                          false /* isIterative */);
-    DEBUG_LOOP(dumpConstants(cUnit);)
-
-    /* Find induction variables - basic and dependent */
-    loopAnalysis->ivList =
-        (GrowableList *)dvmCompilerNew(sizeof(GrowableList), true);
-    dvmInitGrowableList(loopAnalysis->ivList, 4);
-    loopAnalysis->isIndVarV = dvmAllocBitVector(cUnit->numSSARegs, false);
-    dvmCompilerDataFlowAnalysisDispatcher(cUnit,
-                                          dvmCompilerFindInductionVariables,
-                                          kAllNodes,
-                                          false /* isIterative */);
-    DEBUG_LOOP(dumpIVList(cUnit);)
-
-    /* If the loop turns out to be non-optimizable, return early */
-    if (!isLoopOptimizable(cUnit))
-        return false;
-
-    loopAnalysis->arrayAccessInfo =
-        (GrowableList *)dvmCompilerNew(sizeof(GrowableList), true);
-    dvmInitGrowableList(loopAnalysis->arrayAccessInfo, 4);
-    loopAnalysis->bodyIsClean = doLoopBodyCodeMotion(cUnit);
-    DEBUG_LOOP(dumpHoistedChecks(cUnit);)
-
-    /*
-     * Convert the array access information into extended MIR code in the loop
-     * header.
-     */
-    genHoistedChecks(cUnit);
-    return true;
-}
-
 void resetBlockEdges(BasicBlock *bb)
 {
     bb->taken = NULL;
@@ -573,53 +456,72 @@
 
     int numPred = dvmCountSetBits(firstBB->predecessors);
     /*
-     * A loop body should have at least two incoming edges. Here we go with the
-     * simple case and only form loops if numPred == 2.
+     * A loop body should have at least two incoming edges.
      */
-    if (numPred != 2) return false;
+    if (numPred < 2) return false;
 
-    BitVectorIterator bvIterator;
     GrowableList *blockList = &cUnit->blockList;
-    BasicBlock *predBB = NULL;
 
-    dvmBitVectorIteratorInit(firstBB->predecessors, &bvIterator);
-    while (true) {
-        int predIdx = dvmBitVectorIteratorNext(&bvIterator);
-        if (predIdx == -1) break;
-        predBB = (BasicBlock *) dvmGrowableListGetElement(blockList, predIdx);
-        if (predBB != cUnit->entryBlock) break;
-    }
-
-    /* Used to record which block is in the loop */
+    /* Record blocks included in the loop */
     dvmClearAllBits(cUnit->tempBlockV);
 
-    dvmCompilerSetBit(cUnit->tempBlockV, predBB->id);
+    dvmCompilerSetBit(cUnit->tempBlockV, cUnit->entryBlock->id);
+    dvmCompilerSetBit(cUnit->tempBlockV, firstBB->id);
 
-    /* Form a loop by only including iDom block that is also a predecessor */
-    while (predBB != firstBB) {
-        BasicBlock *iDom = predBB->iDom;
-        if (!dvmIsBitSet(predBB->predecessors, iDom->id)) {
-            return false;
-        /*
-         * And don't form nested loops (ie by detecting if the branch target
-         * of iDom dominates iDom).
-         */
-        } else if (iDom->taken &&
-                   dvmIsBitSet(iDom->dominators, iDom->taken->id) &&
-                   iDom != firstBB) {
+    BasicBlock *bodyBB = firstBB;
+
+    /*
+     * First try to include the fall-through block in the loop, then the taken
+     * block. Stop loop formation on the first backward branch that enters the
+     * first block (ie only include the inner-most loop).
+     */
+    while (true) {
+        /* Loop formed */
+        if (bodyBB->taken == firstBB || bodyBB->fallThrough == firstBB) break;
+
+        /* Inner loops formed first - quit */
+        if (bodyBB->fallThrough &&
+            dvmIsBitSet(cUnit->tempBlockV, bodyBB->fallThrough->id)) {
             return false;
         }
-        dvmCompilerSetBit(cUnit->tempBlockV, iDom->id);
-        predBB = iDom;
+        if (bodyBB->taken &&
+            dvmIsBitSet(cUnit->tempBlockV, bodyBB->taken->id)) {
+            return false;
+        }
+
+        if (bodyBB->fallThrough) {
+            if (bodyBB->fallThrough->iDom == bodyBB) {
+                bodyBB = bodyBB->fallThrough;
+                dvmCompilerSetBit(cUnit->tempBlockV, bodyBB->id);
+                /*
+                 * Loop formation to be detected at the beginning of next
+                 * iteration.
+                 */
+                continue;
+            }
+        }
+        if (bodyBB->taken) {
+            if (bodyBB->taken->iDom == bodyBB) {
+                bodyBB = bodyBB->taken;
+                dvmCompilerSetBit(cUnit->tempBlockV, bodyBB->id);
+                /*
+                 * Loop formation to be detected at the beginning of next
+                 * iteration.
+                 */
+                continue;
+            }
+        }
+        /*
+         * Current block is the immediate dominator of neither the fallthrough
+         * nor the taken block - bail out of loop formation.
+         */
+        return false;
     }
 
-    /* Add the entry block and first block */
-    dvmCompilerSetBit(cUnit->tempBlockV, firstBB->id);
-    dvmCompilerSetBit(cUnit->tempBlockV, cUnit->entryBlock->id);
 
     /* Now mark blocks not included in the loop as hidden */
     GrowableListIterator iterator;
-    dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);
+    dvmGrowableListIteratorInit(blockList, &iterator);
     while (true) {
         BasicBlock *bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator);
         if (bb == NULL) break;
@@ -634,7 +536,7 @@
     dvmCompilerDataFlowAnalysisDispatcher(cUnit, clearPredecessorVector,
                                           kAllNodes, false /* isIterative */);
 
-    dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);
+    dvmGrowableListIteratorInit(blockList, &iterator);
     while (true) {
         BasicBlock *bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator);
         if (bb == NULL) break;
@@ -671,6 +573,89 @@
             assert(bb->successorBlockList.blockListType == kNotUsed);
         }
     }
-
     return true;
 }
+
+/*
+ * Main entry point to do loop optimization.
+ * Returns false if isSimpleCountedLoop() rejects the loop, true otherwise.
+ */
+bool dvmCompilerLoopOpt(CompilationUnit *cUnit)
+{
+    LoopAnalysis *loopAnalysis =
+        (LoopAnalysis *)dvmCompilerNew(sizeof(LoopAnalysis), true);
+    cUnit->loopAnalysis = loopAnalysis;
+
+    /* Constant propagation - state is sized by the number of SSA registers */
+    cUnit->isConstantV = dvmAllocBitVector(cUnit->numSSARegs, false);
+    cUnit->constantValues =
+        (int *)dvmCompilerNew(sizeof(int) * cUnit->numSSARegs,
+                              true);
+    dvmCompilerDataFlowAnalysisDispatcher(cUnit,
+                                          dvmCompilerDoConstantPropagation,
+                                          kAllNodes,
+                                          false /* isIterative */);
+    DEBUG_LOOP(dumpConstants(cUnit);)
+
+    /* Find induction variables - basic and dependent */
+    loopAnalysis->ivList =
+        (GrowableList *)dvmCompilerNew(sizeof(GrowableList), true);
+    dvmInitGrowableList(loopAnalysis->ivList, 4);
+    loopAnalysis->isIndVarV = dvmAllocBitVector(cUnit->numSSARegs, false);
+    dvmCompilerDataFlowAnalysisDispatcher(cUnit,
+                                          dvmCompilerFindInductionVariables,
+                                          kAllNodes,
+                                          false /* isIterative */);
+    DEBUG_LOOP(dumpIVList(cUnit);)
+
+    /* Only optimize array accesses for simple counted loops for now */
+    if (!isSimpleCountedLoop(cUnit))
+        return false;
+
+    loopAnalysis->arrayAccessInfo =
+        (GrowableList *)dvmCompilerNew(sizeof(GrowableList), true);
+    dvmInitGrowableList(loopAnalysis->arrayAccessInfo, 4);
+    loopAnalysis->bodyIsClean = doLoopBodyCodeMotion(cUnit);
+    DEBUG_LOOP(dumpHoistedChecks(cUnit);)
+
+    /*
+     * Convert the array access information into extended MIR code in the loop
+     * header.
+     */
+    genHoistedChecks(cUnit);
+    return true;
+}
+
+/*
+ * Redirect the loop's backward branch to cUnit->backChainBlock when required.
+ */
+void dvmCompilerInsertBackwardChaining(CompilationUnit *cUnit)
+{
+    /*
+     * If we are not in self-verification or profiling mode, the backward
+     * branch can go to the entryBlock->fallThrough directly. Suspend polling
+     * code will be generated along the backward branch to honor the suspend
+     * requests.
+     */
+#if !defined(WITH_SELF_VERIFICATION)
+    if (gDvmJit.profileMode != kTraceProfilingContinuous &&
+        gDvmJit.profileMode != kTraceProfilingPeriodicOn) {
+        return;
+    }
+#endif /* !WITH_SELF_VERIFICATION */
+    /*
+     * In self-verification or profiling mode, the backward branch is altered
+     * to go to the backward chaining cell. Without using the backward chaining
+     * cell we won't be able to do check-pointing on the target PC, or count the
+     * number of iterations accurately.
+     */
+    BasicBlock *firstBB = cUnit->entryBlock->fallThrough;
+    BasicBlock *backBranchBB = findPredecessorBlock(cUnit, firstBB);
+    if (backBranchBB->taken == firstBB) { /* NOTE(review): assumes non-NULL */
+        backBranchBB->taken = cUnit->backChainBlock;
+    } else {
+        assert(backBranchBB->fallThrough == firstBB);
+        backBranchBB->fallThrough = cUnit->backChainBlock;
+    }
+    cUnit->backChainBlock->startOffset = firstBB->startOffset;
+}
diff --git a/vm/compiler/Loop.h b/vm/compiler/Loop.h
index ec87e57..122817d 100644
--- a/vm/compiler/Loop.h
+++ b/vm/compiler/Loop.h
@@ -36,4 +36,13 @@
 
 bool dvmCompilerFilterLoopBlocks(CompilationUnit *cUnit);
 
+/*
+ * An unexecuted code path may contain unresolved fields or classes. Lacking a
+ * quiet resolver, quit loop mode (do/while(0) keeps this a single statement).
+ */
+#define BAIL_LOOP_COMPILATION() do { if (cUnit->jitMode == kJitLoop) {  \
+                                    cUnit->quitLoopMode = true;         \
+                                    return false;                       \
+                                } } while (0)
+
 #endif /* _DALVIK_VM_LOOP */
diff --git a/vm/compiler/Ralloc.c b/vm/compiler/Ralloc.c
index d772a31..e2752b1 100644
--- a/vm/compiler/Ralloc.c
+++ b/vm/compiler/Ralloc.c
@@ -27,8 +27,7 @@
 static void inferTypes(CompilationUnit *cUnit, BasicBlock *bb)
 {
     MIR *mir;
-    if (bb->blockType != kDalvikByteCode &&
-        bb->blockType != kTraceEntryBlock)
+    if (bb->blockType != kDalvikByteCode && bb->blockType != kEntryBlock)
         return;
 
     for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
diff --git a/vm/compiler/SSATransformation.c b/vm/compiler/SSATransformation.c
index b045a1e..6d7dd23 100644
--- a/vm/compiler/SSATransformation.c
+++ b/vm/compiler/SSATransformation.c
@@ -633,5 +633,9 @@
                                           kReachableNodes,
                                           false /* isIterative */);
 
+    if (gDvmJit.receivedSIGUSR2 || gDvmJit.printMe) {
+        dvmDumpCFG(cUnit, "/sdcard/cfg/");
+    }
+
     return true;
 }
diff --git a/vm/compiler/Utility.c b/vm/compiler/Utility.c
index fb16388..2599e0a 100644
--- a/vm/compiler/Utility.c
+++ b/vm/compiler/Utility.c
@@ -165,11 +165,9 @@
         "Backward Branch",
         "Chaining Cell Gap",
         "N/A",
-        "Method Entry Block",
-        "Trace Entry Block",
+        "Entry Block",
         "Code Block",
-        "Trace Exit Block",
-        "Method Exit Block",
+        "Exit Block",
         "PC Reconstruction",
         "Exception Handling",
     };
@@ -389,10 +387,10 @@
 void dvmGetBlockName(BasicBlock *bb, char *name)
 {
     switch (bb->blockType) {
-        case kMethodEntryBlock:
+        case kEntryBlock:
             snprintf(name, BLOCK_NAME_LEN, "entry");
             break;
-        case kMethodExitBlock:
+        case kExitBlock:
             snprintf(name, BLOCK_NAME_LEN, "exit");
             break;
         case kDalvikByteCode:
diff --git a/vm/compiler/codegen/arm/ArchUtility.c b/vm/compiler/codegen/arm/ArchUtility.c
index fb28e26..edcbf86 100644
--- a/vm/compiler/codegen/arm/ArchUtility.c
+++ b/vm/compiler/codegen/arm/ArchUtility.c
@@ -316,20 +316,25 @@
             DUMP_SSA_REP(LOGD("-------- %s\n", (char *) dest));
             break;
         case kArmPseudoChainingCellBackwardBranch:
+            LOGD("L%p:\n", lir);
             LOGD("-------- chaining cell (backward branch): 0x%04x\n", dest);
             break;
         case kArmPseudoChainingCellNormal:
+            LOGD("L%p:\n", lir);
             LOGD("-------- chaining cell (normal): 0x%04x\n", dest);
             break;
         case kArmPseudoChainingCellHot:
+            LOGD("L%p:\n", lir);
             LOGD("-------- chaining cell (hot): 0x%04x\n", dest);
             break;
         case kArmPseudoChainingCellInvokePredicted:
+            LOGD("L%p:\n", lir);
             LOGD("-------- chaining cell (predicted): %s%s\n",
                  dest ? ((Method *) dest)->clazz->descriptor : "",
                  dest ? ((Method *) dest)->name : "N/A");
             break;
         case kArmPseudoChainingCellInvokeSingleton:
+            LOGD("L%p:\n", lir);
             LOGD("-------- chaining cell (invoke singleton): %s%s/%p\n",
                  ((Method *)dest)->clazz->descriptor,
                  ((Method *)dest)->name,
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index 0e919c4..dfc68b6 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -1001,6 +1001,11 @@
                 lir->operands[1] = 0;
                 lir->generic.target = 0;
                 dvmCompilerSetupResourceMasks(lir);
+                if (cUnit->printMe) {
+                    LOGD("kThumb2Cbnz/kThumb2Cbz@%x: delta=%d",
+                         lir->generic.offset, delta);
+                    dvmCompilerCodegenDump(cUnit);
+                }
                 return kRetryAll;
             } else {
                 lir->operands[1] = delta >> 1;
diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c
index 8d8619b..f020e9c 100644
--- a/vm/compiler/codegen/arm/CodegenDriver.c
+++ b/vm/compiler/codegen/arm/CodegenDriver.c
@@ -1356,6 +1356,12 @@
     int flagsToCheck = kInstrCanBranch | kInstrCanSwitch | kInstrCanReturn |
                        kInstrCanThrow;
 
+    // Single stepping is considered loop mode breaker
+    if (cUnit->jitMode == kJitLoop) {
+        cUnit->quitLoopMode = true;
+        return;
+    }
+
     //If already optimized out, just ignore
     if (mir->dalvikInsn.opcode == OP_NOP)
         return;
@@ -1448,7 +1454,8 @@
     /* backward branch? */
     bool backwardBranch = (bb->taken->startOffset <= mir->offset);
 
-    if (backwardBranch && gDvmJit.genSuspendPoll) {
+    if (backwardBranch &&
+        (gDvmJit.genSuspendPoll || cUnit->jitMode == kJitLoop)) {
         genSuspendPoll(cUnit, mir);
     }
 
@@ -1579,6 +1586,7 @@
               (cUnit->method->clazz->pDvmDex->pResStrings[mir->dalvikInsn.vB]);
 
             if (strPtr == NULL) {
+                BAIL_LOOP_COMPILATION();
                 LOGE("Unexpected null string");
                 dvmAbort();
             }
@@ -1595,6 +1603,7 @@
               (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]);
 
             if (classPtr == NULL) {
+                BAIL_LOOP_COMPILATION();
                 LOGE("Unexpected null class");
                 dvmAbort();
             }
@@ -1631,6 +1640,7 @@
             Opcode opcode = mir->dalvikInsn.opcode;
 
             if (fieldPtr == NULL) {
+                BAIL_LOOP_COMPILATION();
                 LOGE("Unexpected null static field");
                 dvmAbort();
             }
@@ -1664,6 +1674,7 @@
               (method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]);
 
             if (fieldPtr == NULL) {
+                BAIL_LOOP_COMPILATION();
                 LOGE("Unexpected null static field");
                 dvmAbort();
             }
@@ -1707,6 +1718,12 @@
               (method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]);
             Opcode opcode = mir->dalvikInsn.opcode;
 
+            if (fieldPtr == NULL) {
+                BAIL_LOOP_COMPILATION();
+                LOGE("Unexpected null static field");
+                dvmAbort();
+            }
+
             isVolatile = (opcode == OP_SPUT_VOLATILE) ||
                          (opcode == OP_SPUT_VOLATILE_JUMBO) ||
                          (opcode == OP_SPUT_OBJECT_VOLATILE) ||
@@ -1718,11 +1735,6 @@
                            (opcode == OP_SPUT_OBJECT_VOLATILE) ||
                            (opcode == OP_SPUT_OBJECT_VOLATILE_JUMBO);
 
-            if (fieldPtr == NULL) {
-                LOGE("Unexpected null static field");
-                dvmAbort();
-            }
-
             rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
             rlSrc = loadValue(cUnit, rlSrc, kAnyReg);
             loadConstant(cUnit, tReg,  (int) fieldPtr);
@@ -1755,6 +1767,7 @@
               (method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]);
 
             if (fieldPtr == NULL) {
+                BAIL_LOOP_COMPILATION();
                 LOGE("Unexpected null static field");
                 dvmAbort();
             }
@@ -1778,6 +1791,7 @@
               (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]);
 
             if (classPtr == NULL) {
+                BAIL_LOOP_COMPILATION();
                 LOGE("Unexpected null class");
                 dvmAbort();
             }
@@ -1829,9 +1843,10 @@
              * so that we can tell if it happens frequently.
              */
             if (classPtr == NULL) {
-                 LOGVV("null clazz in OP_CHECK_CAST, single-stepping");
-                 genInterpSingleStep(cUnit, mir);
-                 return false;
+                BAIL_LOOP_COMPILATION();
+                LOGVV("null clazz in OP_CHECK_CAST, single-stepping");
+                genInterpSingleStep(cUnit, mir);
+                return false;
             }
             dvmCompilerFlushAllRegs(cUnit);   /* Everything to home location */
             loadConstant(cUnit, r1, (int) classPtr );
@@ -2093,7 +2108,8 @@
     /* backward branch? */
     bool backwardBranch = (bb->taken->startOffset <= mir->offset);
 
-    if (backwardBranch && gDvmJit.genSuspendPoll) {
+    if (backwardBranch &&
+        (gDvmJit.genSuspendPoll || cUnit->jitMode == kJitLoop)) {
         genSuspendPoll(cUnit, mir);
     }
 
@@ -2426,6 +2442,7 @@
                 method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vC];
 
             if (fieldPtr == NULL) {
+                BAIL_LOOP_COMPILATION();
                 LOGE("Unexpected null instance field");
                 dvmAbort();
             }
@@ -2448,6 +2465,7 @@
               (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vC]);
 
             if (classPtr == NULL) {
+                BAIL_LOOP_COMPILATION();
                 LOGE("Unexpected null class");
                 dvmAbort();
             }
@@ -2499,6 +2517,7 @@
              * so that we can tell if it happens frequently.
              */
             if (classPtr == NULL) {
+                BAIL_LOOP_COMPILATION();
                 LOGD("null clazz in OP_INSTANCE_OF, single-stepping");
                 genInterpSingleStep(cUnit, mir);
                 break;
@@ -2627,7 +2646,8 @@
     /* backward branch? */
     bool backwardBranch = (bb->taken->startOffset <= mir->offset);
 
-    if (backwardBranch && gDvmJit.genSuspendPoll) {
+    if (backwardBranch &&
+        (gDvmJit.genSuspendPoll || cUnit->jitMode == kJitLoop)) {
         genSuspendPoll(cUnit, mir);
     }
 
@@ -4238,7 +4258,7 @@
             dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]);
         }
 
-        if (bb->blockType == kTraceEntryBlock) {
+        if (bb->blockType == kEntryBlock) {
             labelList[i].opcode = kArmPseudoEntryBlock;
             if (bb->firstMIRInsn == NULL) {
                 continue;
@@ -4246,10 +4266,11 @@
               setupLoopEntryBlock(cUnit, bb,
                                   &labelList[bb->fallThrough->id]);
             }
-        } else if (bb->blockType == kTraceExitBlock) {
+        } else if (bb->blockType == kExitBlock) {
             labelList[i].opcode = kArmPseudoExitBlock;
             goto gen_fallthrough;
         } else if (bb->blockType == kDalvikByteCode) {
+            if (bb->hidden == true) continue;
             labelList[i].opcode = kArmPseudoNormalBlockLabel;
             /* Reset the register state */
             dvmCompilerResetRegPool(cUnit);
@@ -4297,8 +4318,8 @@
                     /* Make sure exception handling block is next */
                     labelList[i].opcode =
                         kArmPseudoPCReconstructionBlockLabel;
-                    assert (i == cUnit->numBlocks - 2);
-                    handlePCReconstruction(cUnit, &labelList[i+1]);
+                    handlePCReconstruction(cUnit,
+                                           &labelList[cUnit->puntBlock->id]);
                     break;
                 case kExceptionHandling:
                     labelList[i].opcode = kArmPseudoEHBlockLabel;
@@ -4510,7 +4531,7 @@
             }
         }
 
-        if (bb->blockType == kTraceEntryBlock) {
+        if (bb->blockType == kEntryBlock) {
             dvmCompilerAppendLIR(cUnit,
                                  (LIR *) cUnit->loopAnalysis->branchToBody);
             dvmCompilerAppendLIR(cUnit,
diff --git a/vm/compiler/codegen/arm/armv7-a-neon/MethodCodegenDriver.c b/vm/compiler/codegen/arm/armv7-a-neon/MethodCodegenDriver.c
index 5a08b60..98d97d8 100644
--- a/vm/compiler/codegen/arm/armv7-a-neon/MethodCodegenDriver.c
+++ b/vm/compiler/codegen/arm/armv7-a-neon/MethodCodegenDriver.c
@@ -255,13 +255,13 @@
 
     ArmLIR *headLIR = NULL;
 
-    if (bb->blockType == kMethodEntryBlock) {
+    if (bb->blockType == kEntryBlock) {
         /* r0 = callsitePC */
         opImm(cUnit, kOpPush, (1 << r0 | 1 << r1 | 1 << r5FP | 1 << r14lr));
         opRegImm(cUnit, kOpSub, r5FP,
                  sizeof(StackSaveArea) + cUnit->method->registersSize * 4);
 
-    } else if (bb->blockType == kMethodExitBlock) {
+    } else if (bb->blockType == kExitBlock) {
         /* No need to pop r0 and r1 */
         opRegImm(cUnit, kOpAdd, r13sp, 8);
         opImm(cUnit, kOpPop, (1 << r5FP | 1 << r15pc));