Add runtime support for method based compilation.

Enhanced code cache management to accommodate both trace and method
compilations. Also implemented a hacky dispatch routine for virtual
leaf methods.

A microbenchmark showed a 3x speedup in leaf method invocation.

Change-Id: I79d95b7300ba993667b3aa221c1df9c7b0583521
diff --git a/vm/compiler/codegen/CompilerCodegen.h b/vm/compiler/codegen/CompilerCodegen.h
index 70a2bbd..9cd4847 100644
--- a/vm/compiler/codegen/CompilerCodegen.h
+++ b/vm/compiler/codegen/CompilerCodegen.h
@@ -28,6 +28,9 @@
 /* Lower middle-level IR to low-level IR */
 void dvmCompilerMIR2LIR(CompilationUnit *cUnit);
 
+/* Lower middle-level IR to low-level IR for the whole method */
+void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit);
+
 /* Assemble LIR into machine code */
 void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo *info);
 
diff --git a/vm/compiler/codegen/Optimizer.h b/vm/compiler/codegen/Optimizer.h
index d42fe87..2b05476 100644
--- a/vm/compiler/codegen/Optimizer.h
+++ b/vm/compiler/codegen/Optimizer.h
@@ -29,6 +29,7 @@
     kTrackLiveTemps,
     kSuppressLoads,
     kMethodInlining,
+    kMethodJit,
 } optControlVector;
 
 /* Forward declarations */
diff --git a/vm/compiler/codegen/arm/ArchFactory.c b/vm/compiler/codegen/arm/ArchFactory.c
index 581ba39..805a6fc 100644
--- a/vm/compiler/codegen/arm/ArchFactory.c
+++ b/vm/compiler/codegen/arm/ArchFactory.c
@@ -32,7 +32,21 @@
                                TGT_LIR *pcrLabel)
 {
     TGT_LIR *branch = genCmpImmBranch(cUnit, cond, reg, checkValue);
-    return genCheckCommon(cUnit, dOffset, branch, pcrLabel);
+    if (cUnit->methodJitMode) {
+        BasicBlock *bb = cUnit->curBlock;
+        if (bb->taken) {
+            ArmLIR  *exceptionLabel = (ArmLIR *) cUnit->blockLabelList;
+            exceptionLabel += bb->taken->id;
+            branch->generic.target = (LIR *) exceptionLabel;
+            return exceptionLabel;
+        } else {
+            LOGE("Catch blocks not handled yet");
+            dvmAbort();
+            return NULL;
+        }
+    } else {
+        return genCheckCommon(cUnit, dOffset, branch, pcrLabel);
+    }
 }
 
 /*
diff --git a/vm/compiler/codegen/arm/ArchUtility.c b/vm/compiler/codegen/arm/ArchUtility.c
index 7a4d307..5af4f3b 100644
--- a/vm/compiler/codegen/arm/ArchUtility.c
+++ b/vm/compiler/codegen/arm/ArchUtility.c
@@ -195,9 +195,10 @@
                        }
                        break;
                    case 't':
-                       sprintf(tbuf,"0x%08x",
+                       sprintf(tbuf,"0x%08x (L%p)",
                                (int) baseAddr + lir->generic.offset + 4 +
-                               (operand << 1));
+                               (operand << 1),
+                               lir->generic.target);
                        break;
                    case 'u': {
                        int offset_1 = lir->operands[0];
@@ -302,8 +303,6 @@
         case kArmPseudoSSARep:
             DUMP_SSA_REP(LOGD("-------- %s\n", (char *) dest));
             break;
-        case kArmPseudoTargetLabel:
-            break;
         case kArmPseudoChainingCellBackwardBranch:
             LOGD("-------- chaining cell (backward branch): 0x%04x\n", dest);
             break;
@@ -344,8 +343,9 @@
         case kArmPseudoEHBlockLabel:
             LOGD("Exception_Handling:\n");
             break;
+        case kArmPseudoTargetLabel:
         case kArmPseudoNormalBlockLabel:
-            LOGD("L%#06x:\n", dest);
+            LOGD("L%p:\n", lir);
             break;
         default:
             if (lir->isNop && !dumpNop) {
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index 577f683..efae8fd 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -1032,6 +1032,17 @@
 
             lir->operands[0] = (delta >> 12) & 0x7ff;
             NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
+        } else if (lir->opcode == kThumbBl1) {
+            assert(NEXT_LIR(lir)->opcode == kThumbBl2);
+            /* Both curPC and target are Thumb */
+            intptr_t curPC = startAddr + lir->generic.offset + 4;
+            intptr_t target = lir->operands[1];
+
+            int delta = target - curPC;
+            assert((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
+
+            lir->operands[0] = (delta >> 12) & 0x7ff;
+            NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
         }
 
         ArmEncodingMap *encoder = &EncodingMap[lir->opcode];
@@ -1213,8 +1224,8 @@
     int i;
     ChainCellCounts chainCellCounts;
     int descSize =
-        cUnit->wholeMethod ? 0 : jitTraceDescriptionSize(cUnit->traceDesc);
-    int chainingCellGap;
+        cUnit->methodJitMode ? 0 : jitTraceDescriptionSize(cUnit->traceDesc);
+    int chainingCellGap = 0;
 
     info->instructionSet = cUnit->instructionSet;
 
@@ -1240,30 +1251,34 @@
     /* Const values have to be word aligned */
     offset = (offset + 3) & ~3;
 
-    /*
-     * Get the gap (# of u4) between the offset of chaining cell count and
-     * the bottom of real chaining cells. If the translation has chaining
-     * cells, the gap is guaranteed to be multiples of 4.
-     */
-    chainingCellGap = (offset - cUnit->chainingCellBottom->offset) >> 2;
-
-    /* Add space for chain cell counts & trace description */
     u4 chainCellOffset = offset;
-    ArmLIR *chainCellOffsetLIR = (ArmLIR *) cUnit->chainCellOffsetLIR;
-    assert(chainCellOffsetLIR);
-    assert(chainCellOffset < 0x10000);
-    assert(chainCellOffsetLIR->opcode == kArm16BitData &&
-           chainCellOffsetLIR->operands[0] == CHAIN_CELL_OFFSET_TAG);
+    ArmLIR *chainCellOffsetLIR = NULL;
 
-    /*
-     * Adjust the CHAIN_CELL_OFFSET_TAG LIR's offset to remove the
-     * space occupied by the pointer to the trace profiling counter.
-     */
-    chainCellOffsetLIR->operands[0] = chainCellOffset - 4;
+    if (!cUnit->methodJitMode) {
+        /*
+         * Get the gap (# of u4) between the offset of chaining cell count and
+         * the bottom of real chaining cells. If the translation has chaining
+         * cells, the gap is guaranteed to be multiples of 4.
+         */
+        chainingCellGap = (offset - cUnit->chainingCellBottom->offset) >> 2;
 
-    offset += sizeof(chainCellCounts) + descSize;
+        /* Add space for chain cell counts & trace description */
+        chainCellOffsetLIR = (ArmLIR *) cUnit->chainCellOffsetLIR;
+        assert(chainCellOffsetLIR);
+        assert(chainCellOffset < 0x10000);
+        assert(chainCellOffsetLIR->opcode == kArm16BitData &&
+               chainCellOffsetLIR->operands[0] == CHAIN_CELL_OFFSET_TAG);
 
-    assert((offset & 0x3) == 0);  /* Should still be word aligned */
+        /*
+         * Adjust the CHAIN_CELL_OFFSET_TAG LIR's offset to remove the
+         * space occupied by the pointer to the trace profiling counter.
+         */
+        chainCellOffsetLIR->operands[0] = chainCellOffset - 4;
+
+        offset += sizeof(chainCellCounts) + descSize;
+
+        assert((offset & 0x3) == 0);  /* Should still be word aligned */
+    }
 
     /* Set up offsets for literals */
     cUnit->dataOffset = offset;
@@ -1301,8 +1316,10 @@
             break;
         case kRetryAll:
             if (cUnit->assemblerRetries < MAX_ASSEMBLER_RETRIES) {
-                /* Restore pristine chain cell marker on retry */
-                chainCellOffsetLIR->operands[0] = CHAIN_CELL_OFFSET_TAG;
+                if (!cUnit->methodJitMode) {
+                    /* Restore pristine chain cell marker on retry */
+                    chainCellOffsetLIR->operands[0] = CHAIN_CELL_OFFSET_TAG;
+                }
                 return;
             }
             /* Too many retries - reset and try cutting the trace in half */
@@ -1351,21 +1368,24 @@
     memcpy((char*)cUnit->baseAddr, cUnit->codeBuffer, chainCellOffset);
     gDvmJit.numCompilations++;
 
-    /* Install the chaining cell counts */
-    for (i=0; i< kChainingCellGap; i++) {
-        chainCellCounts.u.count[i] = cUnit->numChainingCells[i];
+    if (!cUnit->methodJitMode) {
+        /* Install the chaining cell counts */
+        for (i=0; i< kChainingCellGap; i++) {
+            chainCellCounts.u.count[i] = cUnit->numChainingCells[i];
+        }
+
+        /* Set the gap number in the chaining cell count structure */
+        chainCellCounts.u.count[kChainingCellGap] = chainingCellGap;
+
+        memcpy((char*)cUnit->baseAddr + chainCellOffset, &chainCellCounts,
+               sizeof(chainCellCounts));
+
+        /* Install the trace description */
+        memcpy((char*) cUnit->baseAddr + chainCellOffset +
+                       sizeof(chainCellCounts),
+               cUnit->traceDesc, descSize);
     }
 
-    /* Set the gap number in the chaining cell count structure */
-    chainCellCounts.u.count[kChainingCellGap] = chainingCellGap;
-
-    memcpy((char*)cUnit->baseAddr + chainCellOffset, &chainCellCounts,
-           sizeof(chainCellCounts));
-
-    /* Install the trace description */
-    memcpy((char*)cUnit->baseAddr + chainCellOffset + sizeof(chainCellCounts),
-           cUnit->traceDesc, descSize);
-
     /* Write the literals directly into the code cache */
     installDataContent(cUnit);
 
@@ -1609,7 +1629,7 @@
         PROTECT_CODE_CACHE(cell, sizeof(*cell));
         goto done;
     }
-    int tgtAddr = (int) dvmJitGetCodeAddr(method->insns);
+    int tgtAddr = (int) dvmJitGetTraceAddr(method->insns);
 
     /*
      * Compilation not made yet for the callee. Reset the counter to a small
@@ -1808,14 +1828,16 @@
 
         for (i = 0; i < gDvmJit.jitTableSize; i++) {
             if (gDvmJit.pJitEntryTable[i].dPC &&
-                   gDvmJit.pJitEntryTable[i].codeAddress &&
-                   (gDvmJit.pJitEntryTable[i].codeAddress !=
-                    dvmCompilerGetInterpretTemplate())) {
+                !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry &&
+                gDvmJit.pJitEntryTable[i].codeAddress &&
+                (gDvmJit.pJitEntryTable[i].codeAddress !=
+                 dvmCompilerGetInterpretTemplate())) {
                 u4* lastAddress;
                 lastAddress =
                       dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress);
                 if (lowAddress == NULL ||
-                      (u4*)gDvmJit.pJitEntryTable[i].codeAddress < lowAddress)
+                      (u4*)gDvmJit.pJitEntryTable[i].codeAddress <
+                      lowAddress)
                     lowAddress = lastAddress;
                 if (lastAddress > highAddress)
                     highAddress = lastAddress;
diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c
index f017b31..8e1b09c 100644
--- a/vm/compiler/codegen/arm/CodegenDriver.c
+++ b/vm/compiler/codegen/arm/CodegenDriver.c
@@ -904,24 +904,28 @@
 /* Perform the actual operation for OP_RETURN_* */
 static void genReturnCommon(CompilationUnit *cUnit, MIR *mir)
 {
-    genDispatchToHandler(cUnit, gDvmJit.methodTraceSupport ?
-        TEMPLATE_RETURN_PROF :
-        TEMPLATE_RETURN);
+    if (!cUnit->methodJitMode) {
+        genDispatchToHandler(cUnit, gDvmJit.methodTraceSupport ?
+            TEMPLATE_RETURN_PROF :
+            TEMPLATE_RETURN);
 #if defined(WITH_JIT_TUNING)
-    gDvmJit.returnOp++;
+        gDvmJit.returnOp++;
 #endif
-    int dPC = (int) (cUnit->method->insns + mir->offset);
-    /* Insert branch, but defer setting of target */
-    ArmLIR *branch = genUnconditionalBranch(cUnit, NULL);
-    /* Set up the place holder to reconstruct this Dalvik PC */
-    ArmLIR *pcrLabel = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true);
-    pcrLabel->opcode = kArmPseudoPCReconstructionCell;
-    pcrLabel->operands[0] = dPC;
-    pcrLabel->operands[1] = mir->offset;
-    /* Insert the place holder to the growable list */
-    dvmInsertGrowableList(&cUnit->pcReconstructionList, (intptr_t) pcrLabel);
-    /* Branch to the PC reconstruction code */
-    branch->generic.target = (LIR *) pcrLabel;
+        int dPC = (int) (cUnit->method->insns + mir->offset);
+        /* Insert branch, but defer setting of target */
+        ArmLIR *branch = genUnconditionalBranch(cUnit, NULL);
+        /* Set up the place holder to reconstruct this Dalvik PC */
+        ArmLIR *pcrLabel = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true);
+        pcrLabel->opcode = kArmPseudoPCReconstructionCell;
+        pcrLabel->operands[0] = dPC;
+        pcrLabel->operands[1] = mir->offset;
+        /* Insert the place holder to the growable list */
+        dvmInsertGrowableList(&cUnit->pcReconstructionList,
+                              (intptr_t) pcrLabel);
+        /* Branch to the PC reconstruction code */
+        branch->generic.target = (LIR *) pcrLabel;
+    }
+    /* TODO: Move result to InterpState for non-void returns */
 }
 
 static void genProcessArgsNoRange(CompilationUnit *cUnit, MIR *mir,
@@ -3197,6 +3201,25 @@
     return false;
 }
 
+/* "this" pointer is already in r0 */
+static void genValidationForMethodCallee(CompilationUnit *cUnit, MIR *mir,
+                                            ArmLIR **classCheck)
+{
+    CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo;
+    dvmCompilerLockAllTemps(cUnit);
+
+    loadConstant(cUnit, r1, (int) callsiteInfo->clazz);
+
+    loadWordDisp(cUnit, r0, offsetof(Object, clazz), r2);
+    /* Branch to the slow path if classes are not equal */
+    opRegReg(cUnit, kOpCmp, r1, r2);
+    /*
+     * Set the misPredBranchOver target so that it will be generated when the
+     * code for the non-optimized invoke is generated.
+     */
+    *classCheck = opCondBranch(cUnit, kArmCondNe);
+}
+
 static bool handleFmt35ms_3rms(CompilationUnit *cUnit, MIR *mir,
                                BasicBlock *bb, ArmLIR *labelList)
 {
@@ -3229,6 +3252,29 @@
             else
                 genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel);
 
+
+            if (mir->OptimizationFlags & MIR_INVOKE_METHOD_JIT) {
+                const Method *calleeMethod = mir->meta.callsiteInfo->method;
+                void *calleeAddr = dvmJitGetMethodAddr(calleeMethod->insns);
+                if (calleeAddr) {
+                    ArmLIR *classCheck;
+                    cUnit->printMe = true;
+                    genValidationForMethodCallee(cUnit, mir, &classCheck);
+                    newLIR2(cUnit, kThumbBl1, (int) calleeAddr,
+                            (int) calleeAddr);
+                    newLIR2(cUnit, kThumbBl2, (int) calleeAddr,
+                            (int) calleeAddr);
+                    genUnconditionalBranch(cUnit, retChainingCell);
+
+                    /* Target of slow path */
+                    ArmLIR *slowPathLabel = newLIR0(cUnit,
+                                                    kArmPseudoTargetLabel);
+
+                    slowPathLabel->defMask = ENCODE_ALL;
+                    classCheck->generic.target = (LIR *) slowPathLabel;
+                }
+            }
+
             genInvokeVirtualCommon(cUnit, mir, methodIndex,
                                    retChainingCell,
                                    predChainingCell,
diff --git a/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c b/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c
index f584ce7..c857fa5 100644
--- a/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c
@@ -79,6 +79,10 @@
         LOGE("InterpState.jitToInterpEntries size overflow");
         dvmAbort();
     }
+
+    /* No method JIT for Thumb backend */
+    gDvmJit.disableOpt |= (1 << kMethodJit);
+
     return true;
 }
 
diff --git a/vm/compiler/codegen/arm/armv5te-vfp/Codegen.c b/vm/compiler/codegen/arm/armv5te-vfp/Codegen.c
index d17965d..a2d77ea 100644
--- a/vm/compiler/codegen/arm/armv5te-vfp/Codegen.c
+++ b/vm/compiler/codegen/arm/armv5te-vfp/Codegen.c
@@ -49,5 +49,8 @@
 /* MIR2LIR dispatcher and architectural independent codegen routines */
 #include "../CodegenDriver.c"
 
+/* Dummy driver for method-based JIT */
+#include "../armv5te/MethodCodegenDriver.c"
+
 /* Architecture manifest */
 #include "ArchVariant.c"
diff --git a/vm/compiler/codegen/arm/armv5te/ArchVariant.c b/vm/compiler/codegen/arm/armv5te/ArchVariant.c
index cf48d4e..0f16839 100644
--- a/vm/compiler/codegen/arm/armv5te/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv5te/ArchVariant.c
@@ -79,6 +79,10 @@
         LOGE("InterpState.jitToInterpEntries size overflow");
         dvmAbort();
     }
+
+    /* No method JIT for Thumb backend */
+    gDvmJit.disableOpt |= (1 << kMethodJit);
+
     return true;
 }
 
diff --git a/vm/compiler/codegen/arm/armv5te/Codegen.c b/vm/compiler/codegen/arm/armv5te/Codegen.c
index 03c1435..f74d968 100644
--- a/vm/compiler/codegen/arm/armv5te/Codegen.c
+++ b/vm/compiler/codegen/arm/armv5te/Codegen.c
@@ -49,5 +49,8 @@
 /* MIR2LIR dispatcher and architectural independent codegen routines */
 #include "../CodegenDriver.c"
 
+/* Dummy driver for method-based JIT */
+#include "MethodCodegenDriver.c"
+
 /* Architecture manifest */
 #include "ArchVariant.c"
diff --git a/vm/compiler/codegen/arm/armv5te/MethodCodegenDriver.c b/vm/compiler/codegen/arm/armv5te/MethodCodegenDriver.c
new file mode 100644
index 0000000..20779f3
--- /dev/null
+++ b/vm/compiler/codegen/arm/armv5te/MethodCodegenDriver.c
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit)
+{
+    LOGE("Method-based JIT not supported for the v5te target");
+    dvmAbort();
+}
diff --git a/vm/compiler/codegen/arm/armv7-a-neon/ArchVariant.c b/vm/compiler/codegen/arm/armv7-a-neon/ArchVariant.c
index 7fcf031..3df1095 100644
--- a/vm/compiler/codegen/arm/armv7-a-neon/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv7-a-neon/ArchVariant.c
@@ -74,6 +74,10 @@
         LOGE("InterpState.jitToInterpEntries size overflow");
         dvmAbort();
     }
+
+    /* FIXME - comment out the following to enable method-based JIT */
+    gDvmJit.disableOpt |= (1 << kMethodJit);
+
     return true;
 }
 
diff --git a/vm/compiler/codegen/arm/armv7-a-neon/Codegen.c b/vm/compiler/codegen/arm/armv7-a-neon/Codegen.c
index f0b7722..439add5 100644
--- a/vm/compiler/codegen/arm/armv7-a-neon/Codegen.c
+++ b/vm/compiler/codegen/arm/armv7-a-neon/Codegen.c
@@ -49,5 +49,8 @@
 /* MIR2LIR dispatcher and architectural independent codegen routines */
 #include "../CodegenDriver.c"
 
+/* Driver for method-based JIT */
+#include "MethodCodegenDriver.c"
+
 /* Architecture manifest */
 #include "ArchVariant.c"
diff --git a/vm/compiler/codegen/arm/armv7-a-neon/MethodCodegenDriver.c b/vm/compiler/codegen/arm/armv7-a-neon/MethodCodegenDriver.c
new file mode 100644
index 0000000..c25ab83
--- /dev/null
+++ b/vm/compiler/codegen/arm/armv7-a-neon/MethodCodegenDriver.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Handle the content in each basic block */
+static bool methodBlockCodeGen(CompilationUnit *cUnit, BasicBlock *bb)
+{
+    MIR *mir;
+    ArmLIR *labelList = (ArmLIR *) cUnit->blockLabelList;
+    int blockId = bb->id;
+
+    cUnit->curBlock = bb;
+    labelList[blockId].operands[0] = bb->startOffset;
+
+    /* Insert the block label */
+    labelList[blockId].opcode = kArmPseudoNormalBlockLabel;
+    dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]);
+
+    dvmCompilerClobberAllRegs(cUnit);
+    dvmCompilerResetNullCheck(cUnit);
+
+    ArmLIR *headLIR = NULL;
+
+    if (bb->blockType == kMethodEntryBlock) {
+        opImm(cUnit, kOpPush, (1 << rlr | 1 << rFP));
+        opRegImm(cUnit, kOpSub, rFP,
+                 sizeof(StackSaveArea) + cUnit->method->registersSize * 4);
+
+    } else if (bb->blockType == kMethodExitBlock) {
+        opImm(cUnit, kOpPop, (1 << rpc | 1 << rFP));
+    }
+
+    for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
+
+        dvmCompilerResetRegPool(cUnit);
+        if (gDvmJit.disableOpt & (1 << kTrackLiveTemps)) {
+            dvmCompilerClobberAllRegs(cUnit);
+        }
+
+        if (gDvmJit.disableOpt & (1 << kSuppressLoads)) {
+            dvmCompilerResetDefTracking(cUnit);
+        }
+
+        Opcode dalvikOpcode = mir->dalvikInsn.opcode;
+        InstructionFormat dalvikFormat =
+            dexGetFormatFromOpcode(dalvikOpcode);
+
+        ArmLIR *boundaryLIR;
+
+        /*
+         * Don't generate the boundary LIR unless we are debugging this
+         * trace or we need a scheduling barrier.
+         */
+        if (headLIR == NULL || cUnit->printMe == true) {
+            boundaryLIR =
+                newLIR2(cUnit, kArmPseudoDalvikByteCodeBoundary,
+                        mir->offset,
+                        (int) dvmCompilerGetDalvikDisassembly(
+                            &mir->dalvikInsn, ""));
+            /* Remember the first LIR for this block */
+            if (headLIR == NULL) {
+                headLIR = boundaryLIR;
+                /* Set the first boundaryLIR as a scheduling barrier */
+                headLIR->defMask = ENCODE_ALL;
+            }
+        }
+
+        /* Don't generate the SSA annotation unless verbose mode is on */
+        if (cUnit->printMe && mir->ssaRep) {
+            char *ssaString = dvmCompilerGetSSAString(cUnit, mir->ssaRep);
+            newLIR1(cUnit, kArmPseudoSSARep, (int) ssaString);
+        }
+
+        bool notHandled;
+        switch (dalvikFormat) {
+            case kFmt10t:
+            case kFmt20t:
+            case kFmt30t:
+                notHandled = handleFmt10t_Fmt20t_Fmt30t(cUnit,
+                          mir, bb, labelList);
+                break;
+            case kFmt10x:
+                notHandled = handleFmt10x(cUnit, mir);
+                break;
+            case kFmt11n:
+            case kFmt31i:
+                notHandled = handleFmt11n_Fmt31i(cUnit, mir);
+                break;
+            case kFmt11x:
+                notHandled = handleFmt11x(cUnit, mir);
+                break;
+            case kFmt12x:
+                notHandled = handleFmt12x(cUnit, mir);
+                break;
+            case kFmt20bc:
+            case kFmt40sc:
+                notHandled = handleFmt20bc_Fmt40sc(cUnit, mir);
+                break;
+            case kFmt21c:
+            case kFmt31c:
+            case kFmt41c:
+                notHandled = handleFmt21c_Fmt31c_Fmt41c(cUnit, mir);
+                break;
+            case kFmt21h:
+                notHandled = handleFmt21h(cUnit, mir);
+                break;
+            case kFmt21s:
+                notHandled = handleFmt21s(cUnit, mir);
+                break;
+            case kFmt21t:
+                notHandled = handleFmt21t(cUnit, mir, bb, labelList);
+                break;
+            case kFmt22b:
+            case kFmt22s:
+                notHandled = handleFmt22b_Fmt22s(cUnit, mir);
+                break;
+            case kFmt22c:
+            case kFmt52c:
+                notHandled = handleFmt22c_Fmt52c(cUnit, mir);
+                break;
+            case kFmt22cs:
+                notHandled = handleFmt22cs(cUnit, mir);
+                break;
+            case kFmt22t:
+                notHandled = handleFmt22t(cUnit, mir, bb, labelList);
+                break;
+            case kFmt22x:
+            case kFmt32x:
+                notHandled = handleFmt22x_Fmt32x(cUnit, mir);
+                break;
+            case kFmt23x:
+                notHandled = handleFmt23x(cUnit, mir);
+                break;
+            case kFmt31t:
+                notHandled = handleFmt31t(cUnit, mir);
+                break;
+            case kFmt3rc:
+            case kFmt35c:
+            case kFmt5rc:
+                notHandled = handleFmt35c_3rc_5rc(cUnit, mir, bb,
+                                              labelList);
+                break;
+            case kFmt3rms:
+            case kFmt35ms:
+                notHandled = handleFmt35ms_3rms(cUnit, mir, bb,
+                                                labelList);
+                break;
+            case kFmt35mi:
+            case kFmt3rmi:
+                notHandled = handleExecuteInline(cUnit, mir);
+                break;
+            case kFmt51l:
+                notHandled = handleFmt51l(cUnit, mir);
+                break;
+            default:
+                notHandled = true;
+                break;
+        }
+
+        /* FIXME - to be implemented */
+        if (notHandled == true && dalvikOpcode >= kNumPackedOpcodes) {
+            notHandled = false;
+        }
+
+        if (notHandled) {
+            LOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled\n",
+                 mir->offset,
+                 dalvikOpcode, dexGetOpcodeName(dalvikOpcode),
+                 dalvikFormat);
+            dvmCompilerAbort(cUnit);
+            break;
+        }
+    }
+
+    if (headLIR) {
+        /*
+         * Eliminate redundant loads/stores and delay stores into later
+         * slots
+         */
+        dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
+                                           cUnit->lastLIRInsn);
+
+        /*
+         * Generate an unconditional branch to the fallthrough block.
+         */
+        if (bb->fallThrough) {
+            genUnconditionalBranch(cUnit,
+                                   &labelList[bb->fallThrough->id]);
+        }
+    }
+    return false;
+}
+
+void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit)
+{
+    // FIXME - enable method compilation for selected routines here
+    if (strcmp(cUnit->method->name, "add")) return;
+
+    /* Used to hold the labels of each block */
+    cUnit->blockLabelList =
+        (void *) dvmCompilerNew(sizeof(ArmLIR) * cUnit->numBlocks, true);
+
+    dvmCompilerDataFlowAnalysisDispatcher(cUnit, methodBlockCodeGen,
+                                          kPreOrderDFSTraversal,
+                                          false /* isIterative */);
+
+    dvmCompilerApplyGlobalOptimizations(cUnit);
+
+    // FIXME - temporarily enable verbose printing for all methods
+    cUnit->printMe = true;
+
+#if defined(WITH_SELF_VERIFICATION)
+    selfVerificationBranchInsertPass(cUnit);
+#endif
+}
diff --git a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
index 7fcf031..3df1095 100644
--- a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
@@ -74,6 +74,10 @@
         LOGE("InterpState.jitToInterpEntries size overflow");
         dvmAbort();
     }
+
+    /* FIXME - comment out the following to enable method-based JIT */
+    gDvmJit.disableOpt |= (1 << kMethodJit);
+
     return true;
 }
 
diff --git a/vm/compiler/codegen/arm/armv7-a/Codegen.c b/vm/compiler/codegen/arm/armv7-a/Codegen.c
index 05dda0c..36771ef 100644
--- a/vm/compiler/codegen/arm/armv7-a/Codegen.c
+++ b/vm/compiler/codegen/arm/armv7-a/Codegen.c
@@ -49,5 +49,8 @@
 /* MIR2LIR dispatcher and architectural independent codegen routines */
 #include "../CodegenDriver.c"
 
+/* Driver for method-based JIT */
+#include "../armv7-a-neon/MethodCodegenDriver.c"
+
 /* Architecture manifest */
 #include "ArchVariant.c"