Introduce "just interpret" chainable pseudo-translation.

This is the first step towards enabling translation & self-cosim stress modes.
When trace selection begins, the trace head address is pinned and
remains in a limbo state until the translation is complete.  Previously,
if the trace selection aborted for any reason, the trace head would remain
forever in limbo.  This was not a correctness problem, but caused some
small performance anomalies and made life more difficult for self-cosimulation
mode.

This CL introduces a pseudo-translation that simply routes control to
the interpreter.  When we detect that a trace selection attempt has
failed, the trace head is associated with this fully-chainable
pseudo-translation.  This also has the benefit for self-cosimulation that
we are guaranteed forward progress.
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index bc39479..8e977c1 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -1259,16 +1259,28 @@
         info->codeAddress = (char*)info->codeAddress + 1;
 }
 
-static u4 assembleBXPair(int branchOffset)
+/*
+ * Returns the skeleton bit pattern associated with an opcode.  All
+ * variable fields are zeroed.
+ */
+static u4 getSkeleton(ArmOpCode op)
+{
+    return EncodingMap[op].skeleton;
+}
+
+static u4 assembleChainingBranch(int branchOffset, bool thumbTarget)
 {
     u4 thumb1, thumb2;
 
-    if ((branchOffset < -2048) | (branchOffset > 2046)) {
-        thumb1 =  (0xf000 | ((branchOffset>>12) & 0x7ff));
-        thumb2 =  (0xf800 | ((branchOffset>> 1) & 0x7ff));
+    if (!thumbTarget) {
+        thumb1 =  (getSkeleton(kThumbBlx1) | ((branchOffset>>12) & 0x7ff));
+        thumb2 =  (getSkeleton(kThumbBlx2) | ((branchOffset>> 1) & 0x7ff));
+    } else if ((branchOffset < -2048) | (branchOffset > 2046)) {
+        thumb1 =  (getSkeleton(kThumbBl1) | ((branchOffset>>12) & 0x7ff));
+        thumb2 =  (getSkeleton(kThumbBl2) | ((branchOffset>> 1) & 0x7ff));
     } else {
-        thumb1 =  (0xe000 | ((branchOffset>> 1) & 0x7ff));
-        thumb2 =  0x4300;  /* nop -> or r0, r0 */
+        thumb1 =  (getSkeleton(kThumbBUncond) | ((branchOffset>> 1) & 0x7ff));
+        thumb2 =  getSkeleton(kThumbOrr);  /* nop -> or r0, r0 */
     }
 
     return thumb2<<16 | thumb1;
@@ -1278,7 +1290,8 @@
  * Perform translation chain operation.
  * For ARM, we'll use a pair of thumb instructions to generate
  * an unconditional chaining branch of up to 4MB in distance.
- * Use a BL, though we don't really need the link.  The format is
+ * Use a BL, because the generic "interpret" translation needs
+ * the link register to find the dalvik pc of the target.
  *     111HHooooooooooo
  * Where HH is 10 for the 1st inst, and 11 for the second and
  * the "o" field is each instruction's 11-bit contribution to the
@@ -1291,6 +1304,7 @@
     int baseAddr = (u4) branchAddr + 4;
     int branchOffset = (int) tgtAddr - baseAddr;
     u4 newInst;
+    bool thumbTarget;
 
     if (gDvm.sumThreadSuspendCount == 0) {
         assert((branchOffset >= -(1<<22)) && (branchOffset <= ((1<<22)-2)));
@@ -1301,7 +1315,16 @@
             LOGD("Jit Runtime: chaining 0x%x to 0x%x\n",
                  (int) branchAddr, (int) tgtAddr & -2));
 
-        newInst = assembleBXPair(branchOffset);
+        /*
+         * NOTE: normally, all translations are Thumb[2] mode, with
+         * a single exception: the default TEMPLATE_INTERPRET
+         * pseudo-translation.  If the need ever arises to
+         * mix Arm & Thumb[2] translations, the following code should be
+         * generalized.
+         */
+        thumbTarget = (tgtAddr != gDvmJit.interpretTemplate);
+
+        newInst = assembleChainingBranch(branchOffset, thumbTarget);
 
         *branchAddr = newInst;
         cacheflush((long)branchAddr, (long)branchAddr + 4, 0);
@@ -1354,7 +1377,7 @@
      * Compilation not made yet for the callee. Reset the counter to a small
      * value and come back to check soon.
      */
-    if (tgtAddr == 0) {
+    if ((tgtAddr == 0) || ((void*)tgtAddr == gDvmJit.interpretTemplate)) {
         /*
          * Wait for a few invocations (currently set to be 16) before trying
          * to setup the chain again.
@@ -1388,7 +1411,7 @@
              clazz->descriptor,
              method->name));
 
-    cell->branch = assembleBXPair(branchOffset);
+    cell->branch = assembleChainingBranch(branchOffset, true);
     cell->clazz = clazz;
     cell->method = method;
     /*
@@ -1517,7 +1540,9 @@
         dvmLockMutex(&gDvmJit.tableLock);
         for (i = 0; i < gDvmJit.jitTableSize; i++) {
             if (gDvmJit.pJitEntryTable[i].dPC &&
-                   gDvmJit.pJitEntryTable[i].codeAddress) {
+                   gDvmJit.pJitEntryTable[i].codeAddress &&
+                   (gDvmJit.pJitEntryTable[i].codeAddress !=
+                    gDvmJit.interpretTemplate)) {
                 u4* lastAddress;
                 lastAddress =
                       dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress);
@@ -1573,6 +1598,10 @@
         LOGD("TRACEPROFILE 0x%08x 0 NULL 0 0", (int)traceBase);
         return 0;
     }
+    if (p->codeAddress == gDvmJit.interpretTemplate) {
+        LOGD("TRACEPROFILE 0x%08x 0 INTERPRET_ONLY  0 0", (int)traceBase);
+        return 0;
+    }
 
     pExecutionCount = (u4*) (traceBase);
     pCellOffset = (u2*) (traceBase + 4);