Support for stopping all threads in a Jit environment.
diff --git a/vm/Globals.h b/vm/Globals.h
index b9c73fe..528185d 100644
--- a/vm/Globals.h
+++ b/vm/Globals.h
@@ -363,6 +363,12 @@
     pthread_cond_t  threadSuspendCountCond;
 
     /*
+     * Sum of all threads' suspendCount fields.  The JIT needs to know if any
+     * thread is suspended.  Guarded by threadSuspendCountLock.
+     */
+    int  sumThreadSuspendCount;
+
+    /*
      * MUTEX ORDERING: when locking multiple mutexes, always grab them in
      * this order to avoid deadlock:
      *
diff --git a/vm/Thread.c b/vm/Thread.c
index b945483..6420533 100644
--- a/vm/Thread.c
+++ b/vm/Thread.c
@@ -234,6 +234,16 @@
 static void waitForThreadSuspend(Thread* self, Thread* thread);
 static int getThreadPriorityFromSystem(void);
 
+/*
+ * Apply delta to *pSuspendCount and to gDvm.sumThreadSuspendCount, the global
+ * sum the JIT checks to see if any thread is suspended.  All suspendCount
+ * updates should go through this after acquiring threadSuspendCountLock.
+ */
+static inline void dvmAddToThreadSuspendCount(int *pSuspendCount, int delta)
+{
+    *pSuspendCount += delta;
+    gDvm.sumThreadSuspendCount += delta;
+}
 
 /*
  * Initialize thread list and main thread's environment.  We need to set
@@ -1556,6 +1566,12 @@
     }
 
 bail:
+#if defined(WITH_JIT)
+    /* Remove this thread's suspendCount from global suspendCount sum */
+    lockThreadSuspendCount();
+    dvmAddToThreadSuspendCount(&self->suspendCount, -self->suspendCount);
+    unlockThreadSuspendCount();
+#endif
     dvmReleaseTrackedAlloc(exception, self);
 }
 
@@ -2132,7 +2148,7 @@
     //assert(thread->handle != dvmJdwpGetDebugThread(gDvm.jdwpState));
 
     lockThreadSuspendCount();
-    thread->suspendCount++;
+    dvmAddToThreadSuspendCount(&thread->suspendCount, 1);
     thread->dbgSuspendCount++;
 
     LOG_THREAD("threadid=%d: suspend++, now=%d\n",
@@ -2161,7 +2177,7 @@
 
     lockThreadSuspendCount();
     if (thread->suspendCount > 0) {
-        thread->suspendCount--;
+        dvmAddToThreadSuspendCount(&thread->suspendCount, -1);
         thread->dbgSuspendCount--;
     } else {
         LOG_THREAD("threadid=%d:  suspendCount already zero\n",
@@ -2199,7 +2215,7 @@
      * though.
      */
     lockThreadSuspendCount();
-    self->suspendCount++;
+    dvmAddToThreadSuspendCount(&self->suspendCount, 1);
     self->dbgSuspendCount++;
 
     /*
@@ -2337,10 +2353,12 @@
  *
  * TODO: track basic stats about time required to suspend VM.
  */
+#define FIRST_SLEEP (250*1000)    /* 0.25s */
+#define MORE_SLEEP  (750*1000)    /* 0.75s */
 static void waitForThreadSuspend(Thread* self, Thread* thread)
 {
     const int kMaxRetries = 10;
-    const int kSpinSleepTime = 750*1000;        /* 0.75s */
+    int spinSleepTime = FIRST_SLEEP;
 
     int sleepIter = 0;
     int retryCount = 0;
@@ -2350,7 +2368,18 @@
         if (sleepIter == 0)         // get current time on first iteration
             startWhen = dvmGetRelativeTimeUsec();
 
-        if (!dvmIterativeSleep(sleepIter++, kSpinSleepTime, startWhen)) {
+#if defined (WITH_JIT)
+        /*
+         * If we're still waiting after the first timeout,
+         * unchain all translations.
+         */
+        if (gDvmJit.pJitEntryTable && retryCount > 0) {
+            LOGD("JIT unchain all attempt #%d",retryCount);
+            dvmJitUnchainAll();
+        }
+#endif
+
+        if (!dvmIterativeSleep(sleepIter++, spinSleepTime, startWhen)) {
             LOGW("threadid=%d (h=%d): spin on suspend threadid=%d (handle=%d)\n",
                 self->threadId, (int)self->handle,
                 thread->threadId, (int)thread->handle);
@@ -2358,6 +2387,7 @@
 
             // keep going; could be slow due to valgrind
             sleepIter = 0;
+            spinSleepTime = MORE_SLEEP;
 
             if (retryCount++ == kMaxRetries) {
                 LOGE("threadid=%d: stuck on threadid=%d, giving up\n",
@@ -2431,7 +2461,7 @@
             thread->handle == dvmJdwpGetDebugThread(gDvm.jdwpState))
             continue;
 
-        thread->suspendCount++;
+        dvmAddToThreadSuspendCount(&thread->suspendCount, 1);
         if (why == SUSPEND_FOR_DEBUG || why == SUSPEND_FOR_DEBUG_EVENT)
             thread->dbgSuspendCount++;
     }
@@ -2506,7 +2536,7 @@
             continue;
 
         if (thread->suspendCount > 0) {
-            thread->suspendCount--;
+            dvmAddToThreadSuspendCount(&thread->suspendCount, -1);
             if (why == SUSPEND_FOR_DEBUG || why == SUSPEND_FOR_DEBUG_EVENT)
                 thread->dbgSuspendCount--;
         } else {
@@ -2562,7 +2592,8 @@
         }
 
         assert(thread->suspendCount >= thread->dbgSuspendCount);
-        thread->suspendCount -= thread->dbgSuspendCount;
+        dvmAddToThreadSuspendCount(&thread->suspendCount,
+                                   -thread->dbgSuspendCount);
         thread->dbgSuspendCount = 0;
     }
     unlockThreadSuspendCount();
@@ -3524,4 +3555,3 @@
      */
     gcScanAllThreads();
 }
-
diff --git a/vm/compiler/Compiler.h b/vm/compiler/Compiler.h
index 7209701..9bcee12 100644
--- a/vm/compiler/Compiler.h
+++ b/vm/compiler/Compiler.h
@@ -95,5 +95,6 @@
 void *dvmCompileTrace(JitTraceDescription *trace);
 void dvmCompilerDumpStats(void);
 void dvmCompilerDrainQueue(void);
+void dvmJitUnchainAll(void);
 
 #endif /* _DALVIK_VM_COMPILER */
diff --git a/vm/compiler/CompilerIR.h b/vm/compiler/CompilerIR.h
index 6ffdf44..67cb637 100644
--- a/vm/compiler/CompilerIR.h
+++ b/vm/compiler/CompilerIR.h
@@ -28,6 +28,13 @@
     EXCEPTION_HANDLING,
 } BBType;
 
+typedef struct ChainCellCounts {
+    union {
+        u1 count[CHAINING_CELL_LAST];   /* cell count, indexed by cell type */
+        u4 dummyForAlignment;           /* per its name: alignment only */
+    } u;
+} ChainCellCounts;
+
 typedef struct LIR {
     int offset;
     struct LIR *next;
diff --git a/vm/compiler/codegen/armv5te/Assemble.c b/vm/compiler/codegen/armv5te/Assemble.c
index 14355cb..baa6ced 100644
--- a/vm/compiler/codegen/armv5te/Assemble.c
+++ b/vm/compiler/codegen/armv5te/Assemble.c
@@ -216,9 +216,9 @@
 #define PADDING_MOV_R0_R0               0x1C00
 
 /* Write the numbers in the literal pool to the codegen stream */
-static void writeDataContent(CompilationUnit *cUnit)
+static void installDataContent(CompilationUnit *cUnit)
 {
-    int *dataPtr = (int *) (cUnit->codeBuffer + cUnit->dataOffset);
+    int *dataPtr = (int *) (cUnit->baseAddr + cUnit->dataOffset);
     Armv5teLIR *dataLIR = (Armv5teLIR *) cUnit->wordList;
     while (dataLIR) {
         *dataPtr++ = dataLIR->operands[0];
@@ -226,6 +226,17 @@
     }
 }
 
+/* Bytes occupied by a trace description, through its runEnd-flagged run */
+static int jitTraceDescriptionSize(const JitTraceDescription *desc)
+{
+    int lastRun = 0;
+
+    while (!desc->trace[lastRun].frag.runEnd)
+        lastRun++;
+    /* NOTE(review): header is sized as JitCodeDesc -- confirm it matches */
+    return sizeof(JitCodeDesc) + ((lastRun+1) * sizeof(JitTraceRun));
+}
+
 /* Return TRUE if error happens */
 static bool assembleInstructions(CompilationUnit *cUnit, intptr_t startAddr)
 {
@@ -383,19 +394,58 @@
 }
 
 /*
+ * Translation layout in the code cache.  Note that the codeAddress pointer
+ * in JitTable will point directly to the code body (field codeAddress).  The
+ * chain cell offset is at codeAddress - 2, and (if present) executionCount
+ * is at codeAddress - 6.
+ *
+ *      +----------------------------+
+ *      | Execution count            |  -> [Optional] 4 bytes
+ *      +----------------------------+
+ *   +--| Offset to chain cell counts|  -> 2 bytes
+ *   |  +----------------------------+
+ *   |  | Code body                  |  -> Start address for translation
+ *   |  |                            |     variable in 2-byte chunks
+ *   |  .                            .     (JitTable's codeAddress points here)
+ *   |  .                            .
+ *   |  |                            |
+ *   |  +----------------------------+
+ *   |  | Chaining Cells             |  -> 8 bytes each, must be 4 byte aligned
+ *   |  .                            .
+ *   |  .                            .
+ *   |  |                            |
+ *   |  +----------------------------+
+ *   +->| Chaining cell counts       |  -> 4 bytes, chain cell counts by type
+ *      +----------------------------+
+ *      | Trace description          |  -> variable sized
+ *      .                            .
+ *      |                            |
+ *      +----------------------------+
+ *      | Literal pool               |  -> 4-byte aligned, variable size
+ *      .                            .
+ *      .                            .
+ *      |                            |
+ *      +----------------------------+
+ *
  * Go over each instruction in the list and calculate the offset from the top
  * before sending them off to the assembler. If out-of-range branch distance is
  * seen rearrange the instructions a bit to correct it.
  */
+#define CHAIN_CELL_OFFSET_SIZE 2
 void dvmCompilerAssembleLIR(CompilationUnit *cUnit)
 {
     LIR *lir;
     Armv5teLIR *armLIR;
     int offset;
     int i;
+    ChainCellCounts chainCellCounts;
+    u2 chainCellOffset;
+    int descSize = jitTraceDescriptionSize(cUnit->traceDesc);
 
 retry:
-    for (armLIR = (Armv5teLIR *) cUnit->firstLIRInsn, offset = 0;
+    /* Beginning offset needs to allow space for chain cell offset */
+    for (armLIR = (Armv5teLIR *) cUnit->firstLIRInsn,
+         offset = CHAIN_CELL_OFFSET_SIZE;
          armLIR;
          armLIR = NEXT_LIR(armLIR)) {
         armLIR->generic.offset = offset;
@@ -413,8 +463,15 @@
     }
 
     /* Const values have to be word aligned */
-    offset = ((offset + 3) >> 2) << 2;
+    offset = (offset + 3) & ~3;
 
+    /* Add space for chain cell counts & trace description */
+    chainCellOffset = offset;
+    offset += sizeof(chainCellCounts) + descSize;
+
+    assert((offset & 0x3) == 0);  /* Should still be word aligned */
+
+    /* Set up offsets for literals */
     cUnit->dataOffset = offset;
 
     for (lir = cUnit->wordList; lir; lir = lir->next) {
@@ -424,12 +481,14 @@
 
     cUnit->totalSize = offset;
 
-    if (gDvmJit.codeCacheByteUsed + offset > CODE_CACHE_SIZE) {
+    if (gDvmJit.codeCacheByteUsed + cUnit->totalSize > CODE_CACHE_SIZE) {
         gDvmJit.codeCacheFull = true;
         cUnit->baseAddr = NULL;
         return;
     }
-    cUnit->codeBuffer = dvmCompilerNew(offset, true);
+
+    /* Allocate enough space for the code block */
+    cUnit->codeBuffer = dvmCompilerNew(chainCellOffset, true);
     if (cUnit->codeBuffer == NULL) {
         LOGE("Code buffer allocation failure\n");
         cUnit->baseAddr = NULL;
@@ -442,19 +501,36 @@
     if (needRetry)
         goto retry;
 
-    writeDataContent(cUnit);
-
     cUnit->baseAddr = (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;
     gDvmJit.codeCacheByteUsed += offset;
 
+    /* Install the chain cell offset: all 16 bits, dvmJitUnchain reads a u2 */
+    *((u2*)cUnit->baseAddr) = chainCellOffset;
 
-    /* Install the compilation */
-    memcpy(cUnit->baseAddr, cUnit->codeBuffer, offset);
+    /* Install the code block */
+    memcpy((char*)cUnit->baseAddr + 2, cUnit->codeBuffer, chainCellOffset - 2);
     gDvmJit.numCompilations++;
 
+    /* Install the chaining cell counts */
+    for (i=0; i< CHAINING_CELL_LAST; i++) {
+        chainCellCounts.u.count[i] = cUnit->numChainingCells[i];
+    }
+    memcpy((char*)cUnit->baseAddr + chainCellOffset, &chainCellCounts,
+           sizeof(chainCellCounts));
+
+    /* Install the trace description */
+    memcpy((char*)cUnit->baseAddr + chainCellOffset + sizeof(chainCellCounts),
+           cUnit->traceDesc, descSize);
+
+    /* Write the literals directly into the code cache */
+    installDataContent(cUnit);
+
     /* Flush dcache and invalidate the icache to maintain coherence */
-    cacheflush((intptr_t) cUnit->baseAddr,
-               (intptr_t) (cUnit->baseAddr + offset), 0);
+    cacheflush((long)cUnit->baseAddr,
+               (long)(cUnit->baseAddr + offset), 0);
+
+    /* Adjust baseAddr to point to executable code */
+    cUnit->baseAddr = (char*)cUnit->baseAddr + CHAIN_CELL_OFFSET_SIZE;
 }
 
 /*
@@ -466,7 +542,8 @@
  * Where HH is 10 for the 1st inst, and 11 for the second and
  * the "o" field is each instruction's 11-bit contribution to the
  * 22-bit branch offset.
- * TUNING: use a single-instruction variant if it reaches.
+ * If the target is nearby, use a single-instruction bl.
+ * If one or more threads is suspended, don't chain.
  */
 void* dvmJitChain(void* tgtAddr, u4* branchAddr)
 {
@@ -476,24 +553,117 @@
     u4 thumb2;
     u4 newInst;
 
-    assert((branchOffset >= -(1<<22)) && (branchOffset <= ((1<<22)-2)));
+    if (gDvm.sumThreadSuspendCount == 0) {
+        assert((branchOffset >= -(1<<22)) && (branchOffset <= ((1<<22)-2)));
 
-    gDvmJit.translationChains++;
+        gDvmJit.translationChains++;
 
-    COMPILER_TRACE_CHAINING(
-        LOGD("Jit Runtime: chaining 0x%x to 0x%x\n",
-             (int) branchAddr, (int) tgtAddr & -2));
-    if ((branchOffset < -2048) | (branchOffset > 2046)) {
-        thumb1 =  (0xf000 | ((branchOffset>>12) & 0x7ff));
-        thumb2 =  (0xf800 | ((branchOffset>> 1) & 0x7ff));
-    } else {
-        thumb1 =  (0xe000 | ((branchOffset>> 1) & 0x7ff));
-        thumb2 =  0x4300;  /* nop -> or r0, r0 */
+        COMPILER_TRACE_CHAINING(
+            LOGD("Jit Runtime: chaining 0x%x to 0x%x\n",
+                 (int) branchAddr, (int) tgtAddr & -2));
+        if ((branchOffset < -2048) | (branchOffset > 2046)) {
+            thumb1 =  (0xf000 | ((branchOffset>>12) & 0x7ff));
+            thumb2 =  (0xf800 | ((branchOffset>> 1) & 0x7ff));
+        } else {
+            thumb1 =  (0xe000 | ((branchOffset>> 1) & 0x7ff));
+            thumb2 =  0x4300;  /* nop -> or r0, r0 */
+        }
+
+        newInst = thumb2<<16 | thumb1;
+        *branchAddr = newInst;
+        cacheflush((long)branchAddr, (long)branchAddr + 4, 0);
     }
 
-    newInst = thumb2<<16 | thumb1;
-    *branchAddr = newInst;
-    cacheflush((intptr_t) branchAddr, (intptr_t) branchAddr + 4, 0);
-
     return tgtAddr;
 }
+
+/*
+ * Unchain one trace: rewrite each of its chaining cells to the load-and-
+ * branch sequence below.  codeAddr is a thumb code address (low bit set),
+ * so the 2-byte chain cell offset is read from codeAddr - 3; refer to the
+ * diagram in dvmCompilerAssembleLIR.  Returns the address following the
+ * last cell unchained.  No cache flush is done here.
+ */
+u4* dvmJitUnchain(void* codeAddr)
+{
+    u2* pChainCellOffset = (u2*)((char*)codeAddr - 3);
+    u2 chainCellOffset = *pChainCellOffset;
+    ChainCellCounts *pChainCellCounts =
+          (ChainCellCounts*)((char*)codeAddr + chainCellOffset -3);
+    int cellCount;
+    u4* pChainCells;
+    u4* pStart;  /* assigned below but not read in this function */
+    u4 thumb1;
+    u4 thumb2;
+    u4 newInst;
+    int i,j;
+
+    /* Sum the per-type counts to get the total number of chain cells */
+    for (i = 0, cellCount = 0; i < CHAINING_CELL_LAST; i++) {
+        cellCount += pChainCellCounts->u.count[i];
+    }
+
+    /* Cells are 8 bytes each and sit directly below the counts word */
+    pStart = pChainCells = (u4*)((char*)pChainCellCounts - (cellCount * 8));
+
+    /* The cells are sorted by type - walk through them and reset each */
+    for (i = 0; i < CHAINING_CELL_LAST; i++) {
+        for (j = 0; j < pChainCellCounts->u.count[i]; j++) {
+            int targetOffset;
+            switch(i) {
+                case CHAINING_CELL_GENERIC:
+                    targetOffset = offsetof(InterpState,
+                          jitToInterpEntries.dvmJitToInterpNormal);
+                    break;
+                case CHAINING_CELL_POST_INVOKE:
+                case CHAINING_CELL_INVOKE:
+                    targetOffset = offsetof(InterpState,
+                          jitToInterpEntries.dvmJitToTraceSelect);
+                    break;
+                default:
+                    dvmAbort();
+            }
+            /*
+             * Code sequence written into the first word of the cell:
+             *     ldr  r0, [rGLUE, #<word offset>]
+             *     blx  r0
+             */
+            COMPILER_TRACE_CHAINING(
+                LOGD("Jit Runtime: unchaining 0x%x", (int)pChainCells));
+            targetOffset = targetOffset >> 2;  /* convert to word offset */
+            thumb1 = 0x6800 | (targetOffset << 6) | (rGLUE << 3) | (r0 << 0);
+            thumb2 = 0x4780 | (r0 << 3);
+            newInst = thumb2<<16 | thumb1;  /* thumb1 lands in the low half */
+            *pChainCells = newInst;
+            pChainCells += 2;  /* Advance by 2 words (one 8-byte cell) */
+        }
+    }
+    return pChainCells;
+}
+
+/*
+ * Unchain all translations in the cache, then flush the modified range.
+ */
+void dvmJitUnchainAll()
+{
+    u4* lowAddress = NULL;
+    u4* highAddress = NULL;
+    unsigned int i;
+    if (gDvmJit.pJitEntryTable != NULL) {
+        COMPILER_TRACE_CHAINING(LOGD("Jit Runtime: unchaining all"));
+        dvmLockMutex(&gDvmJit.tableLock);
+        for (i = 0; i < gDvmJit.maxTableEntries; i++) {
+            u4* codeAddr = (u4*)gDvmJit.pJitEntryTable[i].codeAddress;
+            if (gDvmJit.pJitEntryTable[i].dPC && codeAddr) {
+                u4* lastAddress = dvmJitUnchain(codeAddr);
+                /* Flush from the translation start, not past its cells */
+                if (lowAddress == NULL || codeAddr < lowAddress)
+                    lowAddress = codeAddr;
+                if (lastAddress > highAddress)
+                    highAddress = lastAddress;
+            }
+        }
+        cacheflush((long)lowAddress, (long)highAddress, 0);
+        dvmUnlockMutex(&gDvmJit.tableLock);
+    }
+}
diff --git a/vm/interp/Jit.c b/vm/interp/Jit.c
index e23361b..f2b3c50 100644
--- a/vm/interp/Jit.c
+++ b/vm/interp/Jit.c
@@ -206,28 +206,6 @@
     }
 }
 
-/* Returns the signed branch displacement of a Dalvik instruction. */
-int dvmGetBranchDisplacement( DecodedInstruction* decInsn )
-{
-    int res = 0;
-    switch (dexGetInstrFormat(gDvm.instrFormat, decInsn->opCode)) {
-        case kFmt22t:
-            res = decInsn->vC;
-            break;
-        case kFmt20t:
-        case kFmt21t:
-            res = decInsn->vB;
-            break;
-        case kFmt10t:
-        case kFmt30t:
-            res = decInsn->vA;
-            break;
-        default:
-            dvmAbort();
-    }
-    return res;
-}
-
 /*
  * Adds to the current trace request one instruction at a time, just
  * before that instruction is interpreted.  This is the primary trace
@@ -395,6 +373,11 @@
 {
     int idx = dvmJitHash(dPC);
 
+    /* If anything is suspended, don't re-enter the code cache */
+    if (gDvm.sumThreadSuspendCount > 0) {
+        return NULL;
+    }
+
     /* Expect a high hit rate on 1st shot */
     if (gDvmJit.pJitEntryTable[idx].dPC == dPC) {
 #if defined(EXIT_STATS)
diff --git a/vm/mterp/armv5te/footer.S b/vm/mterp/armv5te/footer.S
index beff938..f4df342 100644
--- a/vm/mterp/armv5te/footer.S
+++ b/vm/mterp/armv5te/footer.S
@@ -70,7 +70,9 @@
     mov    r1,rINST
     bl     dvmJitChain              @ r0<- dvmJitChain(codeAddr,chainAddr)
     ldr    rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
-    bx     r0                       @ continue native execution
+    cmp    r0,#0                    @ successful chain?
+    bxne   r0                       @ continue native execution
+    b      toInterpreter            @ didn't chain - resume with interpreter
 
 /* No translation, so request one if profiling isn't disabled*/
 2:
@@ -106,11 +108,13 @@
     mov    r0,rPC
     bl     dvmJitGetCodeAddr        @ Is there a translation?
     cmp    r0,#0
-    beq    1f                       @ go if not, otherwise do chain
+    beq    toInterpreter            @ go if not, otherwise do chain
     mov    r1,rINST
     bl     dvmJitChain              @ r0<- dvmJitChain(codeAddr,chainAddr)
     ldr    rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
-    bx     r0                       @ continue native execution
+    cmp    r0,#0                    @ successful chain?
+    bxne   r0                       @ continue native execution
+    b      toInterpreter            @ didn't chain - resume with interpreter
 
 /*
  * Return from the translation cache to the interpreter to do method invocation.
@@ -132,7 +136,7 @@
  * already been restored by the time we get here.  We'll need to set
  * up rIBASE & rINST, and load the address of the JitTable into r0.
  */
-1:
+toInterpreter:
     EXPORT_PC()
     adrl   rIBASE, dvmAsmInstructionStart
     FETCH_INST()
diff --git a/vm/mterp/out/InterpAsm-armv4t.S b/vm/mterp/out/InterpAsm-armv4t.S
index a3b097e..0ede758 100644
--- a/vm/mterp/out/InterpAsm-armv4t.S
+++ b/vm/mterp/out/InterpAsm-armv4t.S
@@ -9552,7 +9552,9 @@
     mov    r1,rINST
     bl     dvmJitChain              @ r0<- dvmJitChain(codeAddr,chainAddr)
     ldr    rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
-    bx     r0                       @ continue native execution
+    cmp    r0,#0                    @ successful chain?
+    bxne   r0                       @ continue native execution
+    b      toInterpreter            @ didn't chain - resume with interpreter
 
 /* No translation, so request one if profiling isn't disabled*/
 2:
@@ -9588,11 +9590,13 @@
     mov    r0,rPC
     bl     dvmJitGetCodeAddr        @ Is there a translation?
     cmp    r0,#0
-    beq    1f                       @ go if not, otherwise do chain
+    beq    toInterpreter            @ go if not, otherwise do chain
     mov    r1,rINST
     bl     dvmJitChain              @ r0<- dvmJitChain(codeAddr,chainAddr)
     ldr    rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
-    bx     r0                       @ continue native execution
+    cmp    r0,#0                    @ successful chain?
+    bxne   r0                       @ continue native execution
+    b      toInterpreter            @ didn't chain - resume with interpreter
 
 /*
  * Return from the translation cache to the interpreter to do method invocation.
@@ -9614,7 +9618,7 @@
  * already been restored by the time we get here.  We'll need to set
  * up rIBASE & rINST, and load the address of the JitTable into r0.
  */
-1:
+toInterpreter:
     EXPORT_PC()
     adrl   rIBASE, dvmAsmInstructionStart
     FETCH_INST()
diff --git a/vm/mterp/out/InterpAsm-armv5te-vfp.S b/vm/mterp/out/InterpAsm-armv5te-vfp.S
index 8b062f4..e3d33e8 100644
--- a/vm/mterp/out/InterpAsm-armv5te-vfp.S
+++ b/vm/mterp/out/InterpAsm-armv5te-vfp.S
@@ -9066,7 +9066,9 @@
     mov    r1,rINST
     bl     dvmJitChain              @ r0<- dvmJitChain(codeAddr,chainAddr)
     ldr    rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
-    bx     r0                       @ continue native execution
+    cmp    r0,#0                    @ successful chain?
+    bxne   r0                       @ continue native execution
+    b      toInterpreter            @ didn't chain - resume with interpreter
 
 /* No translation, so request one if profiling isn't disabled*/
 2:
@@ -9102,11 +9104,13 @@
     mov    r0,rPC
     bl     dvmJitGetCodeAddr        @ Is there a translation?
     cmp    r0,#0
-    beq    1f                       @ go if not, otherwise do chain
+    beq    toInterpreter            @ go if not, otherwise do chain
     mov    r1,rINST
     bl     dvmJitChain              @ r0<- dvmJitChain(codeAddr,chainAddr)
     ldr    rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
-    bx     r0                       @ continue native execution
+    cmp    r0,#0                    @ successful chain?
+    bxne   r0                       @ continue native execution
+    b      toInterpreter            @ didn't chain - resume with interpreter
 
 /*
  * Return from the translation cache to the interpreter to do method invocation.
@@ -9128,7 +9132,7 @@
  * already been restored by the time we get here.  We'll need to set
  * up rIBASE & rINST, and load the address of the JitTable into r0.
  */
-1:
+toInterpreter:
     EXPORT_PC()
     adrl   rIBASE, dvmAsmInstructionStart
     FETCH_INST()
diff --git a/vm/mterp/out/InterpAsm-armv5te.S b/vm/mterp/out/InterpAsm-armv5te.S
index 94e81e0..d659cf7 100644
--- a/vm/mterp/out/InterpAsm-armv5te.S
+++ b/vm/mterp/out/InterpAsm-armv5te.S
@@ -9546,7 +9546,9 @@
     mov    r1,rINST
     bl     dvmJitChain              @ r0<- dvmJitChain(codeAddr,chainAddr)
     ldr    rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
-    bx     r0                       @ continue native execution
+    cmp    r0,#0                    @ successful chain?
+    bxne   r0                       @ continue native execution
+    b      toInterpreter            @ didn't chain - resume with interpreter
 
 /* No translation, so request one if profiling isn't disabled*/
 2:
@@ -9582,11 +9584,13 @@
     mov    r0,rPC
     bl     dvmJitGetCodeAddr        @ Is there a translation?
     cmp    r0,#0
-    beq    1f                       @ go if not, otherwise do chain
+    beq    toInterpreter            @ go if not, otherwise do chain
     mov    r1,rINST
     bl     dvmJitChain              @ r0<- dvmJitChain(codeAddr,chainAddr)
     ldr    rINST, .LdvmCompilerTemplateStart @ rINST is rCBASE in compiled code
-    bx     r0                       @ continue native execution
+    cmp    r0,#0                    @ successful chain?
+    bxne   r0                       @ continue native execution
+    b      toInterpreter            @ didn't chain - resume with interpreter
 
 /*
  * Return from the translation cache to the interpreter to do method invocation.
@@ -9608,7 +9612,7 @@
  * already been restored by the time we get here.  We'll need to set
  * up rIBASE & rINST, and load the address of the JitTable into r0.
  */
-1:
+toInterpreter:
     EXPORT_PC()
     adrl   rIBASE, dvmAsmInstructionStart
     FETCH_INST()