Fix performance issues related to chaining and unchaining.

1) Patching requests for predicted chaining cells (used by virtual/interface
methods) are now batched in a queue and processed when the VM is paused for GC.

2) When the code cache is full, the reset operation is also conducted at the
end of GC pauses, which totally eliminates the need for the compiler thread
to issue suspend-all requests. This is a very rare event, and when it happens
it takes less than 5ms to finish.

3) Change the initial value of the branch in a predicted chaining cell from 0
(i.e. lsl r0, r0, #0) to 0xe7fe (i.e. branch to self) so that initializing a
predicted chaining cell doesn't require suspending all threads. Together with
1), this yields a 20% speedup on some benchmarks.

4) Add TestCompability.c, in which defining "TEST_VM_IN_ECLAIR := true" in
buildspec.mk will activate dummy symbols needed to run libdvm.so on older
releases.

Bug: 2397689
Bug: 2396513
Bug: 2331313
diff --git a/vm/compiler/codegen/CompilerCodegen.h b/vm/compiler/codegen/CompilerCodegen.h
index ff39cd4..4a27a67 100644
--- a/vm/compiler/codegen/CompilerCodegen.h
+++ b/vm/compiler/codegen/CompilerCodegen.h
@@ -41,6 +41,7 @@
 void* dvmJitChain(void *tgtAddr, u4* branchAddr);
 u4* dvmJitUnchain(void *codeAddr);
 void dvmJitUnchainAll(void);
+void dvmCompilerPatchInlineCache(void);
 
 /* Implemented in codegen/<target>/Ralloc.c */
 void dvmCompilerRegAlloc(CompilationUnit *cUnit);
diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h
index 3254ff7..21e2a32 100644
--- a/vm/compiler/codegen/arm/ArmLIR.h
+++ b/vm/compiler/codegen/arm/ArmLIR.h
@@ -730,16 +730,9 @@
     u8 defMask;         // Resource mask for def
 } ArmLIR;
 
-/* Chain cell for predicted method invocation */
-typedef struct PredictedChainingCell {
-    u4 branch;                  /* Branch to chained destination */
-    const ClassObject *clazz;   /* key #1 for prediction */
-    const Method *method;       /* key #2 to lookup native PC from dalvik PC */
-    u4 counter;                 /* counter to patch the chaining cell */
-} PredictedChainingCell;
-
 /* Init values when a predicted chain is initially assembled */
-#define PREDICTED_CHAIN_BX_PAIR_INIT     0
+/* E7FE is branch to self */
+#define PREDICTED_CHAIN_BX_PAIR_INIT     0xe7fe
 #define PREDICTED_CHAIN_CLAZZ_INIT       0
 #define PREDICTED_CHAIN_METHOD_INIT      0
 #define PREDICTED_CHAIN_COUNTER_INIT     0
@@ -748,7 +741,7 @@
 #define PREDICTED_CHAIN_COUNTER_DELAY    512
 
 /* Rechain after this many mis-predictions have happened */
-#define PREDICTED_CHAIN_COUNTER_RECHAIN  8192
+#define PREDICTED_CHAIN_COUNTER_RECHAIN  1024
 
 /* Used if the resolved callee is a native method */
 #define PREDICTED_CHAIN_COUNTER_AVOID    0x7fffffff
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index 998c955..c3ad957 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -1328,7 +1328,12 @@
     u4 newInst;
     bool thumbTarget;
 
-    if ((gDvmJit.pProfTable != NULL) && gDvm.sumThreadSuspendCount == 0) {
+    /*
+     * Only chain translations when there is no pending request for all
+     * threads to suspend themselves via the interpreter.
+     */
+    if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) &&
+        (gDvmJit.codeCacheFull == false)) {
         assert((branchOffset >= -(1<<22)) && (branchOffset <= ((1<<22)-2)));
 
         gDvmJit.translationChains++;
@@ -1350,12 +1355,48 @@
 
         *branchAddr = newInst;
         cacheflush((long)branchAddr, (long)branchAddr + 4, 0);
+        gDvmJit.hasNewChain = true;
     }
 
     return tgtAddr;
 }
 
 /*
+ * Attempt to enqueue a work order to patch an inline cache for a predicted
+ * chaining cell for virtual/interface calls.
+ */
+bool inlineCachePatchEnqueue(PredictedChainingCell *cellAddr,
+                             PredictedChainingCell *newContent)
+{
+    bool result = true;
+
+    dvmLockMutex(&gDvmJit.compilerICPatchLock);
+
+    if (cellAddr->clazz == NULL &&
+        cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) {
+        /*
+         * The update order matters - make sure clazz is updated last since it
+         * will bring the uninitialized chaining cell to life.
+         */
+        cellAddr->method = newContent->method;
+        cellAddr->branch = newContent->branch;
+        cellAddr->counter = newContent->counter;
+        cellAddr->clazz = newContent->clazz;
+        cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
+    }
+    else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE)  {
+        int index = gDvmJit.compilerICPatchIndex++;
+        gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr;
+        gDvmJit.compilerICPatchQueue[index].cellContent = *newContent;
+    } else {
+        result = false;
+    }
+
+    dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
+    return result;
+}
+
+/*
  * This method is called from the invoke templates for virtual and interface
  * methods to speculatively setup a chain to the callee. The templates are
  * written in assembly and have setup method, cell, and clazz at r0, r2, and
@@ -1412,41 +1453,29 @@
         goto done;
     }
 
-    /*
-     * Bump up the counter first just in case other mutator threads are in
-     * nearby territory to also attempt to rechain this cell. This is not
-     * done in a thread-safe way and doesn't need to be since the consequence
-     * of the race condition [rare] is two back-to-back suspend-all attempts,
-     * which will be handled correctly.
-     */
-    cell->counter = PREDICTED_CHAIN_COUNTER_AVOID;
+    PredictedChainingCell newCell;
 
-    PredictedChainingCell *newCell =
-        (PredictedChainingCell *) malloc(sizeof(PredictedChainingCell));
+    /* Avoid issuing back-to-back patch orders for the same cell */
+    cell->counter = PREDICTED_CHAIN_COUNTER_AVOID;
 
     int baseAddr = (int) cell + 4;   // PC is cur_addr + 4
     int branchOffset = tgtAddr - baseAddr;
 
-    newCell->branch = assembleChainingBranch(branchOffset, true);
-    newCell->clazz = clazz;
-    newCell->method = method;
+    newCell.branch = assembleChainingBranch(branchOffset, true);
+    newCell.clazz = clazz;
+    newCell.method = method;
+    newCell.counter = PREDICTED_CHAIN_COUNTER_RECHAIN;
 
     /*
-     * Reset the counter again in case other mutator threads got invoked
-     * between the previous rest and dvmSuspendAllThreads call.
-     */
-    newCell->counter = PREDICTED_CHAIN_COUNTER_RECHAIN;
-
-    /*
-     * Enter the work order to the queue for the compiler thread to patch the
-     * chaining cell.
+     * Enter the work order to the queue and the chaining cell will be patched
+     * the next time a safe point is entered.
      *
-     * No blocking call is added here because the patched result is not
-     * intended to be immediately consumed by the requesting thread. Its
-     * execution is simply resumed by chasing the class pointer to resolve the
-     * callsite.
+     * If the enqueue fails, reset the rechain count to a normal value so
+     * that rechaining won't be delayed indefinitely.
      */
-    dvmCompilerWorkEnqueue((const u2 *) cell, kWorkOrderICPatch, newCell);
+    if (!inlineCachePatchEnqueue(cell, &newCell)) {
+        cell->counter = PREDICTED_CHAIN_COUNTER_RECHAIN;
+    }
 #endif
 done:
     return method;
@@ -1456,31 +1485,61 @@
  * Patch the inline cache content based on the content passed from the work
  * order.
  */
-bool dvmJitPatchInlineCache(void *cellPtr, void *contentPtr)
+void dvmCompilerPatchInlineCache(void)
 {
-    PredictedChainingCell *cellDest = (PredictedChainingCell *) cellPtr;
-    PredictedChainingCell *newContent = (PredictedChainingCell *) contentPtr;
+    int i;
+    PredictedChainingCell *minAddr, *maxAddr;
 
-    /* Stop the world */
-    dvmSuspendAllThreads(SUSPEND_FOR_IC_PATCH);
+    /* Nothing to be done */
+    if (gDvmJit.compilerICPatchIndex == 0) return;
 
+    /*
+     * Since all threads are already stopped we don't really need to acquire
+     * the lock. But a race condition could easily be introduced in the
+     * future without care, so we still acquire the lock here.
+     */
+    dvmLockMutex(&gDvmJit.compilerICPatchLock);
 
-    COMPILER_TRACE_CHAINING(
-        LOGD("Jit Runtime: predicted chain %p from %s to %s (%s) patched",
-             cellDest, cellDest->clazz ? cellDest->clazz->descriptor : "NULL",
-             newContent->clazz->descriptor,
-             newContent->method->name));
+    //LOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex);
 
-    /* Install the new cell content */
-    *cellDest = *newContent;
+    /* Initialize the min/max address range */
+    minAddr = (PredictedChainingCell *)
+        ((char *) gDvmJit.codeCache + CODE_CACHE_SIZE);
+    maxAddr = (PredictedChainingCell *) gDvmJit.codeCache;
 
-    /* Then synchronize the I/D$ */
-    cacheflush((long) cellDest, (long) (cellDest+1), 0);
+    for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) {
+        PredictedChainingCell *cellAddr =
+            gDvmJit.compilerICPatchQueue[i].cellAddr;
+        PredictedChainingCell *cellContent =
+            &gDvmJit.compilerICPatchQueue[i].cellContent;
 
-    /* All done - resume all other threads */
-    dvmResumeAllThreads(SUSPEND_FOR_IC_PATCH);
+        if (cellAddr->clazz == NULL) {
+            COMPILER_TRACE_CHAINING(
+                LOGD("Jit Runtime: predicted chain %p to %s (%s) initialized",
+                     cellAddr,
+                     cellContent->clazz->descriptor,
+                     cellContent->method->name));
+        } else {
+            COMPILER_TRACE_CHAINING(
+                LOGD("Jit Runtime: predicted chain %p from %s to %s (%s) "
+                     "patched",
+                     cellAddr,
+                     cellAddr->clazz->descriptor,
+                     cellContent->clazz->descriptor,
+                     cellContent->method->name));
+        }
 
-    return true;
+        /* Patch the chaining cell */
+        *cellAddr = *cellContent;
+        minAddr = (cellAddr < minAddr) ? cellAddr : minAddr;
+        maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr;
+    }
+
+    /* Then synchronize the I/D cache */
+    cacheflush((long) minAddr, (long) (maxAddr+1), 0);
+
+    gDvmJit.compilerICPatchIndex = 0;
+    dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
 }
 
 /*
@@ -1617,6 +1676,7 @@
         dvmUnlockMutex(&gDvmJit.tableLock);
         gDvmJit.translationChains = 0;
     }
+    gDvmJit.hasNewChain = false;
 }
 
 typedef struct jitProfileAddrToLine {
diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c
index 5be07aa..b0e16b8 100644
--- a/vm/compiler/codegen/arm/CodegenDriver.c
+++ b/vm/compiler/codegen/arm/CodegenDriver.c
@@ -4121,8 +4121,7 @@
 {
     bool res;
 
-    if (gDvmJit.codeCacheFull &&
-        (work->kind != kWorkOrderICPatch)) {
+    if (gDvmJit.codeCacheFull) {
         return false;
     }
 
@@ -4142,9 +4141,6 @@
             gDvmJit.printMe = oldPrintMe;;
             break;
         }
-        case kWorkOrderICPatch:
-            res = dvmJitPatchInlineCache((void *) work->pc, work->info);
-            break;
         default:
             res = false;
             dvmAbort();