Jit: Rework delayed start plus misc. cleanup

Defer initialization of the JIT in preparation for an upcoming feature
that waits until the first screen is painted before starting it, so that
no effort is wasted JIT'ing initialization code.  A timed delay is in
place for the moment.
To change the on/off state, call dvmSuspendAllThreads(), update the
value of gDvmJit.pJitTable and then dvmResumeAllThreads().
Each time a thread goes through the heavyweight check-suspend path,
returns from a monitor lock/unlock, or returns from a JNI call, it
refreshes its on/off state.

Also:
   Recognize and handle failure to increase size of JitTable.
   Avoid repeated lock/unlock of JitTable modification mutex during resize.
   Make all work order enqueue actions non-blocking, which includes adding
      a non-blocking mutex lock: dvmTryLockMutex().
   Fix bug Jeff noticed where we were using a half-word form of a Thumb2
      instruction rather than the byte form.
   Minor comment changes.
diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c
index 8861102..648d8f4 100644
--- a/vm/compiler/codegen/arm/CodegenDriver.c
+++ b/vm/compiler/codegen/arm/CodegenDriver.c
@@ -1709,6 +1709,14 @@
     genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
     /* Do the call */
     opReg(cUnit, kOpBlx, r2);
+    /*
+     * Refresh Jit's on/off status, which may have changed if we were
+     * sent to VM_MONITOR state above.
+     * TUNING: pointer chase, but must reload following call
+     */
+    loadWordDisp(cUnit, rGLUE, offsetof(InterpState, ppJitProfTable), r0);
+    loadWordDisp(cUnit, r0, 0, r0);
+    storeWordDisp(cUnit, rGLUE, offsetof(InterpState, pJitProfTable), r0);
 #if defined(WITH_DEADLOCK_PREDICTION)
     if (isEnter) {
         loadWordDisp(cUnit, rGLUE, offsetof(InterpState, self), r0);
@@ -1786,6 +1794,7 @@
         }
         case OP_CONST_WIDE_32: {
             //TUNING: single routine to load constant pair for support doubles
+            //TUNING: load 0/-1 separately to avoid load dependency
             rlResult = evalLoc(cUnit, rlDest, kCoreReg, true);
             loadConstantValue(cUnit, rlResult.lowReg, mir->dalvikInsn.vB);
             opRegRegImm(cUnit, kOpAsr, rlResult.highReg,
@@ -2165,6 +2174,7 @@
         case OP_INT_TO_LONG:
             rlSrc = updateLoc(cUnit, rlSrc);
             rlResult = evalLoc(cUnit, rlDest, kCoreReg, true);
+            //TUNING: shouldn't loadValueDirect already check for phys reg?
             if (rlSrc.location == kLocPhysReg) {
                 genRegCopy(cUnit, rlResult.lowReg, rlSrc.lowReg);
             } else {
@@ -2227,6 +2237,7 @@
         rlDest = getDestLocWide(cUnit, mir, 0, 1);
         rlResult = evalLoc(cUnit, rlDest, kCoreReg, true);
         loadConstantValue(cUnit, rlResult.lowReg, BBBB);
+        //TUNING: do high separately to avoid load dependency
         opRegRegImm(cUnit, kOpAsr, rlResult.highReg, rlResult.lowReg, 31);
         storeValueWide(cUnit, rlDest, rlResult);
     } else if (dalvikOpCode == OP_CONST_16) {