JIT: Reworked the assembler to be smarter about short instruction forms

Previously, the JIT wasn't generating the short-form compare and branch on
zero/not zero instructions (cbz, cbnz) for Thumb2.  The reason was that
these only allow a small, forward-only displacement (at most 126 bytes),
and when the target was out of reach, the assembler would abort the trace,
split it in half and try again.  This change re-enables cbz/cbnz generation
and introduces a relatively lightweight retry mechanism: an out-of-range
cbz/cbnz is rewritten as a cmp rx,#0 / b[eq/ne] pair and a retry of the
assembly is requested.

Also includes changes for Thumb2 to always generate large-displacement
literal loads and conditional branches, in order to minimize the number of
retry attempts.

Change-Id: Icf066836fad203f5c0fcbbb2ae8e1aa73d1cf816
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index c1b08a3..4f975b3 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -20,9 +20,12 @@
 
 #include "../../CompilerInternals.h"
 #include "ArmLIR.h"
+#include "Codegen.h"
 #include <unistd.h>             /* for cacheflush */
 #include <sys/mman.h>           /* for protection change */
 
+#define MAX_ASSEMBLER_RETRIES 10
+
 /*
  * opcode: ArmOpCode enum
  * skeleton: pre-designated bit-pattern for this opcode
@@ -914,8 +917,14 @@
     return sizeof(JitTraceDescription) + ((runCount+1) * sizeof(JitTraceRun));
 }
 
-/* Return TRUE if error happens */
-static bool assembleInstructions(CompilationUnit *cUnit, intptr_t startAddr)
+/*
+ * Assemble the LIR into binary instruction format.  Note that we may
+ * discover that pc-relative displacements may not fit the selected
+ * instruction.  In those cases we will try to substitute a new code
+ * sequence or request that the trace be shortened and retried.
+ */
+static AssemblerStatus assembleInstructions(CompilationUnit *cUnit,
+                                            intptr_t startAddr)
 {
     short *bufferAddr = (short *) cUnit->codeBuffer;
     ArmLIR *lir;
@@ -952,9 +961,9 @@
                 dvmCompilerAbort(cUnit);
             }
             if ((lir->opCode == kThumb2LdrPcRel12) && (delta > 4091)) {
-                return true;
+                return kRetryHalve;
             } else if (delta > 1020) {
-                return true;
+                return kRetryHalve;
             }
             if (lir->opCode == kThumb2Vldrs) {
                 lir->operands[2] = delta >> 2;
@@ -968,11 +977,23 @@
             intptr_t target = targetLIR->generic.offset;
             int delta = target - pc;
             if (delta > 126 || delta < 0) {
-                /*
-                 * TODO: allow multiple kinds of assembler failure to allow
-                 * change of code patterns when things don't fit.
-                 */
-                return true;
+                /* Convert to cmp rx,#0 / b[eq/ne] tgt pair */
+                ArmLIR *newInst = dvmCompilerNew(sizeof(ArmLIR), true);
+                /* Make new branch instruction and insert after */
+                newInst->opCode = kThumbBCond;
+                newInst->operands[0] = 0;
+                newInst->operands[1] = (lir->opCode == kThumb2Cbz) ?
+                                        kArmCondEq : kArmCondNe;
+                newInst->generic.target = lir->generic.target;
+                dvmCompilerSetupResourceMasks(newInst);
+                dvmCompilerInsertLIRAfter((LIR *)lir, (LIR *)newInst);
+                /* Convert the cb[n]z to a cmp rx, #0 */
+                lir->opCode = kThumbCmpRI8;
+                lir->operands[0] = lir->operands[1];
+                lir->operands[1] = 0;
+                lir->generic.target = 0;
+                dvmCompilerSetupResourceMasks(lir);
+                return kRetryAll;
             } else {
                 lir->operands[1] = delta >> 1;
             }
@@ -983,7 +1004,7 @@
             intptr_t target = targetLIR->generic.offset;
             int delta = target - pc;
             if ((lir->opCode == kThumbBCond) && (delta > 254 || delta < -256)) {
-                return true;
+                return kRetryHalve;
             }
             lir->operands[0] = delta >> 1;
         } else if (lir->opCode == kThumbBUncond) {
@@ -1029,18 +1050,12 @@
                     bits |= value;
                     break;
                 case kFmtBrOffset:
-                    /*
-                     * NOTE: branch offsets are not handled here, but
-                     * in the main assembly loop (where label values
-                     * are known).  For reference, here is what the
-                     * encoder handing would be:
-                         value = ((operand  & 0x80000) >> 19) << 26;
-                         value |= ((operand & 0x40000) >> 18) << 11;
-                         value |= ((operand & 0x20000) >> 17) << 13;
-                         value |= ((operand & 0x1f800) >> 11) << 16;
-                         value |= (operand  & 0x007ff);
-                         bits |= value;
-                     */
+                    value = ((operand  & 0x80000) >> 19) << 26;
+                    value |= ((operand & 0x40000) >> 18) << 11;
+                    value |= ((operand & 0x20000) >> 17) << 13;
+                    value |= ((operand & 0x1f800) >> 11) << 16;
+                    value |= (operand  & 0x007ff);
+                    bits |= value;
                     break;
                 case kFmtShift5:
                     value = ((operand & 0x1c) >> 2) << 12;
@@ -1117,7 +1132,7 @@
         }
         *bufferAddr++ = bits & 0xffff;
     }
-    return false;
+    return kSuccess;
 }
 
 #if defined(SIGNATURE_BREAKPOINT)
@@ -1277,16 +1292,29 @@
         return;
     }
 
-    bool assemblerFailure = assembleInstructions(
-        cUnit, (intptr_t) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed);
-
     /*
-     * Currently the only reason that can cause the assembler to fail is due to
-     * trace length - cut it in half and retry.
+     * Attempt to assemble the trace.  Note that assembleInstructions
+     * may rewrite the code sequence and request a retry.
      */
-    if (assemblerFailure) {
-        cUnit->halveInstCount = true;
-        return;
+    cUnit->assemblerStatus = assembleInstructions(cUnit,
+          (intptr_t) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed);
+
+    switch(cUnit->assemblerStatus) {
+        case kSuccess:
+            break;
+        case kRetryAll:
+            if (cUnit->assemblerRetries < MAX_ASSEMBLER_RETRIES) {
+                return;
+            }
+            /* Too many retries - reset and try cutting the trace in half */
+            cUnit->assemblerRetries = 0;
+            cUnit->assemblerStatus = kRetryHalve;
+            return;
+        case kRetryHalve:
+            return;
+        default:
+             LOGE("Unexpected assembler status: %d", cUnit->assemblerStatus);
+             dvmAbort();
     }
 
 #if defined(SIGNATURE_BREAKPOINT)