JIT: Reworked the assembler to be smarter about short instruction forms
Previously, the JIT wasn't generating short-form compare and branch on
zero/not zero instructions for Thumb2. The reason was that these only
allow a 1-byte displacement, and when they didn't reach, the assembler would
abort the trace, split it in half and try again. This change re-enables
cbz, cbnz generation and introduces a relatively lightweight retry
mechanism.
Also includes changes for Thumb2 to always generate large-displacement
literal loads and conditional branches, to minimize the number of retry
attempts.
Change-Id: Icf066836fad203f5c0fcbbb2ae8e1aa73d1cf816
diff --git a/vm/compiler/CompilerIR.h b/vm/compiler/CompilerIR.h
index 21aadec..82b97e5 100644
--- a/vm/compiler/CompilerIR.h
+++ b/vm/compiler/CompilerIR.h
@@ -152,6 +152,12 @@
struct LoopAnalysis;
struct RegisterPool;
+typedef enum AssemblerStatus {
+ kSuccess,
+ kRetryAll,
+ kRetryHalve
+} AssemblerStatus;
+
typedef struct CompilationUnit {
int numInsts;
int numBlocks;
@@ -166,11 +172,12 @@
int headerSize; // bytes before the first code ptr
int dataOffset; // starting offset of literal pool
int totalSize; // header + code size
+ AssemblerStatus assemblerStatus; // Success or fix and retry
+ int assemblerRetries; // How many times tried to fix assembly
unsigned char *codeBuffer;
void *baseAddr;
bool printMe;
bool allSingleStep;
- bool halveInstCount;
bool executionCount; // Add code to count trace executions
bool hasLoop; // Contains a loop
bool hasInvoke; // Contains an invoke instruction
diff --git a/vm/compiler/Frontend.c b/vm/compiler/Frontend.c
index a828886..8e5a5e2 100644
--- a/vm/compiler/Frontend.c
+++ b/vm/compiler/Frontend.c
@@ -918,15 +918,17 @@
/* Convert MIR to LIR, etc. */
dvmCompilerMIR2LIR(&cUnit);
- /* Convert LIR into machine code. */
- dvmCompilerAssembleLIR(&cUnit, info);
+ /* Convert LIR into machine code. Loop for recoverable retries */
+ do {
+ dvmCompilerAssembleLIR(&cUnit, info);
+ cUnit.assemblerRetries++;
+ if (cUnit.printMe && cUnit.assemblerStatus != kSuccess)
+ LOGD("Assembler abort #%d on %d",cUnit.assemblerRetries,
+ cUnit.assemblerStatus);
+ } while (cUnit.assemblerStatus == kRetryAll);
if (cUnit.printMe) {
- if (cUnit.halveInstCount) {
- LOGD("Assembler aborted");
- } else {
- dvmCompilerCodegenDump(&cUnit);
- }
+ dvmCompilerCodegenDump(&cUnit);
LOGD("End %s%s, %d Dalvik instructions",
desc->method->clazz->descriptor, desc->method->name,
cUnit.numInsts);
@@ -935,18 +937,17 @@
/* Reset the compiler resource pool */
dvmCompilerArenaReset();
- /* Success */
- if (!cUnit.halveInstCount) {
-#if defined(WITH_JIT_TUNING)
- methodStats->nativeSize += cUnit.totalSize;
-#endif
- return info->codeAddress != NULL;
-
- /* Halve the instruction count and retry again */
- } else {
+ if (cUnit.assemblerStatus == kRetryHalve) {
+ /* Halve the instruction count and start from the top */
return dvmCompileTrace(desc, cUnit.numInsts / 2, info, bailPtr,
optHints);
}
+
+ assert(cUnit.assemblerStatus == kSuccess);
+#if defined(WITH_JIT_TUNING)
+ methodStats->nativeSize += cUnit.totalSize;
+#endif
+ return info->codeAddress != NULL;
}
/*
@@ -1290,7 +1291,7 @@
/* Convert LIR into machine code. */
dvmCompilerAssembleLIR(cUnit, info);
- if (cUnit->halveInstCount) {
+ if (cUnit->assemblerStatus != kSuccess) {
return false;
}
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index c1b08a3..4f975b3 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -20,9 +20,12 @@
#include "../../CompilerInternals.h"
#include "ArmLIR.h"
+#include "Codegen.h"
#include <unistd.h> /* for cacheflush */
#include <sys/mman.h> /* for protection change */
+#define MAX_ASSEMBLER_RETRIES 10
+
/*
* opcode: ArmOpCode enum
* skeleton: pre-designated bit-pattern for this opcode
@@ -914,8 +917,14 @@
return sizeof(JitTraceDescription) + ((runCount+1) * sizeof(JitTraceRun));
}
-/* Return TRUE if error happens */
-static bool assembleInstructions(CompilationUnit *cUnit, intptr_t startAddr)
+/*
+ * Assemble the LIR into binary instruction format. Note that we may
+ * discover that pc-relative displacements may not fit the selected
+ * instruction. In those cases we will try to substitute a new code
+ * sequence or request that the trace be shortened and retried.
+ */
+static AssemblerStatus assembleInstructions(CompilationUnit *cUnit,
+ intptr_t startAddr)
{
short *bufferAddr = (short *) cUnit->codeBuffer;
ArmLIR *lir;
@@ -952,9 +961,9 @@
dvmCompilerAbort(cUnit);
}
if ((lir->opCode == kThumb2LdrPcRel12) && (delta > 4091)) {
- return true;
+ return kRetryHalve;
} else if (delta > 1020) {
- return true;
+ return kRetryHalve;
}
if (lir->opCode == kThumb2Vldrs) {
lir->operands[2] = delta >> 2;
@@ -968,11 +977,23 @@
intptr_t target = targetLIR->generic.offset;
int delta = target - pc;
if (delta > 126 || delta < 0) {
- /*
- * TODO: allow multiple kinds of assembler failure to allow
- * change of code patterns when things don't fit.
- */
- return true;
+ /* Convert to cmp rx,#0 / b[eq/ne] tgt pair */
+ ArmLIR *newInst = dvmCompilerNew(sizeof(ArmLIR), true);
+ /* Make new branch instruction and insert after */
+ newInst->opCode = kThumbBCond;
+ newInst->operands[0] = 0;
+ newInst->operands[1] = (lir->opCode == kThumb2Cbz) ?
+ kArmCondEq : kArmCondNe;
+ newInst->generic.target = lir->generic.target;
+ dvmCompilerSetupResourceMasks(newInst);
+ dvmCompilerInsertLIRAfter((LIR *)lir, (LIR *)newInst);
+ /* Convert the cb[n]z to a cmp rx, #0 */
+ lir->opCode = kThumbCmpRI8;
+ lir->operands[0] = lir->operands[1];
+ lir->operands[1] = 0;
+ lir->generic.target = 0;
+ dvmCompilerSetupResourceMasks(lir);
+ return kRetryAll;
} else {
lir->operands[1] = delta >> 1;
}
@@ -983,7 +1004,7 @@
intptr_t target = targetLIR->generic.offset;
int delta = target - pc;
if ((lir->opCode == kThumbBCond) && (delta > 254 || delta < -256)) {
- return true;
+ return kRetryHalve;
}
lir->operands[0] = delta >> 1;
} else if (lir->opCode == kThumbBUncond) {
@@ -1029,18 +1050,12 @@
bits |= value;
break;
case kFmtBrOffset:
- /*
- * NOTE: branch offsets are not handled here, but
- * in the main assembly loop (where label values
- * are known). For reference, here is what the
- * encoder handing would be:
- value = ((operand & 0x80000) >> 19) << 26;
- value |= ((operand & 0x40000) >> 18) << 11;
- value |= ((operand & 0x20000) >> 17) << 13;
- value |= ((operand & 0x1f800) >> 11) << 16;
- value |= (operand & 0x007ff);
- bits |= value;
- */
+ value = ((operand & 0x80000) >> 19) << 26;
+ value |= ((operand & 0x40000) >> 18) << 11;
+ value |= ((operand & 0x20000) >> 17) << 13;
+ value |= ((operand & 0x1f800) >> 11) << 16;
+ value |= (operand & 0x007ff);
+ bits |= value;
break;
case kFmtShift5:
value = ((operand & 0x1c) >> 2) << 12;
@@ -1117,7 +1132,7 @@
}
*bufferAddr++ = bits & 0xffff;
}
- return false;
+ return kSuccess;
}
#if defined(SIGNATURE_BREAKPOINT)
@@ -1277,16 +1292,29 @@
return;
}
- bool assemblerFailure = assembleInstructions(
- cUnit, (intptr_t) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed);
-
/*
- * Currently the only reason that can cause the assembler to fail is due to
- * trace length - cut it in half and retry.
+ * Attempt to assemble the trace. Note that assembleInstructions
+ * may rewrite the code sequence and request a retry.
*/
- if (assemblerFailure) {
- cUnit->halveInstCount = true;
- return;
+ cUnit->assemblerStatus = assembleInstructions(cUnit,
+ (intptr_t) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed);
+
+ switch(cUnit->assemblerStatus) {
+ case kSuccess:
+ break;
+ case kRetryAll:
+ if (cUnit->assemblerRetries < MAX_ASSEMBLER_RETRIES) {
+ return;
+ }
+ /* Too many retries - reset and try cutting the trace in half */
+ cUnit->assemblerRetries = 0;
+ cUnit->assemblerStatus = kRetryHalve;
+ return;
+ case kRetryHalve:
+ return;
+ default:
+ LOGE("Unexpected assembler status: %d", cUnit->assemblerStatus);
+ dvmAbort();
}
#if defined(SIGNATURE_BREAKPOINT)
diff --git a/vm/compiler/codegen/arm/Codegen.h b/vm/compiler/codegen/arm/Codegen.h
index ca0a843..be74e3f 100644
--- a/vm/compiler/codegen/arm/Codegen.h
+++ b/vm/compiler/codegen/arm/Codegen.h
@@ -78,6 +78,8 @@
extern void dvmCompilerRegCopyWide(CompilationUnit *cUnit, int destLo,
int destHi, int srcLo, int srcHi);
+extern void dvmCompilerSetupResourceMasks(ArmLIR *lir);
+
extern void dvmCompilerFlushRegImpl(CompilationUnit *cUnit, int rBase,
int displacement, int rSrc, OpSize size);
diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c
index dcabd8e..1ff9409 100644
--- a/vm/compiler/codegen/arm/CodegenDriver.c
+++ b/vm/compiler/codegen/arm/CodegenDriver.c
@@ -4426,6 +4426,12 @@
templateEntryOffsets[TEMPLATE_INTERPRET]);
}
+/* Needed by the Assembler */
+void dvmCompilerSetupResourceMasks(ArmLIR *lir)
+{
+ setupResourceMasks(lir);
+}
+
/* Needed by the ld/st optmizatons */
ArmLIR* dvmCompilerRegCopyNoInsert(CompilationUnit *cUnit, int rDest, int rSrc)
{
diff --git a/vm/compiler/codegen/arm/Thumb2/Factory.c b/vm/compiler/codegen/arm/Thumb2/Factory.c
index c7b52fd..2bf2940 100644
--- a/vm/compiler/codegen/arm/Thumb2/Factory.c
+++ b/vm/compiler/codegen/arm/Thumb2/Factory.c
@@ -171,7 +171,7 @@
dataTarget = addWordData(cUnit, value, false);
}
ArmLIR *loadPcRel = dvmCompilerNew(sizeof(ArmLIR), true);
- loadPcRel->opCode = LOWREG(rDest) ? kThumbLdrPcRel : kThumb2LdrPcRel12;
+ loadPcRel->opCode = kThumb2LdrPcRel12;
loadPcRel->generic.target = (LIR *) dataTarget;
loadPcRel->operands[0] = rDest;
setupResourceMasks(loadPcRel);
@@ -227,7 +227,7 @@
static ArmLIR *opCondBranch(CompilationUnit *cUnit, ArmConditionCode cc)
{
- return newLIR2(cUnit, kThumbBCond, 0 /* offset to be patched */, cc);
+ return newLIR2(cUnit, kThumb2BCond, 0 /* offset to be patched */, cc);
}
static ArmLIR *opImm(CompilationUnit *cUnit, OpKind op, int value)
@@ -1100,15 +1100,7 @@
{
ArmLIR *branch;
int modImm;
- /*
- * TODO: re-enable usage of kThumb2Cbz & kThumb2Cbnz once assembler is
- * enhanced to allow us to replace code patterns when instructions don't
- * reach. Currently, CB[N]Z is causing too many assembler aborts.
- * What we want to do is emit the short forms, and then replace them with
- * longer versions when needed.
- */
-
- if (0 && (LOWREG(reg)) && (checkValue == 0) &&
+ if ((LOWREG(reg)) && (checkValue == 0) &&
((cond == kArmCondEq) || (cond == kArmCondNe))) {
branch = newLIR2(cUnit,
(cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,