Experimental x86 JIT trace selection
Experimental support for trace selection for x86 host mode operation.
Not enabled by default; turn it on by setting both WITH_HOST_DALVIK and
WITH_JIT to true. When enabled, the VM profiles during x86 fast interpreter
operation, selects hot traces, and "compiles" them into traces that consist
only of jumps back to the interpreter.
First in a series of experimental x86 support checkins.
Change-Id: I0e423ec58a7bf01f226cb486f55de2841fab1002
diff --git a/vm/compiler/codegen/x86/Assemble.c b/vm/compiler/codegen/x86/Assemble.c
index fbf53ca..31264ce 100644
--- a/vm/compiler/codegen/x86/Assemble.c
+++ b/vm/compiler/codegen/x86/Assemble.c
@@ -34,8 +34,6 @@
#endif
/*
- * FIXME - redo for x86
- *
* Translation layout in the code cache. Note that the codeAddress pointer
* in JitTable will point directly to the code body (field codeAddress). The
* chain cell offset codeAddress - 2, and (if present) executionCount is at
@@ -52,7 +50,7 @@
* | . .
* | | |
* | +----------------------------+
- * | | Chaining Cells | -> 12/16 bytes each, must be 4 byte aligned
+ * | | Chaining Cells | -> 16 bytes each, 8 byte aligned
* | . .
* | . .
* | | |
@@ -66,8 +64,8 @@
* | |
* +----------------------------+
* | Literal pool | -> 4-byte aligned, variable size
- * . .
- * . .
+ * . . Note: for x86 literals will
+ * . . generally appear inline.
* | |
* +----------------------------+
*
diff --git a/vm/compiler/codegen/x86/CodegenDriver.c b/vm/compiler/codegen/x86/CodegenDriver.c
index 69f637e..4a5d481 100644
--- a/vm/compiler/codegen/x86/CodegenDriver.c
+++ b/vm/compiler/codegen/x86/CodegenDriver.c
@@ -24,10 +24,63 @@
* applicable directory below this one.
*/
+extern X86LIR *loadConstant(CompilationUnit *cUnit, int rDest, int value);
+extern X86LIR *loadWordDisp(CompilationUnit *cUnit, int rBase,
+ int displacement, int rDest);
+extern void dvmCompilerFlushAllRegs(CompilationUnit *cUnit);
+extern void storeWordDisp(CompilationUnit *cUnit, int rBase,
+ int displacement, int rSrc);
+extern X86LIR *opReg(CompilationUnit *cUnit, OpKind op, int rDestSrc);
+
static int opcodeCoverage[kNumPackedOpcodes];
static intptr_t templateEntryOffsets[TEMPLATE_LAST_MARK];
/*
+ * Bail to the interpreter. Will not return to this trace. The punt entry
+ * needs rPC set correctly, so it is loaded here from method->insns + offset.
+ */
+static void genPuntToInterp(CompilationUnit *cUnit, unsigned int offset)
+{
+ dvmCompilerFlushAllRegs(cUnit); // Flush before leaving the trace
+ loadConstant(cUnit, rPC, (int)(cUnit->method->insns + offset)); // rPC <- pc
+ loadWordDisp(cUnit, rEBP, 0, rECX); // Get glue
+ loadWordDisp(cUnit, rECX,
+ offsetof(InterpState, jitToInterpEntries.dvmJitToInterpPunt),
+ rEAX); // rEAX <- address of the punt entry
+ opReg(cUnit, kOpUncondBr, rEAX); // Unconditional branch through rEAX
+}
+/* Emit code that hands one Dalvik instruction (mir) to the interpreter. */
+static void genInterpSingleStep(CompilationUnit *cUnit, MIR *mir)
+{
+ int flags = dexGetFlagsFromOpcode(mir->dalvikInsn.opcode);
+ int flagsToCheck = kInstrCanBranch | kInstrCanSwitch | kInstrCanReturn |
+ kInstrCanThrow;
+
+ //If already optimized out, just ignore
+ if (mir->dalvikInsn.opcode == OP_NOP)
+ return;
+
+ //Ugly, but necessary. Flush all Dalvik regs so Interp can find them
+ dvmCompilerFlushAllRegs(cUnit);
+
+ // No following MIR, or insn may branch/switch/return/throw: punt instead
+ if ((mir->next == NULL) || (flags & flagsToCheck)) {
+ genPuntToInterp(cUnit, mir->offset); // Note: flushes all regs again
+ return;
+ }
+ int entryAddr = offsetof(InterpState,
+ jitToInterpEntries.dvmJitToInterpSingleStep);
+ loadWordDisp(cUnit, rEBP, 0, rECX); // Get glue
+ loadWordDisp(cUnit, rECX, entryAddr, rEAX); // rEAX<- entry address
+ /* rPC = dalvik pc */
+ loadConstant(cUnit, rPC, (int) (cUnit->method->insns + mir->offset));
+ /* rECX = dalvik pc of following instruction */
+ loadConstant(cUnit, rECX, (int) (cUnit->method->insns + mir->next->offset));
+ /* Pass on the stack */
+ storeWordDisp(cUnit, rESP, OUT_ARG0, rECX);
+ opReg(cUnit, kOpCall, rEAX); // Call (not branch to) the single-step entry
+}
+
+/*
* The following are the first-level codegen routines that analyze the format
* of each bytecode then either dispatch special purpose codegen routines
* or produce corresponding Thumb instructions directly.
diff --git a/vm/compiler/codegen/x86/X86LIR.h b/vm/compiler/codegen/x86/X86LIR.h
index 62ac447..8acf015 100644
--- a/vm/compiler/codegen/x86/X86LIR.h
+++ b/vm/compiler/codegen/x86/X86LIR.h
@@ -27,7 +27,7 @@
* esp is native SP
*
* For interpreter:
- * edx is Dalvik PC (rPC)
+ * edi is Dalvik PC (rPC)
* ebx is rINST
*
* For JIT:
@@ -82,8 +82,8 @@
int nextFPTemp;
int numCoreRegs;
RegisterInfo *coreRegs;
- int numFPRegs;
- RegisterInfo *FPRegs;
+ int numMMRegs;
+ RegisterInfo *MMRegs;
} RegisterPool;
typedef enum OpSize {
@@ -99,7 +99,6 @@
typedef enum OpKind {
kOpMov,
- kOpMvn,
kOpCmp,
kOpLsl,
kOpLsr,
@@ -114,15 +113,11 @@
kOpAdc,
kOpSub,
kOpSbc,
- kOpRsub,
kOpMul,
kOpDiv,
kOpRem,
- kOpBic,
- kOpCmn,
kOpTst,
- kOpBkpt,
- kOpBlx,
+ kOpCall,
kOpPush,
kOpPop,
kOp2Char,
@@ -132,6 +127,37 @@
kOpUncondBr,
} OpKind;
+#define FP_REG_OFFSET 8 /* Added to an XMM index to form its enum value */
+
+typedef enum NativeRegisterPool {
+ rEAX = 0, /* General-purpose registers occupy values 0-7 */
+ rECX = 1,
+ rEDX = 2,
+ rEBX = 3,
+ rESP = 4,
+ rEBP = 5,
+ rESI = 6,
+ rEDI = 7,
+ rXMM0 = 0 + FP_REG_OFFSET, /* XMM registers follow, starting at 8 */
+ rXMM1 = 1 + FP_REG_OFFSET,
+ rXMM2 = 2 + FP_REG_OFFSET,
+ rXMM3 = 3 + FP_REG_OFFSET,
+ rXMM4 = 4 + FP_REG_OFFSET,
+ rXMM5 = 5 + FP_REG_OFFSET,
+ rXMM6 = 6 + FP_REG_OFFSET,
+ rXMM7 = 7 + FP_REG_OFFSET,
+} NativeRegisterPool;
+
+#define rPC rEDI /* Dalvik PC lives in edi */
+#define rFP rESI /* NOTE(review): presumably the Dalvik frame pointer - confirm */
+#define rINST rEBX /* Interpreter's rINST lives in ebx */
+
+#define OUT_ARG0 0 /* OUT_ARGn: byte displacement of outgoing arg n, 4 apart */
+#define OUT_ARG1 4
+#define OUT_ARG2 8
+#define OUT_ARG3 12
+#define OUT_ARG4 16
+
typedef struct X86LIR {
LIR generic;
//X86Opcode opcode;