Introduce "just interpret" chainable pseudo-translation.
This is the first step towards enabling translation & self-cosim stress modes.
When trace selection begins, the trace head address is pinned and
remains in a limbo state until the translation is complete. Previously,
if the trace selection aborted for any reason, the trace head would remain
forever in limbo. This was not a correctness problem, but caused some
small performance anomalies and made life more difficult for self-cosimulation
mode.
This CL introduces a pseudo-translation that simply routes control to
the interpreter. When we detect that a trace selection attempt has
failed, the trace head is associated with this fully-chainable
pseudo-translation. This also has the benefit for self-cosimulation that
we are guaranteed forward progress.
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index bc39479..8e977c1 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -1259,16 +1259,28 @@
info->codeAddress = (char*)info->codeAddress + 1;
}
-static u4 assembleBXPair(int branchOffset)
+/*
+ * Returns the skeleton bit pattern associated with an opcode. All
+ * variable fields are zeroed.
+ */
+static u4 getSkeleton(ArmOpCode op)
+{
+ return EncodingMap[op].skeleton;
+}
+
+static u4 assembleChainingBranch(int branchOffset, bool thumbTarget)
{
u4 thumb1, thumb2;
- if ((branchOffset < -2048) | (branchOffset > 2046)) {
- thumb1 = (0xf000 | ((branchOffset>>12) & 0x7ff));
- thumb2 = (0xf800 | ((branchOffset>> 1) & 0x7ff));
+ if (!thumbTarget) {
+ thumb1 = (getSkeleton(kThumbBlx1) | ((branchOffset>>12) & 0x7ff));
+ thumb2 = (getSkeleton(kThumbBlx2) | ((branchOffset>> 1) & 0x7ff));
+ } else if ((branchOffset < -2048) | (branchOffset > 2046)) {
+ thumb1 = (getSkeleton(kThumbBl1) | ((branchOffset>>12) & 0x7ff));
+ thumb2 = (getSkeleton(kThumbBl2) | ((branchOffset>> 1) & 0x7ff));
} else {
- thumb1 = (0xe000 | ((branchOffset>> 1) & 0x7ff));
- thumb2 = 0x4300; /* nop -> or r0, r0 */
+ thumb1 = (getSkeleton(kThumbBUncond) | ((branchOffset>> 1) & 0x7ff));
+ thumb2 = getSkeleton(kThumbOrr); /* nop -> or r0, r0 */
}
return thumb2<<16 | thumb1;
@@ -1278,7 +1290,8 @@
* Perform translation chain operation.
* For ARM, we'll use a pair of thumb instructions to generate
* an unconditional chaining branch of up to 4MB in distance.
- * Use a BL, though we don't really need the link. The format is
+ * Use a BL, because the generic "interpret" translation needs
+ * the link register to find the dalvik pc of the target.
* 111HHooooooooooo
* Where HH is 10 for the 1st inst, and 11 for the second and
* the "o" field is each instruction's 11-bit contribution to the
@@ -1291,6 +1304,7 @@
int baseAddr = (u4) branchAddr + 4;
int branchOffset = (int) tgtAddr - baseAddr;
u4 newInst;
+ bool thumbTarget;
if (gDvm.sumThreadSuspendCount == 0) {
assert((branchOffset >= -(1<<22)) && (branchOffset <= ((1<<22)-2)));
@@ -1301,7 +1315,16 @@
LOGD("Jit Runtime: chaining 0x%x to 0x%x\n",
(int) branchAddr, (int) tgtAddr & -2));
- newInst = assembleBXPair(branchOffset);
+ /*
+ * NOTE: normally, all translations are Thumb[2] mode, with
+ * a single exception: the default TEMPLATE_INTERPRET
+ * pseudo-translation. If the need ever arises to
+ * mix Arm & Thumb[2] translations, the following code should be
+ * generalized.
+ */
+ thumbTarget = (tgtAddr != gDvmJit.interpretTemplate);
+
+ newInst = assembleChainingBranch(branchOffset, thumbTarget);
*branchAddr = newInst;
cacheflush((long)branchAddr, (long)branchAddr + 4, 0);
@@ -1354,7 +1377,7 @@
* Compilation not made yet for the callee. Reset the counter to a small
* value and come back to check soon.
*/
- if (tgtAddr == 0) {
+ if ((tgtAddr == 0) || ((void*)tgtAddr == gDvmJit.interpretTemplate)) {
/*
* Wait for a few invocations (currently set to be 16) before trying
* to setup the chain again.
@@ -1388,7 +1411,7 @@
clazz->descriptor,
method->name));
- cell->branch = assembleBXPair(branchOffset);
+ cell->branch = assembleChainingBranch(branchOffset, true);
cell->clazz = clazz;
cell->method = method;
/*
@@ -1517,7 +1540,9 @@
dvmLockMutex(&gDvmJit.tableLock);
for (i = 0; i < gDvmJit.jitTableSize; i++) {
if (gDvmJit.pJitEntryTable[i].dPC &&
- gDvmJit.pJitEntryTable[i].codeAddress) {
+ gDvmJit.pJitEntryTable[i].codeAddress &&
+ (gDvmJit.pJitEntryTable[i].codeAddress !=
+ gDvmJit.interpretTemplate)) {
u4* lastAddress;
lastAddress =
dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress);
@@ -1573,6 +1598,10 @@
LOGD("TRACEPROFILE 0x%08x 0 NULL 0 0", (int)traceBase);
return 0;
}
+ if (p->codeAddress == gDvmJit.interpretTemplate) {
+ LOGD("TRACEPROFILE 0x%08x 0 INTERPRET_ONLY 0 0", (int)traceBase);
+ return 0;
+ }
pExecutionCount = (u4*) (traceBase);
pCellOffset = (u2*) (traceBase + 4);
diff --git a/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c b/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c
index 8c61322..3a46cac 100644
--- a/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c
@@ -53,6 +53,12 @@
(int) gDvmJit.codeCache + templateEntryOffsets[opCode]);
}
+void *dvmCompilerGetInterpretTemplate()
+{
+ return (void*) ((int)gDvmJit.codeCache +
+ templateEntryOffsets[TEMPLATE_INTERPRET]);
+}
+
/* Architecture-specific initializations and checks go here */
static bool compilerArchVariantInit(void)
{
diff --git a/vm/compiler/codegen/arm/armv5te/ArchVariant.c b/vm/compiler/codegen/arm/armv5te/ArchVariant.c
index a4b9ae3..4178c23 100644
--- a/vm/compiler/codegen/arm/armv5te/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv5te/ArchVariant.c
@@ -90,6 +90,12 @@
return true;
}
+void *dvmCompilerGetInterpretTemplate()
+{
+ return (void*) ((int)gDvmJit.codeCache +
+ templateEntryOffsets[TEMPLATE_INTERPRET]);
+}
+
static bool genInlineSqrt(CompilationUnit *cUnit, MIR *mir)
{
return false; /* punt to C handler */
diff --git a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
index 0409135..02b9b79 100644
--- a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
@@ -54,6 +54,12 @@
(int) gDvmJit.codeCache + templateEntryOffsets[opCode]);
}
+void *dvmCompilerGetInterpretTemplate()
+{
+ return (void*) ((int)gDvmJit.codeCache +
+ templateEntryOffsets[TEMPLATE_INTERPRET]);
+}
+
/* Architecture-specific initializations and checks go here */
static bool compilerArchVariantInit(void)
{
diff --git a/vm/compiler/template/armv5te-vfp/TemplateOpList.h b/vm/compiler/template/armv5te-vfp/TemplateOpList.h
index 1608920..d414e1b 100644
--- a/vm/compiler/template/armv5te-vfp/TemplateOpList.h
+++ b/vm/compiler/template/armv5te-vfp/TemplateOpList.h
@@ -55,3 +55,4 @@
JIT_TEMPLATE(RESTORE_STATE)
JIT_TEMPLATE(STRING_COMPARETO)
JIT_TEMPLATE(STRING_INDEXOF)
+JIT_TEMPLATE(INTERPRET)
diff --git a/vm/compiler/template/armv5te/TEMPLATE_INTERPRET.S b/vm/compiler/template/armv5te/TEMPLATE_INTERPRET.S
new file mode 100644
index 0000000..5484400
--- /dev/null
+++ b/vm/compiler/template/armv5te/TEMPLATE_INTERPRET.S
@@ -0,0 +1,23 @@
+ /*
+ * This handler transfers control to the interpreter without performing
+ * any lookups. It may be called either as part of a normal chaining
+ * operation, or from the transition code in header.S. We distinguish
+ * the two cases by looking at the link register. If called from a
+ * translation chain, it will point to the chaining Dalvik PC + 1.
+ * On entry:
+ * lr - if NULL:
+ * r1 - the Dalvik PC to begin interpretation.
+ * else
+ * [lr, #-1] contains Dalvik PC to begin interpretation
+ * rGLUE - pointer to interpState
+ * rFP - Dalvik frame pointer
+ */
+ cmp lr, #0
+ ldrne r1,[lr, #-1]
+ ldr r2, .LinterpPunt
+ mov r0, r1 @ set Dalvik PC
+ bx r2
+ @ doesn't return
+
+.LinterpPunt:
+ .word dvmJitToInterpPunt
diff --git a/vm/compiler/template/armv5te/TemplateOpList.h b/vm/compiler/template/armv5te/TemplateOpList.h
index 88cc60a..1b5e6ea 100644
--- a/vm/compiler/template/armv5te/TemplateOpList.h
+++ b/vm/compiler/template/armv5te/TemplateOpList.h
@@ -40,3 +40,4 @@
JIT_TEMPLATE(RESTORE_STATE)
JIT_TEMPLATE(STRING_COMPARETO)
JIT_TEMPLATE(STRING_INDEXOF)
+JIT_TEMPLATE(INTERPRET)
diff --git a/vm/compiler/template/armv5te/header.S b/vm/compiler/template/armv5te/header.S
index 9651032..c257105 100644
--- a/vm/compiler/template/armv5te/header.S
+++ b/vm/compiler/template/armv5te/header.S
@@ -85,6 +85,9 @@
#define SAVEAREA_FROM_FP(_reg, _fpreg) \
sub _reg, _fpreg, #sizeofStackSaveArea
+#define EXPORT_PC() \
+ str rPC, [rFP, #(-sizeofStackSaveArea + offStackSaveArea_currentPc)]
+
/*
* This is a #include, not a %include, because we want the C pre-processor
* to expand the macros into assembler assignment statements.
diff --git a/vm/compiler/template/armv7-a/TemplateOpList.h b/vm/compiler/template/armv7-a/TemplateOpList.h
index 1608920..d414e1b 100644
--- a/vm/compiler/template/armv7-a/TemplateOpList.h
+++ b/vm/compiler/template/armv7-a/TemplateOpList.h
@@ -55,3 +55,4 @@
JIT_TEMPLATE(RESTORE_STATE)
JIT_TEMPLATE(STRING_COMPARETO)
JIT_TEMPLATE(STRING_INDEXOF)
+JIT_TEMPLATE(INTERPRET)
diff --git a/vm/compiler/template/config-armv5te-vfp b/vm/compiler/template/config-armv5te-vfp
index b5ca397..fc968fe 100644
--- a/vm/compiler/template/config-armv5te-vfp
+++ b/vm/compiler/template/config-armv5te-vfp
@@ -45,6 +45,7 @@
op TEMPLATE_THROW_EXCEPTION_COMMON armv5te
op TEMPLATE_STRING_COMPARETO armv5te
op TEMPLATE_STRING_INDEXOF armv5te
+ op TEMPLATE_INTERPRET armv5te
op-end
diff --git a/vm/compiler/template/config-armv7-a b/vm/compiler/template/config-armv7-a
index 1d3d331..7f7b478 100644
--- a/vm/compiler/template/config-armv7-a
+++ b/vm/compiler/template/config-armv7-a
@@ -45,6 +45,7 @@
op TEMPLATE_THROW_EXCEPTION_COMMON armv5te
op TEMPLATE_STRING_COMPARETO armv5te
op TEMPLATE_STRING_INDEXOF armv5te
+ op TEMPLATE_INTERPRET armv5te
op-end
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
index cc86848..6604773 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
@@ -92,6 +92,9 @@
#define SAVEAREA_FROM_FP(_reg, _fpreg) \
sub _reg, _fpreg, #sizeofStackSaveArea
+#define EXPORT_PC() \
+ str rPC, [rFP, #(-sizeofStackSaveArea + offStackSaveArea_currentPc)]
+
/*
* This is a #include, not a %include, because we want the C pre-processor
* to expand the macros into assembler assignment statements.
@@ -1296,6 +1299,35 @@
bx lr
+/* ------------------------------ */
+ .balign 4
+ .global dvmCompiler_TEMPLATE_INTERPRET
+dvmCompiler_TEMPLATE_INTERPRET:
+/* File: armv5te/TEMPLATE_INTERPRET.S */
+ /*
+ * This handler transfers control to the interpreter without performing
+ * any lookups. It may be called either as part of a normal chaining
+ * operation, or from the transition code in header.S. We distinguish
+ * the two cases by looking at the link register. If called from a
+ * translation chain, it will point to the chaining Dalvik PC + 1.
+ * On entry:
+ * lr - if NULL:
+ * r1 - the Dalvik PC to begin interpretation.
+ * else
+ * [lr, #-1] contains Dalvik PC to begin interpretation
+ * rGLUE - pointer to interpState
+ * rFP - Dalvik frame pointer
+ */
+ cmp lr, #0
+ ldrne r1,[lr, #-1]
+ ldr r2, .LinterpPunt
+ mov r0, r1 @ set Dalvik PC
+ bx r2
+ @ doesn't return
+
+.LinterpPunt:
+ .word dvmJitToInterpPunt
+
.size dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
/* File: armv5te/footer.S */
/*
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
index fbfaf86..cee118b 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
@@ -92,6 +92,9 @@
#define SAVEAREA_FROM_FP(_reg, _fpreg) \
sub _reg, _fpreg, #sizeofStackSaveArea
+#define EXPORT_PC() \
+ str rPC, [rFP, #(-sizeofStackSaveArea + offStackSaveArea_currentPc)]
+
/*
* This is a #include, not a %include, because we want the C pre-processor
* to expand the macros into assembler assignment statements.
@@ -1021,6 +1024,35 @@
bx lr
+/* ------------------------------ */
+ .balign 4
+ .global dvmCompiler_TEMPLATE_INTERPRET
+dvmCompiler_TEMPLATE_INTERPRET:
+/* File: armv5te/TEMPLATE_INTERPRET.S */
+ /*
+ * This handler transfers control to the interpreter without performing
+ * any lookups. It may be called either as part of a normal chaining
+ * operation, or from the transition code in header.S. We distinguish
+ * the two cases by looking at the link register. If called from a
+ * translation chain, it will point to the chaining Dalvik PC + 1.
+ * On entry:
+ * lr - if NULL:
+ * r1 - the Dalvik PC to begin interpretation.
+ * else
+ * [lr, #-1] contains Dalvik PC to begin interpretation
+ * rGLUE - pointer to interpState
+ * rFP - Dalvik frame pointer
+ */
+ cmp lr, #0
+ ldrne r1,[lr, #-1]
+ ldr r2, .LinterpPunt
+ mov r0, r1 @ set Dalvik PC
+ bx r2
+ @ doesn't return
+
+.LinterpPunt:
+ .word dvmJitToInterpPunt
+
.size dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
/* File: armv5te/footer.S */
/*
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
index 4d479da..aab5067 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
@@ -92,6 +92,9 @@
#define SAVEAREA_FROM_FP(_reg, _fpreg) \
sub _reg, _fpreg, #sizeofStackSaveArea
+#define EXPORT_PC() \
+ str rPC, [rFP, #(-sizeofStackSaveArea + offStackSaveArea_currentPc)]
+
/*
* This is a #include, not a %include, because we want the C pre-processor
* to expand the macros into assembler assignment statements.
@@ -1296,6 +1299,35 @@
bx lr
+/* ------------------------------ */
+ .balign 4
+ .global dvmCompiler_TEMPLATE_INTERPRET
+dvmCompiler_TEMPLATE_INTERPRET:
+/* File: armv5te/TEMPLATE_INTERPRET.S */
+ /*
+ * This handler transfers control to the interpreter without performing
+ * any lookups. It may be called either as part of a normal chaining
+ * operation, or from the transition code in header.S. We distinguish
+ * the two cases by looking at the link register. If called from a
+ * translation chain, it will point to the chaining Dalvik PC + 1.
+ * On entry:
+ * lr - if NULL:
+ * r1 - the Dalvik PC to begin interpretation.
+ * else
+ * [lr, #-1] contains Dalvik PC to begin interpretation
+ * rGLUE - pointer to interpState
+ * rFP - Dalvik frame pointer
+ */
+ cmp lr, #0
+ ldrne r1,[lr, #-1]
+ ldr r2, .LinterpPunt
+ mov r0, r1 @ set Dalvik PC
+ bx r2
+ @ doesn't return
+
+.LinterpPunt:
+ .word dvmJitToInterpPunt
+
.size dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
/* File: armv5te/footer.S */
/*