Implement chaining up to the first 64 cases in a switch statement.
diff --git a/vm/compiler/codegen/arm/Codegen.c b/vm/compiler/codegen/arm/Codegen.c
index 59933b1..b37efd2 100644
--- a/vm/compiler/codegen/arm/Codegen.c
+++ b/vm/compiler/codegen/arm/Codegen.c
@@ -3285,6 +3285,135 @@
return false;
}
+/*
+ * Find the matching case.
+ *
+ * return values:
+ * r0 (low 32-bit): pc of the chaining cell corresponding to the resolved case,
+ * including default which is placed at MIN(size, MAX_CHAINED_SWITCH_CASES).
+ * r1 (high 32-bit): the branch offset of the matching case (only for indexes
+ * above MAX_CHAINED_SWITCH_CASES).
+ *
+ * Instructions around the call are:
+ *
+ * mov r2, pc
+ * blx &findPackedSwitchIndex
+ * mov pc, r0
+ * .align4
+ * chaining cell for case 0 [8 bytes]
+ * chaining cell for case 1 [8 bytes]
+ * :
+ * chaining cell for case MIN(size, MAX_CHAINED_SWITCH_CASES)-1 [8 bytes]
+ * chaining cell for case default [8 bytes]
+ * noChain exit
+ */
+s8 findPackedSwitchIndex(const u2* switchData, int testVal, int pc)
+{
+ int size;
+ int firstKey;
+ const int *entries;
+ int index;
+ int jumpIndex;
+ int caseDPCOffset = 0;
+ /* In Thumb mode pc is 4 ahead of the "mov r2, pc" instruction */
+ int chainingPC = (pc + 4) & ~3;
+
+ /*
+ * Packed switch data format:
+ * ushort ident = 0x0100 magic value
+ * ushort size number of entries in the table
+ * int first_key first (and lowest) switch case value
+ * int targets[size] branch targets, relative to switch opcode
+ *
+ * Total size is (4+size*2) 16-bit code units.
+ */
+ size = switchData[1];
+ assert(size > 0);
+
+ firstKey = switchData[2];
+ firstKey |= switchData[3] << 16;
+
+
+ /* The entries are guaranteed to be aligned on a 32-bit boundary;
+ * we can treat them as a native int array.
+ */
+ entries = (const int*) &switchData[4];
+ assert(((u4)entries & 0x3) == 0);
+
+ index = testVal - firstKey;
+
+ /* Jump to the default cell */
+ if (index < 0 || index >= size) {
+ jumpIndex = MIN(size, MAX_CHAINED_SWITCH_CASES);
+ /* Jump to the non-chaining exit point */
+ } else if (index >= MAX_CHAINED_SWITCH_CASES) {
+ jumpIndex = MAX_CHAINED_SWITCH_CASES + 1;
+ caseDPCOffset = entries[index];
+ /* Jump to the inline chaining cell */
+ } else {
+ jumpIndex = index;
+ }
+
+ chainingPC += jumpIndex * 8;
+ return (((s8) caseDPCOffset) << 32) | (u8) chainingPC;
+}
+
+/* See comments for findPackedSwitchIndex */
+s8 findSparseSwitchIndex(const u2* switchData, int testVal, int pc)
+{
+ int size;
+ const int *keys;
+ const int *entries;
+ int chainingPC = (pc + 4) & ~3;
+ int i;
+
+ /*
+ * Sparse switch data format:
+ * ushort ident = 0x0200 magic value
+ * ushort size number of entries in the table; > 0
+ * int keys[size] keys, sorted low-to-high; 32-bit aligned
+ * int targets[size] branch targets, relative to switch opcode
+ *
+ * Total size is (2+size*4) 16-bit code units.
+ */
+
+ size = switchData[1];
+ assert(size > 0);
+
+ /* The keys are guaranteed to be aligned on a 32-bit boundary;
+ * we can treat them as a native int array.
+ */
+ keys = (const int*) &switchData[2];
+ assert(((u4)keys & 0x3) == 0);
+
+ /* The entries are guaranteed to be aligned on a 32-bit boundary;
+ * we can treat them as a native int array.
+ */
+ entries = keys + size;
+ assert(((u4)entries & 0x3) == 0);
+
+ /*
+ * Run through the list of keys, which are guaranteed to
+ * be sorted low-to-high.
+ *
+ * Most tables have 3-4 entries. Few have more than 10. A binary
+ * search here is probably not useful.
+ */
+ for (i = 0; i < size; i++) {
+ int k = keys[i];
+ if (k == testVal) {
+ /* MAX_CHAINED_SWITCH_CASES + 1 is the start of the overflow case */
+ int jumpIndex = (i < MAX_CHAINED_SWITCH_CASES) ?
+ i : MAX_CHAINED_SWITCH_CASES + 1;
+ chainingPC += jumpIndex * 8;
+ return (((s8) entries[i]) << 32) | (u8) chainingPC;
+ } else if (k > testVal) {
+ break;
+ }
+ }
+ return chainingPC + MIN(size, MAX_CHAINED_SWITCH_CASES) * 8;
+}
+
static bool handleFmt31t(CompilationUnit *cUnit, MIR *mir)
{
OpCode dalvikOpCode = mir->dalvikInsn.opCode;
@@ -3296,8 +3425,8 @@
genExportPC(cUnit, mir);
loadValueDirectFixed(cUnit, rlSrc, r0);
loadConstant(cUnit, r2, (int)dvmInterpHandleFillArrayData);
- loadConstant(cUnit, r1, (mir->dalvikInsn.vB << 1) +
- (int) (cUnit->method->insns + mir->offset));
+ loadConstant(cUnit, r1,
+ (int) (cUnit->method->insns + mir->offset + mir->dalvikInsn.vB));
opReg(cUnit, kOpBlx, r2);
clobberCallRegs(cUnit);
/* generate a branch over if successful */
@@ -3312,11 +3441,9 @@
break;
}
/*
- * TODO
- * - Add a 1 to 3-entry per-location cache here to completely
- * bypass the dvmInterpHandle[Packed/Sparse]Switch call w/ chaining
- * - Use out-of-line handlers for both of these. These ops
- * handle their own register allocation.
+ * Compute the goto target of up to
+ * MIN(switchSize, MAX_CHAINED_SWITCH_CASES) + 1 chaining cells.
+ * See the comment before findPackedSwitchIndex for the code layout.
*/
case OP_PACKED_SWITCH:
case OP_SPARSE_SWITCH: {
@@ -3324,23 +3451,24 @@
flushAllRegs(cUnit); /* Send everything to home location */
loadValueDirectFixed(cUnit, rlSrc, r1);
lockAllTemps(cUnit);
- // Exit to the interpreter, setting up r4PC
+ const u2 *switchData =
+ cUnit->method->insns + mir->offset + mir->dalvikInsn.vB;
+ u2 size = switchData[1];
+
if (dalvikOpCode == OP_PACKED_SWITCH) {
- loadConstant(cUnit, r4PC, (int)dvmInterpHandlePackedSwitch);
+ loadConstant(cUnit, r4PC, (int)findPackedSwitchIndex);
} else {
- loadConstant(cUnit, r4PC, (int)dvmInterpHandleSparseSwitch);
+ loadConstant(cUnit, r4PC, (int)findSparseSwitchIndex);
}
- loadConstant(cUnit, r0, (mir->dalvikInsn.vB << 1) +
- (int) (cUnit->method->insns + mir->offset));
+ /* r0 <- Addr of the switch data */
+ loadConstant(cUnit, r0,
+ (int) (cUnit->method->insns + mir->offset + mir->dalvikInsn.vB));
+ /* r2 <- pc of the instruction following the blx */
+ opRegReg(cUnit, kOpMov, r2, rpc);
opReg(cUnit, kOpBlx, r4PC);
clobberCallRegs(cUnit);
- loadConstant(cUnit, r1, (int)(cUnit->method->insns + mir->offset));
- loadWordDisp(cUnit, rGLUE, offsetof(InterpState,
- jitToInterpEntries.dvmJitToInterpNoChain), r2);
- opRegReg(cUnit, kOpAdd, r0, r0);
- opRegRegReg(cUnit, kOpAdd, r4PC, r0, r1);
- opReg(cUnit, kOpBlx, r2);
- clobberCallRegs(cUnit);
+ /* pc <- computed goto target */
+ opRegReg(cUnit, kOpMov, rpc, r0);
break;
}
default:
@@ -4532,6 +4660,22 @@
}
}
+ /*
+ * Generate the branch to the dvmJitToInterpNoChain entry point at the end
+ * of all chaining cells for the overflow cases.
+ */
+ if (cUnit->switchOverflowPad) {
+ loadConstant(cUnit, r0, (int) cUnit->switchOverflowPad);
+ loadWordDisp(cUnit, rGLUE, offsetof(InterpState,
+ jitToInterpEntries.dvmJitToInterpNoChain), r2);
+ opRegReg(cUnit, kOpAdd, r1, r1);
+ opRegRegReg(cUnit, kOpAdd, r4PC, r0, r1);
+#if defined(EXIT_STATS)
+ loadConstant(cUnit, r0, kSwitchOverflow);
+#endif
+ opReg(cUnit, kOpBlx, r2);
+ }
+
dvmCompilerApplyGlobalOptimizations(cUnit);
}