Improved codegen for inline, continuing codegen restructuring
Added support for Thumb2 IT. Moved compare-long and floating point
comparisons inline. Temporarily disabled use of Thumb2 CBZ & CBNZ
because they were causing too many out-of-range assembly restarts.
Bug fix for LIR3 assert.
diff --git a/vm/compiler/codegen/arm/ArchUtility.c b/vm/compiler/codegen/arm/ArchUtility.c
index eeee00b..3d55abd 100644
--- a/vm/compiler/codegen/arm/ArchUtility.c
+++ b/vm/compiler/codegen/arm/ArchUtility.c
@@ -82,6 +82,13 @@
assert((unsigned)(nc-'0') < 4);
operand = lir->operands[nc-'0'];
switch(*fmt++) {
+ case 'b':
+ strcpy(tbuf,"0000");
+ for (i=3; i>= 0; i--) {
+ tbuf[i] += operand & 1;
+ operand >>= 1;
+ }
+ break;
case 'n':
operand = ~expandImmediate(operand);
sprintf(tbuf,"%d [0x%x]", operand, operand);
@@ -115,28 +122,28 @@
case 'c':
switch (operand) {
case ARM_COND_EQ:
- strcpy(tbuf, "beq");
+ strcpy(tbuf, "eq");
break;
case ARM_COND_NE:
- strcpy(tbuf, "bne");
+ strcpy(tbuf, "ne");
break;
case ARM_COND_LT:
- strcpy(tbuf, "blt");
+ strcpy(tbuf, "lt");
break;
case ARM_COND_GE:
- strcpy(tbuf, "bge");
+ strcpy(tbuf, "ge");
break;
case ARM_COND_GT:
- strcpy(tbuf, "bgt");
+ strcpy(tbuf, "gt");
break;
case ARM_COND_LE:
- strcpy(tbuf, "ble");
+ strcpy(tbuf, "le");
break;
case ARM_COND_CS:
- strcpy(tbuf, "bcs");
+ strcpy(tbuf, "cs");
break;
case ARM_COND_MI:
- strcpy(tbuf, "bmi");
+ strcpy(tbuf, "mi");
break;
default:
strcpy(tbuf, "");
diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h
index 7d7fcab..001486d 100644
--- a/vm/compiler/codegen/arm/ArmLIR.h
+++ b/vm/compiler/codegen/arm/ArmLIR.h
@@ -196,12 +196,20 @@
typedef enum ArmConditionCode {
ARM_COND_EQ = 0x0, /* 0000 */
ARM_COND_NE = 0x1, /* 0001 */
- ARM_COND_LT = 0xb, /* 1011 */
+ ARM_COND_CS = 0x2, /* 0010 */
+ ARM_COND_CC = 0x3, /* 0011 */
+ ARM_COND_MI = 0x4, /* 0100 */
+ ARM_COND_PL = 0x5, /* 0101 */
+ ARM_COND_VS = 0x6, /* 0110 */
+ ARM_COND_VC = 0x7, /* 0111 */
+ ARM_COND_HI = 0x8, /* 1000 */
+ ARM_COND_LS = 0x9, /* 1001 */
ARM_COND_GE = 0xa, /* 1010 */
+ ARM_COND_LT = 0xb, /* 1011 */
ARM_COND_GT = 0xc, /* 1100 */
ARM_COND_LE = 0xd, /* 1101 */
- ARM_COND_CS = 0x2, /* 0010 */
- ARM_COND_MI = 0x4, /* 0100 */
+ ARM_COND_AL = 0xe, /* 1110 */
+ ARM_COND_NV = 0xf, /* 1111 */
} ArmConditionCode;
#define isPseudoOpCode(opCode) ((int)(opCode) < 0)
@@ -467,6 +475,16 @@
rd[11..8] imm8 */
THUMB2_SBC_RRI8, /* sbc [111100010111] rn[19..16] [0] imm3
rd[11..8] imm8 */
+ THUMB2_IT, /* it [10111111] firstcond[7-4] mask[3-0] */
+ THUMB2_FMSTAT, /* fmstat [11101110111100011111101000010000] */
+ THUMB2_VCMPED, /* vcmpe [111011101] D [11011] rd[15-12] [1011]
+ E [1] M [0] rm[3-0] */
+ THUMB2_VCMPES, /* vcmpe [111011101] D [11010] rd[15-12] [1011]
+ E [1] M [0] rm[3-0] */
+ THUMB2_LDR_PC_REL12, /* ldr rd,[pc,#imm12] [1111100011011111] rt[15-12]
+ imm12[11-0] */
+ THUMB2_B_COND, /* b<c> [1110] S cond[25-22] imm6[21-16] [10]
+ J1 [0] J2 imm11[10..0] */
ARM_LAST,
} ArmOpCode;
@@ -498,6 +516,7 @@
LSB, /* least significant bit using [14..12][7..6] */
BWIDTH, /* bit-field width, encoded as width-1 */
SHIFT5, /* Shift count, [14..12,7..6] */
+ BROFFSET, /* Sign-extended [26,11,13,21-16,10-0]:0 */
} ArmEncodingKind;
/* Struct used to define the snippet positions for each Thumb opcode */
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index 144a416..f391288 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -69,6 +69,7 @@
* m -> Thumb2 modified immediate
* n -> complimented Thumb2 modified immediate
* M -> Thumb2 16-bit zero-extended immediate
+ * b -> 4-digit binary
*
* [!] escape. To insert "!", use "!!"
*/
@@ -111,9 +112,9 @@
IS_TERTIARY_OP | CLOBBER_DEST,
"add", "r!0d, pc, #!1E", 1),
ENCODING_MAP(THUMB_ADD_SP_REL, 0xa800,
- BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
- IS_BINARY_OP | CLOBBER_DEST,
- "add", "r!0d, sp, #!1E", 1),
+ BITBLT, 10, 8, UNUSED, -1, -1, BITBLT, 7, 0, UNUSED, -1, -1,
+ IS_TERTIARY_OP | CLOBBER_DEST,
+ "add", "r!0d, sp, #!2E", 1),
ENCODING_MAP(THUMB_ADD_SPI7, 0xb000,
BITBLT, 6, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
IS_UNARY_OP | CLOBBER_DEST,
@@ -133,7 +134,7 @@
ENCODING_MAP(THUMB_B_COND, 0xd000,
BITBLT, 7, 0, BITBLT, 11, 8, UNUSED, -1, -1, UNUSED, -1, -1,
IS_BINARY_OP | IS_BRANCH | USES_CCODES,
- "!1c", "!0t", 1),
+ "b!1c", "!0t", 1),
ENCODING_MAP(THUMB_B_UNCOND, 0xe000,
BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
NO_OPERAND | IS_BRANCH,
@@ -215,9 +216,9 @@
IS_TERTIARY_OP | CLOBBER_DEST,
"ldr", "r!0d, [pc, #!1E]", 1),
ENCODING_MAP(THUMB_LDR_SP_REL, 0x9800,
- BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+ BITBLT, 10, 8, UNUSED, -1, -1, BITBLT, 7, 0, UNUSED, -1, -1,
IS_TERTIARY_OP | CLOBBER_DEST,
- "ldr", "r!0d, [sp, #!1E]", 1),
+ "ldr", "r!0d, [sp, #!2E]", 1),
ENCODING_MAP(THUMB_LDRB_RRI5, 0x7800,
BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, UNUSED, -1, -1,
IS_TERTIARY_OP | CLOBBER_DEST,
@@ -323,9 +324,9 @@
IS_TERTIARY_OP,
"str", "r!0d, [r!1d, r!2d]", 1),
ENCODING_MAP(THUMB_STR_SP_REL, 0x9000,
- BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
- IS_BINARY_OP,
- "str", "r!0d, [sp, #!1E]", 1),
+ BITBLT, 10, 8, UNUSED, -1, -1, BITBLT, 7, 0, UNUSED, -1, -1,
+ IS_TERTIARY_OP,
+ "str", "r!0d, [sp, #!2E]", 1),
ENCODING_MAP(THUMB_STRB_RRI5, 0x7000,
BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, UNUSED, -1, -1,
IS_TERTIARY_OP,
@@ -714,6 +715,30 @@
BITBLT, 11, 8, BITBLT, 19, 16, MODIMM, -1, -1, UNUSED, -1, -1,
IS_TERTIARY_OP | CLOBBER_DEST | SETS_CCODES | USES_CCODES,
"sbcs", "r!0d, r!1d, #!2m", 2),
+ ENCODING_MAP(THUMB2_IT, 0xbf00,
+ BITBLT, 7, 4, BITBLT, 3, 0, MODIMM, -1, -1, UNUSED, -1, -1,
+ IS_BINARY_OP | USES_CCODES,
+ "it:!1b", "!0c", 1),
+ ENCODING_MAP(THUMB2_FMSTAT, 0xeef1fa10,
+ UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
+ NO_OPERAND | SETS_CCODES,
+ "fmstat", "", 2),
+ ENCODING_MAP(THUMB2_VCMPED, 0xeeb40bc0,
+ DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+ IS_BINARY_OP,
+ "vcmpe.f64", "!0S, !1S", 2),
+ ENCODING_MAP(THUMB2_VCMPES, 0xeeb40ac0,
+ SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+ IS_BINARY_OP,
+ "vcmpe.f32", "!0s, !1s", 2),
+ ENCODING_MAP(THUMB2_LDR_PC_REL12, 0xf8df0000,
+ BITBLT, 15, 12, BITBLT, 11, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+ IS_TERTIARY_OP | CLOBBER_DEST,
+ "ldr", "r!0d,[rpc, #!1d", 2),
+ ENCODING_MAP(THUMB2_B_COND, 0xf0008000,
+ BROFFSET, -1, -1, BITBLT, 25, 22, UNUSED, -1, -1, UNUSED, -1, -1,
+ IS_BINARY_OP | IS_BRANCH | USES_CCODES,
+ "b!1c", "!0t", 2),
};
@@ -762,6 +787,7 @@
}
if (lir->opCode == THUMB_LDR_PC_REL ||
+ lir->opCode == THUMB2_LDR_PC_REL12 ||
lir->opCode == THUMB_ADD_PC_REL) {
ArmLIR *lirTarget = (ArmLIR *) lir->generic.target;
intptr_t pc = (lir->generic.offset + 4) & ~3;
@@ -776,25 +802,33 @@
LOGE("PC-rel distance is not multiples of 4: %d\n", delta);
dvmAbort();
}
- if (delta > 1023) {
+ if ((lir->opCode == THUMB2_LDR_PC_REL12) && (delta > 4091)) {
+ return true;
+ } else if (delta > 1020) {
return true;
}
- lir->operands[1] = delta >> 2;
+ lir->operands[1] = (lir->opCode == THUMB2_LDR_PC_REL12) ? delta : delta >> 2;
} else if (lir->opCode == THUMB2_CBNZ || lir->opCode == THUMB2_CBZ) {
ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
intptr_t pc = lir->generic.offset + 4;
intptr_t target = targetLIR->generic.offset;
int delta = target - pc;
if (delta > 126 || delta < 0) {
+ /*
+ * TODO: allow multiple kinds of assembler failure to allow us to
+ * change code patterns when things don't fit.
+ */
return true;
+ } else {
+ lir->operands[1] = delta >> 1;
}
- lir->operands[1] = delta >> 1;
- } else if (lir->opCode == THUMB_B_COND) {
+ } else if (lir->opCode == THUMB_B_COND ||
+ lir->opCode == THUMB2_B_COND) {
ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
intptr_t pc = lir->generic.offset + 4;
intptr_t target = targetLIR->generic.offset;
int delta = target - pc;
- if (delta > 254 || delta < -256) {
+ if ((lir->opCode == THUMB_B_COND) && (delta > 254 || delta < -256)) {
return true;
}
lir->operands[0] = delta >> 1;
@@ -829,69 +863,78 @@
u4 bits = encoder->skeleton;
int i;
for (i = 0; i < 4; i++) {
+ u4 operand;
u4 value;
+ operand = lir->operands[i];
switch(encoder->fieldLoc[i].kind) {
case UNUSED:
break;
+ case BROFFSET:
+ value = ((operand & 0x80000) >> 19) << 26;
+ value |= ((operand & 0x40000) >> 18) << 11;
+ value |= ((operand & 0x20000) >> 17) << 13;
+ value |= ((operand & 0x1f800) >> 11) << 16;
+ value |= (operand & 0x007ff);
+ break;
case SHIFT5:
- value = ((lir->operands[i] & 0x1c) >> 2) << 12;
- value |= (lir->operands[i] & 0x03) << 6;
+ value = ((operand & 0x1c) >> 2) << 12;
+ value |= (operand & 0x03) << 6;
bits |= value;
break;
case SHIFT:
- value = ((lir->operands[i] & 0x70) >> 4) << 12;
- value |= (lir->operands[i] & 0x0f) << 4;
+ value = ((operand & 0x70) >> 4) << 12;
+ value |= (operand & 0x0f) << 4;
bits |= value;
break;
case BWIDTH:
- value = lir->operands[i] - 1;
+ value = operand - 1;
bits |= value;
break;
case LSB:
- value = ((lir->operands[i] & 0x1c) >> 2) << 12;
- value |= (lir->operands[i] & 0x03) << 6;
+ value = ((operand & 0x1c) >> 2) << 12;
+ value |= (operand & 0x03) << 6;
bits |= value;
break;
case IMM6:
- value = ((lir->operands[i] & 0x20) >> 5) << 9;
- value |= (lir->operands[i] & 0x1f) << 3;
+ value = ((operand & 0x20) >> 5) << 9;
+ value |= (operand & 0x1f) << 3;
bits |= value;
break;
case BITBLT:
- value = (lir->operands[i] << encoder->fieldLoc[i].start) &
+ value = (operand << encoder->fieldLoc[i].start) &
((1 << (encoder->fieldLoc[i].end + 1)) - 1);
bits |= value;
break;
case DFP:
/* Snag the 1-bit slice and position it */
- value = ((lir->operands[i] & 0x10) >> 4) <<
+ value = ((operand & 0x10) >> 4) <<
encoder->fieldLoc[i].end;
/* Extract and position the 4-bit slice */
- value |= (lir->operands[i] & 0x0f) <<
+ value |= (operand & 0x0f) <<
encoder->fieldLoc[i].start;
bits |= value;
break;
case SFP:
/* Snag the 1-bit slice and position it */
- value = (lir->operands[i] & 0x1) <<
+ value = (operand & 0x1) <<
encoder->fieldLoc[i].end;
/* Extract and position the 4-bit slice */
- value |= ((lir->operands[i] & 0x1e) >> 1) <<
+ value |= ((operand & 0x1e) >> 1) <<
encoder->fieldLoc[i].start;
bits |= value;
break;
case IMM12:
case MODIMM:
- value = ((lir->operands[i] & 0x800) >> 11) << 26;
- value |= ((lir->operands[i] & 0x700) >> 8) << 12;
- value |= lir->operands[i] & 0x0ff;
+ value = ((operand & 0x800) >> 11) << 26;
+ value |= ((operand & 0x700) >> 8) << 12;
+ value |= operand & 0x0ff;
bits |= value;
break;
case IMM16:
- value = ((lir->operands[i] & 0x0800) >> 11) << 26;
- value |= ((lir->operands[i] & 0xf000) >> 12) << 16;
- value |= ((lir->operands[i] & 0x0700) >> 8) << 12;
- value |= lir->operands[i] & 0x0ff;
+ value = ((operand & 0x0800) >> 11) << 26;
+ value |= ((operand & 0xf000) >> 12) << 16;
+ value |= ((operand & 0x0700) >> 8) << 12;
+ value |= operand & 0x0ff;
bits |= value;
break;
default:
diff --git a/vm/compiler/codegen/arm/Codegen.c b/vm/compiler/codegen/arm/Codegen.c
index ff6a3a6..d9a29e8 100644
--- a/vm/compiler/codegen/arm/Codegen.c
+++ b/vm/compiler/codegen/arm/Codegen.c
@@ -548,6 +548,26 @@
}
/*
+ * If the next instruction is a move-result or move-result-long,
+ * return the target Dalvik register and convert that move to a
+ * nop. Otherwise, return -1. Used to optimize method inlining.
+ */
+static int inlinedTarget(MIR *mir)
+{
+ if (mir->next &&
+ ((mir->next->dalvikInsn.opCode == OP_MOVE_RESULT) ||
+ (mir->next->dalvikInsn.opCode == OP_MOVE_RESULT_OBJECT) ||
+ (mir->next->dalvikInsn.opCode == OP_MOVE_RESULT_WIDE))) {
+ mir->next->dalvikInsn.opCode = OP_NOP;
+ return mir->next->dalvikInsn.vA;
+ } else {
+ return -1;
+ }
+}
+
+
+
+/*
* The following are building blocks to insert constants into the pool or
* instruction streams.
*/
@@ -2775,10 +2795,7 @@
case OP_CMPG_DOUBLE:
return genCmpX(cUnit, mir, vA, vB, vC);
case OP_CMP_LONG:
- loadValuePair(cUnit,vB, r0, r1);
- loadValuePair(cUnit, vC, r2, r3);
- genDispatchToHandler(cUnit, TEMPLATE_CMP_LONG);
- storeValue(cUnit, r0, vA, r1);
+ genCmpLong(cUnit, mir, vA, vB, vC);
break;
case OP_AGET_WIDE:
genArrayGet(cUnit, mir, LONG, vB, vC, vA, 3);
@@ -3257,14 +3274,7 @@
else
break; /* Handle with C routine */
case INLINE_MATH_COS:
- if (genInlineCos(cUnit, mir))
- return false;
- else
- break; /* Handle with C routine */
case INLINE_MATH_SIN:
- if (genInlineSin(cUnit, mir))
- return false;
- else
break; /* Handle with C routine */
case INLINE_MATH_ABS_FLOAT:
return genInlinedAbsFloat(cUnit, mir);
diff --git a/vm/compiler/codegen/arm/LocalOptimizations.c b/vm/compiler/codegen/arm/LocalOptimizations.c
index 5f24b4c..6f00b9e 100644
--- a/vm/compiler/codegen/arm/LocalOptimizations.c
+++ b/vm/compiler/codegen/arm/LocalOptimizations.c
@@ -139,6 +139,9 @@
checkLIR->opCode == THUMB2_VLDRD ||
checkLIR->opCode == THUMB2_VSTRD;
+ /* Don't migrate into an IF region */
+ stopHere |= checkLIR->opCode == THUMB2_IT;
+
if (!isPseudoOpCode(checkLIR->opCode)) {
/* Store data is clobbered */
diff --git a/vm/compiler/codegen/arm/Thumb2Util.c b/vm/compiler/codegen/arm/Thumb2Util.c
index f05a867..8d3ce07 100644
--- a/vm/compiler/codegen/arm/Thumb2Util.c
+++ b/vm/compiler/codegen/arm/Thumb2Util.c
@@ -45,6 +45,7 @@
static ArmLIR *genBoundsCheck(CompilationUnit *cUnit, int rIndex,
int rBound, int dOffset, ArmLIR *pcrLabel);
static ArmLIR *genRegCopy(CompilationUnit *cUnit, int rDest, int rSrc);
+static int inlinedTarget(MIR *mir);
/* Routines which must be supplied here */
@@ -80,6 +81,8 @@
int rSrc1, int rSrc2);
static ArmLIR *loadBaseIndexed(CompilationUnit *cUnit, int rBase,
int rIndex, int rDest, int scale, OpSize size);
+static void genCmpLong(CompilationUnit *cUnit, MIR *mir, int vDest, int vSrc1,
+ int vSrc2);
static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir);
static bool genInlinedStringCharAt(CompilationUnit *cUnit, MIR *mir);
@@ -168,6 +171,46 @@
}
+/*
+ * Generate a Thumb2 IT instruction, which can nullify up to
+ * four subsequent instructions based on a condition and its
+ * inverse. The condition applies to the first instruction, which
+ * is executed if the condition is met. The string "guide" consists
+ * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
+ * A "T" means the instruction is executed if the condition is
+ * met, and an "E" means the instruction is executed if the condition
+ * is not met.
+ */
+static ArmLIR *genIT(CompilationUnit *cUnit, ArmConditionCode code,
+ char *guide)
+{
+ int mask;
+ int condBit = code & 1;
+ int altBit = condBit ^ 1;
+ int mask3 = 0;
+ int mask2 = 0;
+ int mask1 = 0;
+ //Note: case fallthroughs intentional
+ switch(strlen(guide)) {
+ case 3:
+ mask1 = (guide[2] == 'T') ? condBit : altBit;
+ case 2:
+ mask2 = (guide[1] == 'T') ? condBit : altBit;
+ case 1:
+ mask3 = (guide[0] == 'T') ? condBit : altBit;
+ break;
+ case 0:
+ break;
+ default:
+ assert(0);
+ dvmAbort();
+ }
+ mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
+ (1 << (3 - strlen(guide)));
+ return newLIR2(cUnit, THUMB2_IT, code, mask);
+}
+
+
static ArmLIR *fpRegCopy(CompilationUnit *cUnit, int rDest, int rSrc)
{
ArmLIR* res = dvmCompilerNew(sizeof(ArmLIR), true);
@@ -279,10 +322,6 @@
/* See if the value can be constructed cheaply */
if ((value >= 0) && (value <= 255)) {
return newLIR2(cUnit, THUMB_MOV_IMM, rDest, value);
- } else if ((value & 0xFFFFFF00) == 0xFFFFFF00) {
- res = newLIR2(cUnit, THUMB_MOV_IMM, rDest, ~value);
- newLIR2(cUnit, THUMB_MVN, rDest, rDest);
- return res;
}
/* Check Modified immediate special cases */
modImm = modifiedImmediate(value);
@@ -599,7 +638,13 @@
{
ArmLIR *branch;
int modImm;
- if ((LOWREG(reg)) && (checkValue == 0) &&
+ /*
+ * TODO: re-enable usage of THUMB2_CBZ & THUMB2_CBNZ once assembler is enhanced
+ * to allow us to replace code patterns when instructions don't reach. Currently,
+ * CB[N]Z is causing too many assembler aborts. What we want to do is emit
+ * the short forms, and then replace them with longer versions when needed.
+ */
+ if (0 && (LOWREG(reg)) && (checkValue == 0) &&
((cond == ARM_COND_EQ) || (cond == ARM_COND_NE))) {
branch = newLIR2(cUnit,
(cond == ARM_COND_EQ) ? THUMB2_CBZ : THUMB2_CBNZ,
@@ -974,12 +1019,10 @@
case OP_ROR:
return newLIR3(cUnit, THUMB2_ROR_RRI5, rDest, rSrc1, value);
case OP_ADD:
- if (LOWREG(rDest) && (rSrc1 == 13) && (value <= 1020)) { /* sp */
- assert((value & 0x3) == 0);
+ if (LOWREG(rDest) && (rSrc1 == 13) && (value <= 1020) && ((value & 0x3)==0)) {
return newLIR3(cUnit, THUMB_ADD_SP_REL, rDest, rSrc1,
value >> 2);
- } else if (LOWREG(rDest) && (rSrc1 == rpc) && (value <= 1020)) {
- assert((value & 0x3) == 0);
+ } else if (LOWREG(rDest) && (rSrc1 == rpc) && (value <= 1020) && ((value & 0x3)==0)) {
return newLIR3(cUnit, THUMB_ADD_PC_REL, rDest, rSrc1,
value >> 2);
}
@@ -1042,24 +1085,69 @@
return newLIR3(cUnit, opCode, rDest, rSrc1, modImm);
} else {
loadConstant(cUnit, rScratch, value);
- if (EncodingMap[opCode].flags & IS_QUAD_OP)
+ if (EncodingMap[altOpCode].flags & IS_QUAD_OP)
return newLIR4(cUnit, altOpCode, rDest, rSrc1, rScratch, 0);
else
return newLIR3(cUnit, altOpCode, rDest, rSrc1, rScratch);
}
}
-//TODO: specialize the inlined routines for Thumb2
+/*
+ * 64-bit 3way compare function.
+ * mov r7, #-1
+ * cmp op1hi, op2hi
+ * blt done
+ * bgt flip
+ * sub r7, op1lo, op2lo (treat as unsigned)
+ * beq done
+ * ite hi
+ * mov(hi) r7, #-1
+ * mov(!hi) r7, #1
+ * flip:
+ * neg r7
+ * done:
+ */
+static void genCmpLong(CompilationUnit *cUnit, MIR *mir,
+ int vDest, int vSrc1, int vSrc2)
+{
+ int op1lo = selectFirstRegister(cUnit, vSrc1, true);
+ int op1hi = NEXT_REG(op1lo);
+ int op2lo = NEXT_REG(op1hi);
+ int op2hi = NEXT_REG(op2lo);
+ loadValuePair(cUnit, vSrc1, op1lo, op1hi);
+ loadValuePair(cUnit, vSrc2, op2lo, op2hi);
+ /* Note: using hardcoded r7 & r4PC for now. revisit */
+ loadConstant(cUnit, r7, -1);
+ opRegReg(cUnit, OP_CMP, op1hi, op2hi);
+ ArmLIR *branch1 = opImmImm(cUnit, OP_COND_BR, 0, ARM_COND_LT);
+ ArmLIR *branch2 = opImmImm(cUnit, OP_COND_BR, 0, ARM_COND_GT);
+ opRegRegReg(cUnit, OP_SUB, r7, op1lo, op2lo);
+ ArmLIR *branch3 = opImmImm(cUnit, OP_COND_BR, 0, ARM_COND_EQ);
+
+ // TODO: need assert mechanism to verify IT block size
+ branch1->generic.target = (LIR *) genIT(cUnit, ARM_COND_HI, "E");
+ newLIR2(cUnit, THUMB2_MOV_IMM_SHIFT, r7, modifiedImmediate(-1));
+ newLIR2(cUnit, THUMB_MOV_IMM, r7, 1);
+
+ branch2->generic.target = (LIR *) opRegReg(cUnit, OP_NEG, r7, r7);
+ branch1->generic.target = (LIR *) storeValue(cUnit, r7, vDest, r4PC);
+ branch3->generic.target = branch1->generic.target;
+}
+
static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir)
{
DecodedInstruction *dInsn = &mir->dalvikInsn;
int offset = offsetof(InterpState, retval);
int regObj = selectFirstRegister(cUnit, dInsn->arg[0], false);
int reg1 = NEXT_REG(regObj);
+ int vDest = inlinedTarget(mir);
loadValue(cUnit, dInsn->arg[0], regObj);
genNullCheck(cUnit, dInsn->arg[0], regObj, mir->offset, NULL);
loadWordDisp(cUnit, regObj, gDvm.offJavaLangString_count, reg1);
- storeWordDisp(cUnit, rGLUE, offset, reg1, regObj);
+ if (vDest >= 0)
+ storeValue(cUnit, reg1, vDest, regObj);
+ else
+ storeWordDisp(cUnit, rGLUE, offset, reg1, rNone);
return false;
}
@@ -1072,6 +1160,7 @@
int regIdx = NEXT_REG(regObj);
int regMax = NEXT_REG(regIdx);
int regOff = NEXT_REG(regMax);
+ int vDest = inlinedTarget(mir);
loadValue(cUnit, dInsn->arg[0], regObj);
loadValue(cUnit, dInsn->arg[1], regIdx);
ArmLIR * pcrLabel = genNullCheck(cUnit, dInsn->arg[0], regObj,
@@ -1080,12 +1169,13 @@
loadWordDisp(cUnit, regObj, gDvm.offJavaLangString_offset, regOff);
loadWordDisp(cUnit, regObj, gDvm.offJavaLangString_value, regObj);
genBoundsCheck(cUnit, regIdx, regMax, mir->offset, pcrLabel);
-
- newLIR2(cUnit, THUMB_ADD_RI8, regObj, contents);
- newLIR3(cUnit, THUMB_ADD_RRR, regIdx, regIdx, regOff);
- newLIR3(cUnit, THUMB_ADD_RRR, regIdx, regIdx, regIdx);
- newLIR3(cUnit, THUMB_LDRH_RRR, regMax, regObj, regIdx);
- storeWordDisp(cUnit, rGLUE, offset, regMax, regObj);
+ opRegImm(cUnit, OP_ADD, regObj, contents, rNone);
+ opRegReg(cUnit, OP_ADD, regIdx, regOff);
+ loadBaseIndexed(cUnit, regObj, regIdx, regMax, 1, UNSIGNED_HALF);
+ if (vDest >= 0)
+ storeValue(cUnit, regMax, vDest, regObj);
+ else
+ storeWordDisp(cUnit, rGLUE, offset, regMax, rNone);
return false;
}
@@ -1095,12 +1185,20 @@
DecodedInstruction *dInsn = &mir->dalvikInsn;
int reg0 = selectFirstRegister(cUnit, dInsn->arg[0], false);
int sign = NEXT_REG(reg0);
- /* abs(x) = y<=x>>31, (x+y)^y. Shorter in ARM/THUMB2, no skip in THUMB */
+ int vDest = inlinedTarget(mir);
+ /* abs(x) = y<=x>>31, (x+y)^y. */
loadValue(cUnit, dInsn->arg[0], reg0);
- newLIR3(cUnit, THUMB_ASR, sign, reg0, 31);
- newLIR3(cUnit, THUMB_ADD_RRR, reg0, reg0, sign);
- newLIR2(cUnit, THUMB_EOR, reg0, sign);
- storeWordDisp(cUnit, rGLUE, offset, reg0, sign);
+ /*
+ * Thumb2's IT block also yields 3 instructions, but imposes
+ * scheduling constraints.
+ */
+ opRegRegImm(cUnit, OP_ASR, sign, reg0, 31, rNone);
+ opRegReg(cUnit, OP_ADD, reg0, sign);
+ opRegReg(cUnit, OP_XOR, reg0, sign);
+ if (vDest >= 0)
+ storeValue(cUnit, reg0, vDest, sign);
+ else
+ storeWordDisp(cUnit, rGLUE, offset, reg0, rNone);
return false;
}
@@ -1110,10 +1208,15 @@
DecodedInstruction *dInsn = &mir->dalvikInsn;
int reg0 = selectFirstRegister(cUnit, dInsn->arg[0], false);
int signMask = NEXT_REG(reg0);
+ int vDest = inlinedTarget(mir);
+ // TUNING: handle case of src already in FP reg
loadValue(cUnit, dInsn->arg[0], reg0);
loadConstant(cUnit, signMask, 0x7fffffff);
newLIR2(cUnit, THUMB_AND_RR, reg0, signMask);
- storeWordDisp(cUnit, rGLUE, offset, reg0, signMask);
+ if (vDest >= 0)
+ storeValue(cUnit, reg0, vDest, signMask);
+ else
+ storeWordDisp(cUnit, rGLUE, offset, reg0, rNone);
return false;
}
@@ -1124,30 +1227,46 @@
int oplo = selectFirstRegister(cUnit, dInsn->arg[0], true);
int ophi = NEXT_REG(oplo);
int signMask = NEXT_REG(ophi);
- loadValuePair(cUnit, dInsn->arg[0], oplo, ophi);
- loadConstant(cUnit, signMask, 0x7fffffff);
- storeWordDisp(cUnit, rGLUE, offset, oplo, ophi);
- newLIR2(cUnit, THUMB_AND_RR, ophi, signMask);
- storeWordDisp(cUnit, rGLUE, offset + 4, ophi, oplo);
+ int vSrc = dInsn->arg[0];
+ int vDest = inlinedTarget(mir);
+ // TUNING: handle case of src already in FP reg
+ if (vDest >= 0) {
+ if (vDest == vSrc) {
+ loadValue(cUnit, vSrc+1, ophi);
+ opRegRegImm(cUnit, OP_AND, ophi, ophi, 0x7fffffff, signMask);
+ storeValue(cUnit, ophi, vDest + 1, signMask);
+ } else {
+ loadValuePair(cUnit, dInsn->arg[0], oplo, ophi);
+ opRegRegImm(cUnit, OP_AND, ophi, ophi, 0x7fffffff, signMask);
+ storeValuePair(cUnit, oplo, ophi, vDest, signMask);
+ }
+ } else {
+ loadValuePair(cUnit, dInsn->arg[0], oplo, ophi);
+ loadConstant(cUnit, signMask, 0x7fffffff);
+ storeWordDisp(cUnit, rGLUE, offset, oplo, rNone);
+ opRegReg(cUnit, OP_AND, ophi, signMask);
+ storeWordDisp(cUnit, rGLUE, offset + 4, ophi, rNone);
+ }
return false;
}
- /* No select in thumb, so we need to branch. Thumb2 will do better */
static bool genInlinedMinMaxInt(CompilationUnit *cUnit, MIR *mir, bool isMin)
{
int offset = offsetof(InterpState, retval);
DecodedInstruction *dInsn = &mir->dalvikInsn;
int reg0 = selectFirstRegister(cUnit, dInsn->arg[0], false);
int reg1 = NEXT_REG(reg0);
+ int vDest = inlinedTarget(mir);
loadValue(cUnit, dInsn->arg[0], reg0);
loadValue(cUnit, dInsn->arg[1], reg1);
- newLIR2(cUnit, THUMB_CMP_RR, reg0, reg1);
- ArmLIR *branch1 = newLIR2(cUnit, THUMB_B_COND, 2,
- isMin ? ARM_COND_LT : ARM_COND_GT);
- newLIR2(cUnit, THUMB_MOV_RR, reg0, reg1);
- ArmLIR *target =
- newLIR3(cUnit, THUMB_STR_RRI5, reg0, rGLUE, offset >> 2);
- branch1->generic.target = (LIR *)target;
+ opRegReg(cUnit, OP_CMP, reg0, reg1);
+ //TODO: need assertion mechanism to validate IT region size
+ genIT(cUnit, (isMin) ? ARM_COND_GT : ARM_COND_LT, "");
+ opRegReg(cUnit, OP_MOV, reg0, reg1);
+ if (vDest >= 0)
+ storeValue(cUnit, reg0, vDest, reg1);
+ else
+ storeWordDisp(cUnit, rGLUE, offset, reg0, rNone);
return false;
}
@@ -1158,14 +1277,24 @@
int oplo = selectFirstRegister(cUnit, dInsn->arg[0], true);
int ophi = NEXT_REG(oplo);
int sign = NEXT_REG(ophi);
- /* abs(x) = y<=x>>31, (x+y)^y. Shorter in ARM/THUMB2, no skip in THUMB */
+ int vDest = inlinedTarget(mir);
+ /* abs(x) = y<=x>>31, (x+y)^y. */
loadValuePair(cUnit, dInsn->arg[0], oplo, ophi);
- newLIR3(cUnit, THUMB_ASR, sign, ophi, 31);
- newLIR3(cUnit, THUMB_ADD_RRR, oplo, oplo, sign);
- newLIR2(cUnit, THUMB_ADC, ophi, sign);
- newLIR2(cUnit, THUMB_EOR, oplo, sign);
- newLIR2(cUnit, THUMB_EOR, ophi, sign);
- storeWordDisp(cUnit, rGLUE, offset, oplo, sign);
- storeWordDisp(cUnit, rGLUE, offset + 4, ophi, sign);
+ /*
+ * Thumb2 IT block allows slightly shorter sequence,
+ * but introduces a scheduling barrier. Stick with this
+ * mechanism for now.
+ */
+ opRegRegImm(cUnit, OP_ASR, sign, ophi, 31, rNone);
+ opRegReg(cUnit, OP_ADD, oplo, sign);
+ opRegReg(cUnit, OP_ADC, ophi, sign);
+ opRegReg(cUnit, OP_XOR, oplo, sign);
+ opRegReg(cUnit, OP_XOR, ophi, sign);
+ if (vDest >= 0) {
+ storeValuePair(cUnit, oplo, ophi, vDest, sign);
+ } else {
+ storeWordDisp(cUnit, rGLUE, offset, oplo, rNone);
+ storeWordDisp(cUnit, rGLUE, offset + 4, ophi, rNone);
+ }
return false;
}
diff --git a/vm/compiler/codegen/arm/ThumbUtil.c b/vm/compiler/codegen/arm/ThumbUtil.c
index cde1f71..fb25a56 100644
--- a/vm/compiler/codegen/arm/ThumbUtil.c
+++ b/vm/compiler/codegen/arm/ThumbUtil.c
@@ -45,6 +45,7 @@
static ArmLIR *genBoundsCheck(CompilationUnit *cUnit, int rIndex,
int rBound, int dOffset, ArmLIR *pcrLabel);
static ArmLIR *genRegCopy(CompilationUnit *cUnit, int rDest, int rSrc);
+static int inlinedTarget(MIR *mir);
/* Routines which must be supplied here */
@@ -80,6 +81,8 @@
int rSrc1, int rSrc2);
static ArmLIR *loadBaseIndexed(CompilationUnit *cUnit, int rBase,
int rIndex, int rDest, int scale, OpSize size);
+static void genCmpLong(CompilationUnit *cUnit, MIR *mir, int vDest, int vSrc1,
+ int vSrc2);
static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir);
static bool genInlinedStringCharAt(CompilationUnit *cUnit, MIR *mir);
@@ -757,6 +760,15 @@
return res;
}
+static void genCmpLong(CompilationUnit *cUnit, MIR *mir,
+ int vDest, int vSrc1, int vSrc2)
+{
+ loadValuePair(cUnit, vSrc1, r0, r1);
+ loadValuePair(cUnit, vSrc2, r2, r3);
+ genDispatchToHandler(cUnit, TEMPLATE_CMP_LONG);
+ storeValue(cUnit, r0, vDest, r1);
+}
+
static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir)
{
DecodedInstruction *dInsn = &mir->dalvikInsn;
diff --git a/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c b/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c
index 6c5b010..732172a 100644
--- a/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c
@@ -124,16 +124,6 @@
return false;
}
-static bool genInlineCos(CompilationUnit *cUnit, MIR *mir)
-{
- return false;
-}
-
-static bool genInlineSin(CompilationUnit *cUnit, MIR *mir)
-{
- return false;
-}
-
static bool genArithOpFloat(CompilationUnit *cUnit, MIR *mir, int vDest,
int vSrc1, int vSrc2)
{
diff --git a/vm/compiler/codegen/arm/armv5te/ArchVariant.c b/vm/compiler/codegen/arm/armv5te/ArchVariant.c
index a1f2b00..4bd354b 100644
--- a/vm/compiler/codegen/arm/armv5te/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv5te/ArchVariant.c
@@ -117,16 +117,6 @@
return false; /* punt to C handler */
}
-static bool genInlineCos(CompilationUnit *cUnit, MIR *mir)
-{
- return false; /* punt to C handler */
-}
-
-static bool genInlineSin(CompilationUnit *cUnit, MIR *mir)
-{
- return false; /* punt to C handler */
-}
-
static bool genConversion(CompilationUnit *cUnit, MIR *mir)
{
return genConversionPortable(cUnit, mir);
diff --git a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
index f9f2c10..39df8c4 100644
--- a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
@@ -117,24 +117,16 @@
{
int offset = offsetof(InterpState, retval);
int vSrc = mir->dalvikInsn.vA;
+ int vDest = inlinedTarget(mir);
loadDouble(cUnit, vSrc, dr1);
newLIR2(cUnit, THUMB2_VSQRTD, dr0, dr1);
- assert((offset & 0x3) == 0); /* Must be word aligned */
- assert(offset < 1024);
- newLIR3(cUnit, THUMB2_VSTRD, dr0, rGLUE, offset >> 2);
+ if (vDest >= 0)
+ storeDouble(cUnit, dr0, vDest, rNone);
+ else
+ newLIR3(cUnit, THUMB2_VSTRD, dr0, rGLUE, offset >> 2);
return true;
}
-static bool genInlineCos(CompilationUnit *cUnit, MIR *mir)
-{
- return false;
-}
-
-static bool genInlineSin(CompilationUnit *cUnit, MIR *mir)
-{
- return false;
-}
-
static bool genArithOpFloat(CompilationUnit *cUnit, MIR *mir, int vDest,
int vSrc1, int vSrc2)
{
@@ -181,10 +173,6 @@
{
int op = THUMB_BKPT;
- /*
- * Don't attempt to optimize register usage since these opcodes call out to
- * the handlers.
- */
switch (mir->dalvikInsn.opCode) {
case OP_ADD_DOUBLE_2ADDR:
case OP_ADD_DOUBLE:
@@ -213,7 +201,7 @@
loadDouble(cUnit, vSrc1, dr1);
loadDouble(cUnit, vSrc2, dr2);
newLIR3(cUnit, op, dr0, dr1, dr2);
- storeDouble(cUnit, dr0, vDest, 0);
+ storeDouble(cUnit, dr0, vDest, rNone);
return false;
}
@@ -276,7 +264,7 @@
}
if (longDest) {
newLIR2(cUnit, op, dr0, srcReg);
- storeDouble(cUnit, dr0, vSrc1Dest, 0);
+ storeDouble(cUnit, dr0, vSrc1Dest, rNone);
} else {
newLIR2(cUnit, op, fr0, srcReg);
storeFloat(cUnit, fr0, vSrc1Dest, 0);
@@ -287,31 +275,50 @@
static bool genCmpX(CompilationUnit *cUnit, MIR *mir, int vDest, int vSrc1,
int vSrc2)
{
- TemplateOpCode template;
+ bool isDouble;
+ int defaultResult;
+ bool ltNaNBias;
- /*
- * Don't attempt to optimize register usage since these opcodes call out to
- * the handlers.
- */
switch(mir->dalvikInsn.opCode) {
case OP_CMPL_FLOAT:
- template = TEMPLATE_CMPL_FLOAT_VFP;
+ isDouble = false;
+ defaultResult = -1;
break;
case OP_CMPG_FLOAT:
- template = TEMPLATE_CMPG_FLOAT_VFP;
+ isDouble = false;
+ defaultResult = 1;
break;
case OP_CMPL_DOUBLE:
- template = TEMPLATE_CMPL_DOUBLE_VFP;
+ isDouble = true;
+ defaultResult = -1;
break;
case OP_CMPG_DOUBLE:
- template = TEMPLATE_CMPG_DOUBLE_VFP;
+ isDouble = true;
+ defaultResult = 1;
break;
default:
return true;
}
- loadValueAddress(cUnit, vSrc1, r0);
- loadValueAddress(cUnit, vSrc2, r1);
- genDispatchToHandler(cUnit, template);
- storeValue(cUnit, r0, vDest, r1);
+ if (isDouble) {
+ loadDouble(cUnit, vSrc1, dr0);
+ loadDouble(cUnit, vSrc2, dr1);
+ // Hard-coded use of r7 as temp. Revisit
+ loadConstant(cUnit,r7, defaultResult);
+ newLIR2(cUnit, THUMB2_VCMPED, dr0, dr1);
+ } else {
+ loadFloat(cUnit, vSrc1, fr0);
+ loadFloat(cUnit, vSrc2, fr2);
+ // Hard-coded use of r7 as temp. Revisit
+ loadConstant(cUnit,r7, defaultResult);
+ newLIR2(cUnit, THUMB2_VCMPES, fr0, fr2);
+ }
+ newLIR0(cUnit, THUMB2_FMSTAT);
+ genIT(cUnit, (defaultResult == -1) ? ARM_COND_GT : ARM_COND_MI, "");
+ newLIR2(cUnit, THUMB2_MOV_IMM_SHIFT, r7,
+ modifiedImmediate(-defaultResult)); // Must not alter ccodes
+ genIT(cUnit, ARM_COND_EQ, "");
+ loadConstant(cUnit, r7, 0);
+ // Hard-coded use of r4PC as temp. Revisit
+ storeValue(cUnit, r7, vDest, r4PC);
return false;
}