Improved inline codegen, continuing the codegen restructuring.

Added support for the Thumb2 IT instruction. Moved compare-long and
floating-point comparisons inline. Temporarily disabled use of Thumb2
CBZ and CBNZ because they were causing too many out-of-range assembly
restarts. Bug fix for LIR3 assert.
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index 144a416..f391288 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -69,6 +69,7 @@
* m -> Thumb2 modified immediate
* n -> complimented Thumb2 modified immediate
* M -> Thumb2 16-bit zero-extended immediate
+ * b -> 4-digit binary
*
* [!] escape. To insert "!", use "!!"
*/
@@ -111,9 +112,9 @@
IS_TERTIARY_OP | CLOBBER_DEST,
"add", "r!0d, pc, #!1E", 1),
ENCODING_MAP(THUMB_ADD_SP_REL, 0xa800,
- BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
- IS_BINARY_OP | CLOBBER_DEST,
- "add", "r!0d, sp, #!1E", 1),
+ BITBLT, 10, 8, UNUSED, -1, -1, BITBLT, 7, 0, UNUSED, -1, -1,
+ IS_TERTIARY_OP | CLOBBER_DEST,
+ "add", "r!0d, sp, #!2E", 1),
ENCODING_MAP(THUMB_ADD_SPI7, 0xb000,
BITBLT, 6, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
IS_UNARY_OP | CLOBBER_DEST,
@@ -133,7 +134,7 @@
ENCODING_MAP(THUMB_B_COND, 0xd000,
BITBLT, 7, 0, BITBLT, 11, 8, UNUSED, -1, -1, UNUSED, -1, -1,
IS_BINARY_OP | IS_BRANCH | USES_CCODES,
- "!1c", "!0t", 1),
+ "b!1c", "!0t", 1),
ENCODING_MAP(THUMB_B_UNCOND, 0xe000,
BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
NO_OPERAND | IS_BRANCH,
@@ -215,9 +216,9 @@
IS_TERTIARY_OP | CLOBBER_DEST,
"ldr", "r!0d, [pc, #!1E]", 1),
ENCODING_MAP(THUMB_LDR_SP_REL, 0x9800,
- BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+ BITBLT, 10, 8, UNUSED, -1, -1, BITBLT, 7, 0, UNUSED, -1, -1,
IS_TERTIARY_OP | CLOBBER_DEST,
- "ldr", "r!0d, [sp, #!1E]", 1),
+ "ldr", "r!0d, [sp, #!2E]", 1),
ENCODING_MAP(THUMB_LDRB_RRI5, 0x7800,
BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, UNUSED, -1, -1,
IS_TERTIARY_OP | CLOBBER_DEST,
@@ -323,9 +324,9 @@
IS_TERTIARY_OP,
"str", "r!0d, [r!1d, r!2d]", 1),
ENCODING_MAP(THUMB_STR_SP_REL, 0x9000,
- BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
- IS_BINARY_OP,
- "str", "r!0d, [sp, #!1E]", 1),
+ BITBLT, 10, 8, UNUSED, -1, -1, BITBLT, 7, 0, UNUSED, -1, -1,
+ IS_TERTIARY_OP,
+ "str", "r!0d, [sp, #!2E]", 1),
ENCODING_MAP(THUMB_STRB_RRI5, 0x7000,
BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, UNUSED, -1, -1,
IS_TERTIARY_OP,
@@ -714,6 +715,30 @@
BITBLT, 11, 8, BITBLT, 19, 16, MODIMM, -1, -1, UNUSED, -1, -1,
IS_TERTIARY_OP | CLOBBER_DEST | SETS_CCODES | USES_CCODES,
"sbcs", "r!0d, r!1d, #!2m", 2),
+ ENCODING_MAP(THUMB2_IT, 0xbf00,
+ BITBLT, 7, 4, BITBLT, 3, 0, MODIMM, -1, -1, UNUSED, -1, -1,
+ IS_BINARY_OP | USES_CCODES,
+ "it:!1b", "!0c", 1),
+ ENCODING_MAP(THUMB2_FMSTAT, 0xeef1fa10,
+ UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
+ NO_OPERAND | SETS_CCODES,
+ "fmstat", "", 2),
+ ENCODING_MAP(THUMB2_VCMPED, 0xeeb40bc0,
+ DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+ IS_BINARY_OP,
+ "vcmpe.f64", "!0S, !1S", 2),
+ ENCODING_MAP(THUMB2_VCMPES, 0xeeb40ac0,
+ SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+ IS_BINARY_OP,
+ "vcmpe.f32", "!0s, !1s", 2),
+ ENCODING_MAP(THUMB2_LDR_PC_REL12, 0xf8df0000,
+ BITBLT, 15, 12, BITBLT, 11, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+ IS_TERTIARY_OP | CLOBBER_DEST,
+ "ldr", "r!0d,[rpc, #!1d", 2),
+ ENCODING_MAP(THUMB2_B_COND, 0xf0008000,
+ BROFFSET, -1, -1, BITBLT, 25, 22, UNUSED, -1, -1, UNUSED, -1, -1,
+ IS_BINARY_OP | IS_BRANCH | USES_CCODES,
+ "b!1c", "!0t", 2),
};
@@ -762,6 +787,7 @@
}
if (lir->opCode == THUMB_LDR_PC_REL ||
+ lir->opCode == THUMB2_LDR_PC_REL12 ||
lir->opCode == THUMB_ADD_PC_REL) {
ArmLIR *lirTarget = (ArmLIR *) lir->generic.target;
intptr_t pc = (lir->generic.offset + 4) & ~3;
@@ -776,25 +802,33 @@
LOGE("PC-rel distance is not multiples of 4: %d\n", delta);
dvmAbort();
}
- if (delta > 1023) {
+ if ((lir->opCode == THUMB2_LDR_PC_REL12) && (delta > 4091)) {
+ return true;
+ } else if (delta > 1020) {
return true;
}
- lir->operands[1] = delta >> 2;
+ lir->operands[1] = (lir->opCode == THUMB2_LDR_PC_REL12) ? delta : delta >> 2;
} else if (lir->opCode == THUMB2_CBNZ || lir->opCode == THUMB2_CBZ) {
ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
intptr_t pc = lir->generic.offset + 4;
intptr_t target = targetLIR->generic.offset;
int delta = target - pc;
if (delta > 126 || delta < 0) {
+ /*
+ * TODO: allow multiple kinds of assembler failure to allow us to
+ * change code patterns when things don't fit.
+ */
return true;
+ } else {
+ lir->operands[1] = delta >> 1;
}
- lir->operands[1] = delta >> 1;
- } else if (lir->opCode == THUMB_B_COND) {
+ } else if (lir->opCode == THUMB_B_COND ||
+ lir->opCode == THUMB2_B_COND) {
ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
intptr_t pc = lir->generic.offset + 4;
intptr_t target = targetLIR->generic.offset;
int delta = target - pc;
- if (delta > 254 || delta < -256) {
+ if ((lir->opCode == THUMB_B_COND) && (delta > 254 || delta < -256)) {
return true;
}
lir->operands[0] = delta >> 1;
@@ -829,69 +863,78 @@
u4 bits = encoder->skeleton;
int i;
for (i = 0; i < 4; i++) {
+ u4 operand;
u4 value;
+ operand = lir->operands[i];
switch(encoder->fieldLoc[i].kind) {
case UNUSED:
break;
+ case BROFFSET:
+ value = ((operand & 0x80000) >> 19) << 26;
+ value |= ((operand & 0x40000) >> 18) << 11;
+ value |= ((operand & 0x20000) >> 17) << 13;
+ value |= ((operand & 0x1f800) >> 11) << 16;
+ value |= (operand & 0x007ff);
+ break;
case SHIFT5:
- value = ((lir->operands[i] & 0x1c) >> 2) << 12;
- value |= (lir->operands[i] & 0x03) << 6;
+ value = ((operand & 0x1c) >> 2) << 12;
+ value |= (operand & 0x03) << 6;
bits |= value;
break;
case SHIFT:
- value = ((lir->operands[i] & 0x70) >> 4) << 12;
- value |= (lir->operands[i] & 0x0f) << 4;
+ value = ((operand & 0x70) >> 4) << 12;
+ value |= (operand & 0x0f) << 4;
bits |= value;
break;
case BWIDTH:
- value = lir->operands[i] - 1;
+ value = operand - 1;
bits |= value;
break;
case LSB:
- value = ((lir->operands[i] & 0x1c) >> 2) << 12;
- value |= (lir->operands[i] & 0x03) << 6;
+ value = ((operand & 0x1c) >> 2) << 12;
+ value |= (operand & 0x03) << 6;
bits |= value;
break;
case IMM6:
- value = ((lir->operands[i] & 0x20) >> 5) << 9;
- value |= (lir->operands[i] & 0x1f) << 3;
+ value = ((operand & 0x20) >> 5) << 9;
+ value |= (operand & 0x1f) << 3;
bits |= value;
break;
case BITBLT:
- value = (lir->operands[i] << encoder->fieldLoc[i].start) &
+ value = (operand << encoder->fieldLoc[i].start) &
((1 << (encoder->fieldLoc[i].end + 1)) - 1);
bits |= value;
break;
case DFP:
/* Snag the 1-bit slice and position it */
- value = ((lir->operands[i] & 0x10) >> 4) <<
+ value = ((operand & 0x10) >> 4) <<
encoder->fieldLoc[i].end;
/* Extract and position the 4-bit slice */
- value |= (lir->operands[i] & 0x0f) <<
+ value |= (operand & 0x0f) <<
encoder->fieldLoc[i].start;
bits |= value;
break;
case SFP:
/* Snag the 1-bit slice and position it */
- value = (lir->operands[i] & 0x1) <<
+ value = (operand & 0x1) <<
encoder->fieldLoc[i].end;
/* Extract and position the 4-bit slice */
- value |= ((lir->operands[i] & 0x1e) >> 1) <<
+ value |= ((operand & 0x1e) >> 1) <<
encoder->fieldLoc[i].start;
bits |= value;
break;
case IMM12:
case MODIMM:
- value = ((lir->operands[i] & 0x800) >> 11) << 26;
- value |= ((lir->operands[i] & 0x700) >> 8) << 12;
- value |= lir->operands[i] & 0x0ff;
+ value = ((operand & 0x800) >> 11) << 26;
+ value |= ((operand & 0x700) >> 8) << 12;
+ value |= operand & 0x0ff;
bits |= value;
break;
case IMM16:
- value = ((lir->operands[i] & 0x0800) >> 11) << 26;
- value |= ((lir->operands[i] & 0xf000) >> 12) << 16;
- value |= ((lir->operands[i] & 0x0700) >> 8) << 12;
- value |= lir->operands[i] & 0x0ff;
+ value = ((operand & 0x0800) >> 11) << 26;
+ value |= ((operand & 0xf000) >> 12) << 16;
+ value |= ((operand & 0x0700) >> 8) << 12;
+ value |= operand & 0x0ff;
bits |= value;
break;
default: