Continuing evolution of Thumb2 support.
Bug fix for local optimization.
Enable partial floating-point store sinking (with significant perf gain!).
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index fb85253..ea133e7 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -65,6 +65,8 @@
* R -> register list
* s -> single precision floating point register
* S -> double precision floating point register
+ * m -> Thumb2 modified immediate
+ * M -> Thumb2 16-bit zero-extended immediate
*
* [!] escape. To insert "!", use "!!"
*/
@@ -365,19 +367,19 @@
"tst", "r!0d, r!1d", 1),
ENCODING_MAP(THUMB2_VLDRS, 0xed900a00,
SFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vldr", "!0s, [r!1d, #!2E]", 2),
ENCODING_MAP(THUMB2_VLDRD, 0xed900b00,
DFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vldr", "!0S, [r!1d, #!2E]", 2),
ENCODING_MAP(THUMB2_VMULS, 0xee200a00,
SFP, 22, 12, SFP, 7, 16, SFP, 5, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vmuls", "!0s, !1s, !2s", 2),
ENCODING_MAP(THUMB2_VMULD, 0xee200b00,
DFP, 22, 12, DFP, 7, 16, DFP, 5, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vmuld", "!0S, !1S, !2S", 2),
ENCODING_MAP(THUMB2_VSTRS, 0xed800a00,
SFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0,
@@ -389,60 +391,108 @@
"vstr", "!0S, [r!1d, #!2E]", 2),
ENCODING_MAP(THUMB2_VSUBS, 0xee300a40,
SFP, 22, 12, SFP, 7, 16, SFP, 5, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vsub", "!0s, !1s, !2s", 2),
ENCODING_MAP(THUMB2_VSUBD, 0xee300b40,
DFP, 22, 12, DFP, 7, 16, DFP, 5, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vsub", "!0S, !1S, !2S", 2),
ENCODING_MAP(THUMB2_VADDS, 0xee300a00,
SFP, 22, 12, SFP, 7, 16, SFP, 5, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vadd", "!0s, !1s, !2s", 2),
ENCODING_MAP(THUMB2_VADDD, 0xee300b00,
DFP, 22, 12, DFP, 7, 16, DFP, 5, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vadd", "!0S, !1S, !2S", 2),
ENCODING_MAP(THUMB2_VDIVS, 0xee800a00,
SFP, 22, 12, SFP, 7, 16, SFP, 5, 0,
- IS_TERTIARY_OP,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"vdivs", "!0s, !1s, !2s", 2),
ENCODING_MAP(THUMB2_VDIVD, 0xee800b00,
DFP, 22, 12, DFP, 7, 16, DFP, 5, 0,
- IS_TERTIARY_OP,
- "vdivs", "!0S, !1S, !2S", 2),
+ IS_TERTIARY_OP | CLOBBER_DEST,
+ "vdivd", "!0S, !1S, !2S", 2), /* double-precision divide: mnemonic previously read "vdivs" */
ENCODING_MAP(THUMB2_VCVTIF, 0xeeb80ac0,
SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
- IS_BINARY_OP,
+ IS_BINARY_OP | CLOBBER_DEST,
"vcvt.f32", "!0s, !1s", 2),
ENCODING_MAP(THUMB2_VCVTID, 0xeeb80bc0,
DFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
- IS_BINARY_OP,
+ IS_BINARY_OP | CLOBBER_DEST,
"vcvt.f64", "!0S, !1s", 2),
ENCODING_MAP(THUMB2_VCVTFI, 0xeebd0ac0,
SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
- IS_BINARY_OP,
+ IS_BINARY_OP | CLOBBER_DEST,
"vcvt.s32.f32 ", "!0s, !1s", 2),
ENCODING_MAP(THUMB2_VCVTDI, 0xeebd0bc0,
SFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1,
- IS_BINARY_OP,
+ IS_BINARY_OP | CLOBBER_DEST,
"vcvt.s32.f64 ", "!0s, !1S", 2),
ENCODING_MAP(THUMB2_VCVTFD, 0xeeb70ac0,
DFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
- IS_BINARY_OP,
+ IS_BINARY_OP | CLOBBER_DEST,
"vcvt.f64.f32 ", "!0S, !1s", 2),
ENCODING_MAP(THUMB2_VCVTDF, 0xeeb70bc0,
SFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1,
- IS_BINARY_OP,
+ IS_BINARY_OP | CLOBBER_DEST,
"vcvt.f32.f64 ", "!0s, !1S", 2),
ENCODING_MAP(THUMB2_VSQRTS, 0xeeb10ac0,
SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
- IS_BINARY_OP,
+ IS_BINARY_OP | CLOBBER_DEST,
"vsqrt.f32 ", "!0s, !1s", 2),
ENCODING_MAP(THUMB2_VSQRTD, 0xeeb10bc0,
DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1,
- IS_BINARY_OP,
+ IS_BINARY_OP | CLOBBER_DEST,
"vsqrt.f64 ", "!0S, !1S", 2),
+ ENCODING_MAP(THUMB2_MOV_IMM_SHIFT, 0xf04f0000,
+ BITBLT, 11, 8, MODIMM, -1, -1, UNUSED, -1, -1,
+ IS_BINARY_OP | CLOBBER_DEST,
+ "mov", "r!0d, #!1m", 2),
+ ENCODING_MAP(THUMB2_MOV_IMM16, 0xf2400000,
+ BITBLT, 11, 8, IMM16, -1, -1, UNUSED, -1, -1,
+ IS_BINARY_OP | CLOBBER_DEST,
+ "mov", "r!0d, #!1M", 2),
+ ENCODING_MAP(THUMB2_STR_RRI12, 0xf8c00000,
+ BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 11, 0, /* operand 0 -> bits 15..12, operand 1 -> bits 19..16, operand 2 -> bits 11..0 */
+ IS_TERTIARY_OP,
+ "str", "r!0d,[r!1d, #!2d]", 2), /* closing ']' was missing from the operand format */
+ ENCODING_MAP(THUMB2_LDR_RRI12, 0xf8d00000,
+ BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 11, 0, /* operand 0 -> bits 15..12, operand 1 -> bits 19..16, operand 2 -> bits 11..0 */
+ IS_TERTIARY_OP | CLOBBER_DEST,
+ "ldr", "r!0d,[r!1d, #!2d]", 2), /* closing ']' was missing from the operand format */
+ ENCODING_MAP(THUMB2_STR_RRI8_PREDEC, 0xf8400c00,
+ BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 8, 0,
+ IS_TERTIARY_OP,
+ "str", "r!0d,[r!1d, #-!2d]", 2),
+ ENCODING_MAP(THUMB2_LDR_RRI8_PREDEC, 0xf8500c00,
+ BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 8, 0,
+ IS_TERTIARY_OP | CLOBBER_DEST,
+ "ldr", "r!0d,[r!1d, #-!2d]", 2),
+ ENCODING_MAP(THUMB2_CBNZ, 0xb900,
+ BITBLT, 2, 0, IMM6, -1, -1, UNUSED, -1, -1,
+ IS_BINARY_OP,
+ "cbnz", "r!0d,!1t", 1),
+ ENCODING_MAP(THUMB2_CBZ, 0xb100,
+ BITBLT, 2, 0, IMM6, -1, -1, UNUSED, -1, -1,
+ IS_BINARY_OP,
+ "cbz", "r!0d,!1t", 1),
+ ENCODING_MAP(THUMB2_ADD_RRI12, 0xf2000000, /* plain 12-bit immediate add (ADDW); 0xf1000000 is the modified-immediate ADD form */
+ BITBLT, 11, 8, BITBLT, 19, 16, IMM12, -1, -1, /* operand 0 -> bits 11..8, operand 1 -> bits 19..16, operand 2 scattered by the IMM12 encoder */
+ IS_TERTIARY_OP | CLOBBER_DEST,
+ "add", "r!0d,r!1d,#!2d", 2),
+ ENCODING_MAP(THUMB2_MOV_RR, 0xea4f0000,
+ BITBLT, 11, 8, BITBLT, 3, 0, UNUSED, -1, -1,
+ IS_BINARY_OP | CLOBBER_DEST,
+ "mov", "r!0d, r!1d", 2),
+ ENCODING_MAP(THUMB2_VMOVS, 0xeeb00a40,
+ SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
+ IS_BINARY_OP | CLOBBER_DEST,
+ "vmov.f32 ", "!0s, !1s", 2),
+ ENCODING_MAP(THUMB2_VMOVD, 0xeeb00b40,
+ DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, /* both operands are double-precision register fields */
+ IS_BINARY_OP | CLOBBER_DEST,
+ "vmov.f64 ", "!0S, !1S", 2), /* DFP operands print with 'S' (double precision), not 's' */
};
#define PADDING_MOV_R0_R0 0x1C00
@@ -508,6 +558,15 @@
return true;
}
lir->operands[1] = delta >> 2;
+ } else if (lir->opCode == THUMB2_CBNZ || lir->opCode == THUMB2_CBZ) { /* compute the branch displacement for cbz/cbnz */
+ ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
+ intptr_t pc = lir->generic.offset + 4; /* displacement base: this instruction's offset plus 4 */
+ intptr_t target = targetLIR->generic.offset;
+ int delta = target - pc;
+ if (delta > 126 || delta < 0) { /* only forward displacements in 0..126 are accepted */
+ return true; /* NOTE(review): presumably tells the caller the branch cannot be encoded -- confirm */
+ }
+ lir->operands[1] = delta >> 1; /* operand 1 stores the displacement divided by two */
} else if (lir->opCode == THUMB_B_COND) {
ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
intptr_t pc = lir->generic.offset + 4;
@@ -552,6 +611,11 @@
switch(encoder->fieldLoc[i].kind) {
case UNUSED:
break;
+ case IMM6: /* 6-bit operand split across two instruction fields */
+ value = ((lir->operands[i] & 0x20) >> 5) << 9; /* operand bit 5 -> instruction bit 9 */
+ value |= (lir->operands[i] & 0x1f) << 3; /* operand bits 4..0 -> instruction bits 7..3 */
+ bits |= value;
+ break;
case BITBLT:
value = (lir->operands[i] << encoder->fieldLoc[i].start) &
((1 << (encoder->fieldLoc[i].end + 1)) - 1);
@@ -575,11 +639,19 @@
encoder->fieldLoc[i].start;
bits |= value;
break;
- case IMMSHIFT8:
case IMM12:
+ case MODIMM: /* shares the IMM12 scatter: operand bit 11 -> bit 26, bits 10..8 -> bits 14..12, bits 7..0 in place */
value = ((lir->operands[i] & 0x800) >> 11) << 26;
value |= ((lir->operands[i] & 0x700) >> 8) << 12;
value |= lir->operands[i] & 0x0ff;
+ bits |= value; /* merge the scattered immediate into bits */
+ break;
+ case IMM16: /* 16-bit operand scattered into four instruction fields */
+ value = ((lir->operands[i] & 0x0800) >> 11) << 26; /* operand bit 11 -> instruction bit 26 */
+ value |= ((lir->operands[i] & 0xf000) >> 12) << 16; /* operand bits 15..12 -> instruction bits 19..16 */
+ value |= ((lir->operands[i] & 0x0700) >> 8) << 12; /* operand bits 10..8 -> instruction bits 14..12 */
+ value |= lir->operands[i] & 0x0ff; /* operand bits 7..0 stay in place */
+ bits |= value;
break;
default:
assert(0);