Inline Sqrt bug fix; add support for fp/gen register copies
diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h
index 87978d8..be793d6 100644
--- a/vm/compiler/codegen/arm/ArmLIR.h
+++ b/vm/compiler/codegen/arm/ArmLIR.h
@@ -481,14 +481,27 @@
rd[11..8] imm8 */
THUMB2_IT, /* it [10111111] firstcond[7-4] mask[3-0] */
THUMB2_FMSTAT, /* fmstat [11101110111100011111101000010000] */
- THUMB2_VCMPED, /* vcmpe [111011101] D [11011] rd[15-12] [1011]
+ THUMB2_VCMPD, /* vcmp [111011101] D [11011] rd[15-12] [1011]
E [1] M [0] rm[3-0] */
- THUMB2_VCMPES, /* vcmpe [111011101] D [11010] rd[15-12] [1011]
+ THUMB2_VCMPS, /* vcmp [111011101] D [11010] rd[15-12] [1011]
E [1] M [0] rm[3-0] */
THUMB2_LDR_PC_REL12, /* ldr rd,[pc,#imm12] [1111100011011111] rt[15-12]
imm12[11-0] */
THUMB2_B_COND, /* b<c> [1110] S cond[25-22] imm6[21-16] [10]
J1 [0] J2 imm11[10..0] */
+ THUMB2_VMOVD_RR, /* vmov [111011101] D [110000] vd[15-12] [101101]
+ M [0] vm[3-0] */
+ THUMB2_VMOVS_RR, /* vmov [111011101] D [110000] vd[15-12] [101001]
+ M [0] vm[3-0] */
+ THUMB2_FMRS, /* vmov [111011100001] vn[19-16] rt[15-12] [1010]
+ N [0010000] */
+ THUMB2_FMSR, /* vmov [111011100000] vn[19-16] rt[15-12] [1010]
+ N [0010000] */
+ THUMB2_FMRRD, /* vmov [111011000101] rt2[19-16] rt[15-12]
+ [101100] M [1] vm[3-0] */
+ THUMB2_FMDRR, /* vmov [111011000100] rt2[19-16] rt[15-12]
+ [101100] M [1] vm[3-0] */
+
ARM_LAST,
} ArmOpCode;
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index b140457..77cbb4d 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -414,7 +414,7 @@
ENCODING_MAP(THUMB2_VDIVD, 0xee800b00,
DFP, 22, 12, DFP, 7, 16, DFP, 5, 0, UNUSED, -1, -1,
IS_TERTIARY_OP | CLOBBER_DEST,
- "vdivs", "!0S, !1S, !2S", 2),
+ "vdivd", "!0S, !1S, !2S", 2),
ENCODING_MAP(THUMB2_VCVTIF, 0xeeb80ac0,
SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
IS_BINARY_OP | CLOBBER_DEST,
@@ -490,11 +490,11 @@
ENCODING_MAP(THUMB2_VMOVS, 0xeeb00a40,
SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
IS_BINARY_OP | CLOBBER_DEST,
- "vmov.f32 ", "!0s, !1s", 2),
+ "vmov.f32 ", " !0s, !1s", 2),
ENCODING_MAP(THUMB2_VMOVD, 0xeeb00b40,
DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
IS_BINARY_OP | CLOBBER_DEST,
- "vmov.f64 ", "!0s, !1s", 2),
+ "vmov.f64 ", " !0S, !1S", 2),
ENCODING_MAP(THUMB2_LDMIA, 0xe8900000,
BITBLT, 19, 16, BITBLT, 15, 0, UNUSED, -1, -1, UNUSED, -1, -1,
IS_BINARY_OP | CLOBBER_DEST | CLOBBER_SRC1,
@@ -723,22 +723,47 @@
UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
NO_OPERAND | SETS_CCODES,
"fmstat", "", 2),
- ENCODING_MAP(THUMB2_VCMPED, 0xeeb40bc0,
+ ENCODING_MAP(THUMB2_VCMPD, 0xeeb40b40,
DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
IS_BINARY_OP,
- "vcmpe.f64", "!0S, !1S", 2),
- ENCODING_MAP(THUMB2_VCMPES, 0xeeb40ac0,
+ "vcmp.f64", "!0S, !1S", 2),
+ ENCODING_MAP(THUMB2_VCMPS, 0xeeb40a40,
SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
IS_BINARY_OP,
- "vcmpe.f32", "!0s, !1s", 2),
+ "vcmp.f32", "!0s, !1s", 2),
ENCODING_MAP(THUMB2_LDR_PC_REL12, 0xf8df0000,
BITBLT, 15, 12, BITBLT, 11, 0, UNUSED, -1, -1, UNUSED, -1, -1,
IS_TERTIARY_OP | CLOBBER_DEST,
"ldr", "r!0d,[rpc, #!1d", 2),
ENCODING_MAP(THUMB2_B_COND, 0xf0008000,
- BROFFSET, -1, -1, BITBLT, 25, 22, UNUSED, -1, -1, UNUSED, -1, -1,
+ BROFFSET, -1, -1, BITBLT, 25, 22, UNUSED, -1, -1,
+ UNUSED, -1, -1,
IS_BINARY_OP | IS_BRANCH | USES_CCODES,
"b!1c", "!0t", 2),
+ ENCODING_MAP(THUMB2_VMOVD_RR, 0xeeb00b40,
+ DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+ IS_BINARY_OP | CLOBBER_DEST,
+ "vmov.f64", "!0S, !1S", 2),
+ ENCODING_MAP(THUMB2_VMOVS_RR, 0xeeb00a40, /* single-precision copy; slot must match enum order */
+ SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+ IS_BINARY_OP | CLOBBER_DEST,
+ "vmov.f32", "!0s, !1s", 2), /* lower-case 's' formats single-precision regs */
+ ENCODING_MAP(THUMB2_FMRS, 0xee100a10,
+ BITBLT, 15, 12, SFP, 7, 16, UNUSED, -1, -1, UNUSED, -1, -1, /* vn: N is bit 7, Vn at [19-16] */
+ IS_BINARY_OP | CLOBBER_DEST,
+ "fmrs", "r!0d, !1s", 2),
+ ENCODING_MAP(THUMB2_FMSR, 0xee000a10,
+ SFP, 7, 16, BITBLT, 15, 12, UNUSED, -1, -1, UNUSED, -1, -1, /* vn: N is bit 7, Vn at [19-16] */
+ IS_BINARY_OP | CLOBBER_DEST,
+ "fmsr", "!0s, r!1d", 2),
+ ENCODING_MAP(THUMB2_FMRRD, 0xec500b10,
+ BITBLT, 15, 12, BITBLT, 19, 16, DFP, 5, 0, UNUSED, -1, -1,
+ IS_TERTIARY_OP | CLOBBER_DEST | CLOBBER_SRC1,
+ "fmrrd", "r!0d, r!1d, !2S", 2),
+ ENCODING_MAP(THUMB2_FMDRR, 0xec400b10,
+ DFP, 5, 0, BITBLT, 15, 12, BITBLT, 19, 16, UNUSED, -1, -1,
+ IS_TERTIARY_OP | CLOBBER_DEST,
+ "fmdrr", "!0S, r!1d, r!2d", 2),
};
@@ -812,7 +837,8 @@
} else if (delta > 1020) {
return true;
}
- lir->operands[1] = (lir->opCode == THUMB2_LDR_PC_REL12) ? delta : delta >> 2;
+ lir->operands[1] = (lir->opCode == THUMB2_LDR_PC_REL12) ?
+ delta : delta >> 2;
} else if (lir->opCode == THUMB2_CBNZ || lir->opCode == THUMB2_CBZ) {
ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
intptr_t pc = lir->generic.offset + 4;
@@ -820,8 +846,8 @@
int delta = target - pc;
if (delta > 126 || delta < 0) {
/*
- * TODO: allow multiple kinds of assembler failure to allow us to
- * change code patterns when things don't fit.
+ * TODO: allow multiple kinds of assembler failure to allow
+ * change of code patterns when things don't fit.
*/
return true;
} else {
diff --git a/vm/compiler/codegen/arm/Thumb2Util.c b/vm/compiler/codegen/arm/Thumb2Util.c
index 559cf0d..806bd02 100644
--- a/vm/compiler/codegen/arm/Thumb2Util.c
+++ b/vm/compiler/codegen/arm/Thumb2Util.c
@@ -1232,7 +1232,11 @@
int vDest = inlinedTarget(mir);
// TUNING: handle case of src already in FP reg
if (vDest >= 0) {
- if (vDest == vSrc) {
+ /*
+ * FIXME: disable this case to work around bug until after
+ * new schedule/ralloc mechanisms are done.
+ */
+ if (0 && (vDest == vSrc)) {
loadValue(cUnit, vSrc+1, ophi);
opRegRegImm(cUnit, OP_AND, ophi, ophi, 0x7fffffff, signMask);
storeValue(cUnit, ophi, vDest + 1, signMask);
diff --git a/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c b/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c
index 732172a..41a79de 100644
--- a/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c
@@ -116,11 +116,12 @@
{
int offset = offsetof(InterpState, retval);
OpCode opCode = mir->dalvikInsn.opCode;
- int vSrc = mir->dalvikInsn.vA;
+ int vSrc = mir->dalvikInsn.arg[0];
loadValueAddress(cUnit, vSrc, r2);
genDispatchToHandler(cUnit, TEMPLATE_SQRT_DOUBLE_VFP);
newLIR3(cUnit, THUMB_STR_RRI5, r0, rGLUE, offset >> 2);
newLIR3(cUnit, THUMB_STR_RRI5, r1, rGLUE, (offset >> 2) + 1);
+ resetRegisterScoreboard(cUnit);
return false;
}
diff --git a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
index 39df8c4..65e0ec0 100644
--- a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
@@ -14,6 +14,8 @@
* limitations under the License.
*/
+#include <math.h> // for double sqrt(double)
+
/*
* This file is included by Codegen-armv5te-vfp.c, and implements architecture
@@ -116,14 +118,26 @@
static bool genInlineSqrt(CompilationUnit *cUnit, MIR *mir)
{
int offset = offsetof(InterpState, retval);
- int vSrc = mir->dalvikInsn.vA;
+ int vSrc = mir->dalvikInsn.arg[0]; /* operand vreg is the inlined call's first argument */
int vDest = inlinedTarget(mir);
+ ArmLIR *branch;
+ ArmLIR *target;
+
loadDouble(cUnit, vSrc, dr1);
newLIR2(cUnit, THUMB2_VSQRTD, dr0, dr1);
+ newLIR2(cUnit, THUMB2_VCMPD, dr0, dr0); /* compare the vsqrt result with itself */
+ newLIR0(cUnit, THUMB2_FMSTAT); /* sets the condition codes tested by the branch below */
+ branch = newLIR2(cUnit, THUMB_B_COND, 0, ARM_COND_EQ); /* on EQ skip the sqrt() fallback; target patched below */
+ loadConstant(cUnit, r2, (int)sqrt); /* r2 <- address of sqrt() from <math.h> */
+ newLIR3(cUnit, THUMB2_FMRRD, r0, r1, dr1); /* r0,r1 <- original operand held in dr1 */
+ newLIR1(cUnit, THUMB_BLX_R, r2); /* call through r2 */
+ newLIR3(cUnit, THUMB2_FMDRR, dr0, r0, r1); /* dr0 <- r0,r1 (presumably the sqrt() return value) */
if (vDest >= 0)
- storeDouble(cUnit, dr0, vDest, rNone);
+ target = storeDouble(cUnit, dr0, vDest, rNone); /* keep the store LIR as the branch target */
else
- newLIR3(cUnit, THUMB2_VSTRD, dr0, rGLUE, offset >> 2);
+ target = newLIR3(cUnit, THUMB2_VSTRD, dr0, rGLUE, offset >> 2); /* keep the store LIR as the branch target */
+ branch->generic.target = (LIR *)target; /* EQ branch lands on the store of dr0 */
+ resetRegisterScoreboard(cUnit); /* NOTE(review): presumably drops cached register state after the call - confirm */
return true;
}
@@ -304,13 +318,13 @@
loadDouble(cUnit, vSrc2, dr1);
// Hard-coded use of r7 as temp. Revisit
loadConstant(cUnit,r7, defaultResult);
- newLIR2(cUnit, THUMB2_VCMPED, dr0, dr1);
+ newLIR2(cUnit, THUMB2_VCMPD, dr0, dr1);
} else {
loadFloat(cUnit, vSrc1, fr0);
loadFloat(cUnit, vSrc2, fr2);
// Hard-coded use of r7 as temp. Revisit
loadConstant(cUnit,r7, defaultResult);
- newLIR2(cUnit, THUMB2_VCMPES, fr0, fr2);
+ newLIR2(cUnit, THUMB2_VCMPS, fr0, fr2);
}
newLIR0(cUnit, THUMB2_FMSTAT);
genIT(cUnit, (defaultResult == -1) ? ARM_COND_GT : ARM_COND_MI, "");