arm64: add support for the following insns. This completes support
for conversion instructions.
SCVTF d_d_imm, s_s_imm
UCVTF d_d_imm, s_s_imm
FCVTZS d_d_imm, s_s_imm
FCVTZU d_d_imm, s_s_imm
FCVTXN s_d
SCVTF d_d, s_s
UCVTF d_d, s_s
SCVTF {2d_2d,4s_4s,2s_2s}_imm
UCVTF {2d_2d,4s_4s,2s_2s}_imm
FCVTZS {2d_2d,4s_4s,2s_2s}_imm
FCVTZU {2d_2d,4s_4s,2s_2s}_imm
FCVTXN 2s/4s_2d
FCVTZ{S,U} {w,x}_{s,x}_#fbits
git-svn-id: svn://svn.valgrind.org/vex/trunk@3119 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_arm64_toIR.c b/priv/guest_arm64_toIR.c
index 1249de1..a5b7d21 100644
--- a/priv/guest_arm64_toIR.c
+++ b/priv/guest_arm64_toIR.c
@@ -48,6 +48,9 @@
* FRINTA, FRINTN are kludged .. they just round to nearest. No special
handling for the "ties" case. FRINTX might be dubious too.
+
+ * Ditto FCVTXN. It should "round to odd" (truncate, then set the result's
+ lowest bit if inexact); this implementation just rounds to nearest.
*/
/* "Special" instructions.
@@ -8148,6 +8151,16 @@
}
+/* Returns 2.0 ^ n for n in 1 .. 64, as a product of two smaller powers; other n trip the vassert. */
+static Double two_to_the_plus ( Int n )
+{
+ if (n == 1) return 2.0; /* base case: 2.0 ^ 1 */
+ vassert(n >= 2 && n <= 64); /* reached only when n != 1 */
+ Int half = n / 2; /* n >= 2 here, so 1 <= half <= n - 1 */
+ return two_to_the_plus(half) * two_to_the_plus(n - half); /* 2^half * 2^(n-half) */
+}
+
+
/*------------------------------------------------------------*/
/*--- SIMD and FP instructions ---*/
/*------------------------------------------------------------*/
@@ -9340,6 +9353,83 @@
return True;
}
+ if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,0,0)) {
+ /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */
+ /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */
+ UInt size = 0;
+ UInt fbits = 0;
+ Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
+ /* The following holds because immh is never zero. */
+ vassert(ok);
+ /* The following holds because immh >= 0100. */
+ vassert(size == X10 || size == X11);
+ Bool isD = size == X11;
+ Bool isU = bitU == 1;
+ vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
+ Double scale = two_to_the_minus(fbits);
+ IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
+ : IRExpr_Const(IRConst_F32( (Float)scale ));
+ IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
+ IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
+ : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
+ IRType tyF = isD ? Ity_F64 : Ity_F32;
+ IRType tyI = isD ? Ity_I64 : Ity_I32;
+ IRTemp src = newTemp(tyI);
+ IRTemp res = newTemp(tyF);
+ IRTemp rm = mk_get_IR_rounding_mode();
+ assign(src, getQRegLane(nn, 0, tyI));
+ assign(res, triop(opMUL, mkexpr(rm),
+ binop(opCVT, mkexpr(rm), mkexpr(src)), scaleE));
+ putQRegLane(dd, 0, mkexpr(res));
+ if (!isD) {
+ putQRegLane(dd, 1, mkU32(0));
+ }
+ putQRegLane(dd, 1, mkU64(0));
+ const HChar ch = isD ? 'd' : 's';
+ DIP("%s %c%u, %c%u, #%u\n", isU ? "ucvtf" : "scvtf",
+ ch, dd, ch, nn, fbits);
+ return True;
+ }
+
+ if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,1,1)) {
+ /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */
+ /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */
+ UInt size = 0;
+ UInt fbits = 0;
+ Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
+ /* The following holds because immh is never zero. */
+ vassert(ok);
+ /* The following holds because immh >= 0100. */
+ vassert(size == X10 || size == X11);
+ Bool isD = size == X11;
+ Bool isU = bitU == 1;
+ vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
+ Double scale = two_to_the_plus(fbits);
+ IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
+ : IRExpr_Const(IRConst_F32( (Float)scale ));
+ IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
+ IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
+ : (isD ? Iop_F64toI64S : Iop_F32toI32S);
+ IRType tyF = isD ? Ity_F64 : Ity_F32;
+ IRType tyI = isD ? Ity_I64 : Ity_I32;
+ IRTemp src = newTemp(tyF);
+ IRTemp res = newTemp(tyI);
+ IRTemp rm = newTemp(Ity_I32);
+ assign(src, getQRegLane(nn, 0, tyF));
+ assign(rm, mkU32(Irrm_ZERO));
+ assign(res, binop(opCVT, mkexpr(rm),
+ triop(opMUL, mkexpr(rm), mkexpr(src), scaleE)));
+ putQRegLane(dd, 0, mkexpr(res));
+ if (!isD) {
+ putQRegLane(dd, 1, mkU32(0));
+ }
+ putQRegLane(dd, 1, mkU64(0));
+ const HChar ch = isD ? 'd' : 's';
+ DIP("%s %c%u, %c%u, #%u\n", isU ? "fcvtzu" : "fcvtzs",
+ ch, dd, ch, nn, fbits);
+ return True;
+ }
+
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
return False;
# undef INSN
@@ -9917,6 +10007,19 @@
return True;
}
+ if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) {
+ /* -------- 1,01,10110 FCVTXN s_d -------- */
+ /* Using Irrm_NEAREST here isn't right. The docs say "round to
+ odd" but I don't know what that really means. */
+ putQRegLO(dd,
+ binop(Iop_F64toF32, mkU32(Irrm_NEAREST),
+ getQRegLO(nn, Ity_F64)));
+ putQRegLane(dd, 1, mkU32(0));
+ putQRegLane(dd, 1, mkU64(0));
+ DIP("fcvtxn s%u, d%u\n", dd, nn);
+ return True;
+ }
+
ix = 0; /*INVALID*/
switch (opcode) {
case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
@@ -9969,6 +10072,25 @@
return True;
}
+ if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
+ /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */
+ /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */
+ Bool isU = bitU == 1;
+ Bool isD = (size & 1) == 1;
+ IRType tyI = isD ? Ity_I64 : Ity_I32;
+ IROp iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
+ : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
+ IRTemp rm = mk_get_IR_rounding_mode();
+ putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI)));
+ if (!isD) {
+ putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
+ }
+ putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
+ HChar c = isD ? 'd' : 's';
+ DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn);
+ return True;
+ }
+
if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
/* -------- 0,1x,11101: FRECPE d_d, s_s -------- */
/* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
@@ -10584,6 +10706,99 @@
return False;
}
+ if (opcode == BITS5(1,1,1,0,0)) {
+ /* -------- 0,11100 SCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
+ /* -------- 1,11100 UCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
+ /* If immh is of the form 00xx, the insn is invalid. */
+ if (immh < BITS4(0,1,0,0)) return False;
+ UInt size = 0;
+ UInt fbits = 0;
+ Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
+ /* The following holds because immh is never zero. */
+ vassert(ok);
+ /* The following holds because immh >= 0100. */
+ vassert(size == X10 || size == X11);
+ Bool isD = size == X11;
+ Bool isU = bitU == 1;
+ Bool isQ = bitQ == 1;
+ if (isD && !isQ) return False; /* reject .1d case */
+ vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
+ Double scale = two_to_the_minus(fbits);
+ IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
+ : IRExpr_Const(IRConst_F32( (Float)scale ));
+ IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
+ IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
+ : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
+ IRType tyF = isD ? Ity_F64 : Ity_F32;
+ IRType tyI = isD ? Ity_I64 : Ity_I32;
+ UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
+ vassert(nLanes == 2 || nLanes == 4);
+ for (UInt i = 0; i < nLanes; i++) {
+ IRTemp src = newTemp(tyI);
+ IRTemp res = newTemp(tyF);
+ IRTemp rm = mk_get_IR_rounding_mode();
+ assign(src, getQRegLane(nn, i, tyI));
+ assign(res, triop(opMUL, mkexpr(rm),
+ binop(opCVT, mkexpr(rm), mkexpr(src)),
+ scaleE));
+ putQRegLane(dd, i, mkexpr(res));
+ }
+ if (!isQ) {
+ putQRegLane(dd, 1, mkU64(0));
+ }
+ const HChar* arr = nameArr_Q_SZ(bitQ, size);
+ DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf",
+ nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
+ return True;
+ }
+
+ if (opcode == BITS5(1,1,1,1,1)) {
+ /* -------- 0,11111 FCVTZS {2d_2d,4s_4s,2s_2s}_imm -------- */
+ /* -------- 1,11111 FCVTZU {2d_2d,4s_4s,2s_2s}_imm -------- */
+ /* If immh is of the form 00xx, the insn is invalid. */
+ if (immh < BITS4(0,1,0,0)) return False;
+ UInt size = 0;
+ UInt fbits = 0;
+ Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
+ /* The following holds because immh is never zero. */
+ vassert(ok);
+ /* The following holds because immh >= 0100. */
+ vassert(size == X10 || size == X11);
+ Bool isD = size == X11;
+ Bool isU = bitU == 1;
+ Bool isQ = bitQ == 1;
+ if (isD && !isQ) return False; /* reject .1d case */
+ vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
+ Double scale = two_to_the_plus(fbits);
+ IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
+ : IRExpr_Const(IRConst_F32( (Float)scale ));
+ IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
+ IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
+ : (isD ? Iop_F64toI64S : Iop_F32toI32S);
+ IRType tyF = isD ? Ity_F64 : Ity_F32;
+ IRType tyI = isD ? Ity_I64 : Ity_I32;
+ UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
+ vassert(nLanes == 2 || nLanes == 4);
+ for (UInt i = 0; i < nLanes; i++) {
+ IRTemp src = newTemp(tyF);
+ IRTemp res = newTemp(tyI);
+ IRTemp rm = newTemp(Ity_I32);
+ assign(src, getQRegLane(nn, i, tyF));
+ assign(rm, mkU32(Irrm_ZERO));
+ assign(res, binop(opCVT, mkexpr(rm),
+ triop(opMUL, mkexpr(rm),
+ mkexpr(src), scaleE)));
+ putQRegLane(dd, i, mkexpr(res));
+ }
+ if (!isQ) {
+ putQRegLane(dd, 1, mkU64(0));
+ }
+ const HChar* arr = nameArr_Q_SZ(bitQ, size);
+ DIP("%s %s.%s, %s.%s, #%u\n", isU ? "fcvtzu" : "fcvtzs",
+ nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
+ return True;
+ }
+
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
return False;
# undef INSN
@@ -11983,8 +12198,33 @@
return True;
}
+ if (bitU == 1 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
+ /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */
+ /* Using Irrm_NEAREST here isn't right. The docs say "round to
+ odd" but I don't know what that really means. */
+ IRType srcTy = Ity_F64;
+ IROp opCvt = Iop_F64toF32;
+ IRTemp src[2];
+ for (UInt i = 0; i < 2; i++) {
+ src[i] = newTemp(srcTy);
+ assign(src[i], getQRegLane(nn, i, srcTy));
+ }
+ for (UInt i = 0; i < 2; i++) {
+ putQRegLane(dd, 2 * bitQ + i,
+ binop(opCvt, mkU32(Irrm_NEAREST), mkexpr(src[i])));
+ }
+ if (bitQ == 0) {
+ putQRegLane(dd, 1, mkU64(0));
+ }
+ const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
+ const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
+ DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
+ nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
+ return True;
+ }
+
if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
- /* -------- 0,0x,10110: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
+ /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
UInt nLanes = size == X00 ? 4 : 2;
IRType srcTy = size == X00 ? Ity_F16 : Ity_F32;
IROp opCvt = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
@@ -13111,9 +13351,52 @@
UInt nn = INSN(9,5);
UInt dd = INSN(4,0);
- // op = 010, 011
- /* -------------- {S,U}CVTF (scalar, fixedpt) -------------- */
- /* (ix) sf S 28 ty rm op 15 9 4
+ if (ty <= X01 && rm == X11
+ && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
+ /* -------- (ix) sf ty rm opc -------- */
+ /* -------- 0 0 00 11 000: FCVTZS w_s_#fbits -------- */
+ /* -------- 1 0 01 11 000: FCVTZS w_d_#fbits -------- */
+ /* -------- 2 1 00 11 000: FCVTZS x_s_#fbits -------- */
+ /* -------- 3 1 01 11 000: FCVTZS x_d_#fbits -------- */
+
+ /* -------- 4 0 00 11 001: FCVTZU w_s_#fbits -------- */
+ /* -------- 5 0 01 11 001: FCVTZU w_d_#fbits -------- */
+ /* -------- 6 1 00 11 001: FCVTZU x_s_#fbits -------- */
+ /* -------- 7 1 01 11 001: FCVTZU x_d_#fbits -------- */
+ Bool isI64 = bitSF == 1;
+ Bool isF64 = (ty & 1) == 1;
+ Bool isU = (op & 1) == 1;
+ UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
+
+ Int fbits = 64 - sc;
+ vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
+
+ Double scale = two_to_the_plus(fbits);
+ IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
+ : IRExpr_Const(IRConst_F32( (Float)scale ));
+ IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
+
+ const IROp ops[8]
+ = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S,
+ Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U };
+ IRTemp irrm = newTemp(Ity_I32);
+ assign(irrm, mkU32(Irrm_ZERO));
+
+ IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32);
+ IRExpr* res = binop(ops[ix], mkexpr(irrm),
+ triop(opMUL, mkexpr(irrm), src, scaleE));
+ putIRegOrZR(isI64, dd, res);
+
+ DIP("fcvtz%c %s, %s, #%d\n",
+ isU ? 'u' : 's', nameIRegOrZR(isI64, dd),
+ nameQRegLO(nn, isF64 ? Ity_F64 : Ity_F32), fbits);
+ return True;
+ }
+
+ /* ------ sf,ty,rm,opc ------ */
+ /* ------ x,0x,00,010 SCVTF s/d, w/x, #fbits ------ */
+ /* ------ x,0x,00,011 UCVTF s/d, w/x, #fbits ------ */
+ /* (ix) sf S 28 ty rm opc 15 9 4
0 0 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Wn, #fbits
1 0 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Wn, #fbits
2 1 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Xn, #fbits