ARM: Implement QADD and QSUB. Fixes #286917.
git-svn-id: svn://svn.valgrind.org/vex/trunk@2424 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_arm_toIR.c b/priv/guest_arm_toIR.c
index a7b5590..0426b40 100644
--- a/priv/guest_arm_toIR.c
+++ b/priv/guest_arm_toIR.c
@@ -1687,6 +1687,21 @@
mkU8(31) );
}
+/* Similarly .. also from HD p27 .. */
+/* Build an I32 expression whose value is 1 iff the signed 32-bit
+   subtraction argL - argR (whose wrapped result is resE) overflowed,
+   and 0 otherwise.  Signed subtraction overflows exactly when the
+   operands have different signs and the result's sign differs from
+   argL's sign; (argL ^ argR) & (res ^ argL) puts that condition in
+   bit 31, which the final shift moves down to bit 0. */
+static
+IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE,
+ IRTemp argL, IRTemp argR )
+{
+ IRTemp res = newTemp(Ity_I32);  /* bind resE so it is used, not re-built */
+ assign(res, resE);
+ return
+ binop( Iop_Shr32,
+ binop( Iop_And32,
+ binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) ),
+ binop( Iop_Xor32, mkexpr(res), mkexpr(argL) )),
+ mkU8(31) );
+}
+
/*------------------------------------------------------------*/
/*--- Larger helpers ---*/
@@ -10255,6 +10270,108 @@
/* fall through */
}
+ /* ------------------ qadd<c> <Rd>,<Rm>,<Rn> ------------------- */
+ /* Rd = signed-saturated(Rm + Rn); the Q (sticky saturation) flag is
+    set iff the unsaturated sum would have overflowed.  Note the
+    assembly operand order is Rd, Rm, Rn — matching the DIP below. */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ /* Thumb-2 encoding: INSNT0 = 0xFA8:Rn, INSNT1 = 0xF:Rd:0x8:Rm */
+ if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ /* isBadRegT rejects register numbers that are UNPREDICTABLE in
+    Thumb encodings (presumably r13/r15 — confirm at its defn). */
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ /* ARM encoding: cond:0001 0000:Rn:Rd:0000:0101:Rm */
+ if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
+ INSNA(11,8) == BITS4(0,0,0,0) &&
+ INSNA(7,4) == BITS4(0,1,0,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ /* r15 (PC) is not allowed for any operand. */
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res_q = newTemp(Ity_I32);
+
+ assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ /* Saturating add; Iop_QAdd32S clamps to the signed 32-bit range. */
+ assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rNt)));
+ if (isT)
+ putIRegT( regD, mkexpr(res_q), condT );
+ else
+ putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
+
+ /* Sticky Q flag: OR in the signed-overflow bit of the plain
+    (wrapping) add — exactly the condition under which the
+    saturating add above clamped its result. */
+ or_into_QFLAG32(
+ signed_overflow_after_Add32(
+ binop(Iop_Add32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
+ condT
+ );
+
+ DIP("qadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ------------------ qsub<c> <Rd>,<Rm>,<Rn> ------------------- */
+ /* Rd = signed-saturated(Rm - Rn); the Q (sticky saturation) flag is
+    set iff the unsaturated difference would have overflowed.  The
+    assembly operand order is Rd, Rm, Rn (minuend Rm comes from bits
+    3:0, subtrahend Rn from bits 19:16), matching the DIP below. */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ /* Thumb-2 encoding: INSNT0 = 0xFA8:Rn, INSNT1 = 0xF:Rd:0xA:Rm */
+ if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0A0) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ /* isBadRegT rejects register numbers that are UNPREDICTABLE in
+    Thumb encodings (presumably r13/r15 — confirm at its defn). */
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ /* ARM encoding: cond:0001 0010:Rn:Rd:0000:0101:Rm */
+ if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
+ INSNA(11,8) == BITS4(0,0,0,0) &&
+ INSNA(7,4) == BITS4(0,1,0,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ /* r15 (PC) is not allowed for any operand. */
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res_q = newTemp(Ity_I32);
+
+ assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ /* Saturating Rm - Rn; Iop_QSub32S clamps to the signed 32-bit range. */
+ assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rNt)));
+ if (isT)
+ putIRegT( regD, mkexpr(res_q), condT );
+ else
+ putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
+
+ /* Sticky Q flag: OR in the signed-overflow bit of the plain
+    (wrapping) subtraction — exactly the condition under which the
+    saturating subtract above clamped its result. */
+ or_into_QFLAG32(
+ signed_overflow_after_Sub32(
+ binop(Iop_Sub32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
+ condT
+ );
+
+ DIP("qsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
+ return True;
+ }
+ /* fall through */
+ }
+
/* ---------- Doesn't match anything. ---------- */
return False;
diff --git a/priv/host_arm_isel.c b/priv/host_arm_isel.c
index f40aa6e..059006b 100644
--- a/priv/host_arm_isel.c
+++ b/priv/host_arm_isel.c
@@ -1362,6 +1362,10 @@
fn = &h_generic_calc_QSub8Ux4; break;
case Iop_Sad8Ux4:
fn = &h_generic_calc_Sad8Ux4; break;
+ case Iop_QAdd32S:
+ fn = &h_generic_calc_QAdd32S; break;
+ case Iop_QSub32S:
+ fn = &h_generic_calc_QSub32S; break;
default:
break;
}
diff --git a/priv/host_generic_simd64.c b/priv/host_generic_simd64.c
index 3bebe90..e3e1975 100644
--- a/priv/host_generic_simd64.c
+++ b/priv/host_generic_simd64.c
@@ -139,6 +139,16 @@
/* Scalar helpers. */
+/* Signed saturating 32-bit add: returns xx + yy clamped to
+   [-0x80000000, 0x7FFFFFFF].  The operands are widened to 64-bit
+   Long before adding, so the intermediate sum itself cannot
+   overflow (which would be undefined behavior on Int). */
+static inline Int qadd32S ( Int xx, Int yy )
+{
+ Long t = ((Long)xx) + ((Long)yy);
+ const Long loLim = -0x80000000LL;
+ const Long hiLim = 0x7FFFFFFFLL;
+ if (t < loLim) t = loLim;
+ if (t > hiLim) t = hiLim;
+ return (Int)t;
+}
+
static inline Short qadd16S ( Short xx, Short yy )
{
Int t = ((Int)xx) + ((Int)yy);
@@ -169,6 +179,16 @@
return (UChar)t;
}
+/* Signed saturating 32-bit subtract: returns xx - yy clamped to
+   [-0x80000000, 0x7FFFFFFF].  As with qadd32S, the 64-bit widening
+   makes the intermediate difference exact and UB-free. */
+static inline Int qsub32S ( Int xx, Int yy )
+{
+ Long t = ((Long)xx) - ((Long)yy);
+ const Long loLim = -0x80000000LL;
+ const Long hiLim = 0x7FFFFFFFLL;
+ if (t < loLim) t = loLim;
+ if (t > hiLim) t = hiLim;
+ return (Int)t;
+}
+
static inline Short qsub16S ( Short xx, Short yy )
{
Int t = ((Int)xx) - ((Int)yy);
@@ -1379,6 +1399,17 @@
+ absdiff8U( sel8x4_0(xx), sel8x4_0(yy) );
}
+/* Externally visible helper implementing Iop_QAdd32S (used as a call
+   target by the ARM instruction selector above).  Arguments and result
+   are 32-bit register images passed as UInt; the UInt<->Int
+   conversions reinterpret the same 32-bit pattern as signed. */
+UInt h_generic_calc_QAdd32S ( UInt xx, UInt yy )
+{
+ return qadd32S( xx, yy );
+}
+
+/* Externally visible helper implementing Iop_QSub32S; computes the
+   saturated signed difference xx - yy of two 32-bit register images. */
+UInt h_generic_calc_QSub32S ( UInt xx, UInt yy )
+{
+ return qsub32S( xx, yy );
+}
+
+
/*------------------------------------------------------------------*/
/* Decimal Floating Point (DFP) externally visible helper functions */
/* that implement Iop_BCDtoDPB and Iop_DPBtoBCD */
diff --git a/priv/host_generic_simd64.h b/priv/host_generic_simd64.h
index 1492ad0..4a5fa7a 100644
--- a/priv/host_generic_simd64.h
+++ b/priv/host_generic_simd64.h
@@ -153,11 +153,14 @@
extern UInt h_generic_calc_Sad8Ux4 ( UInt, UInt );
+extern UInt h_generic_calc_QAdd32S ( UInt, UInt );
+extern UInt h_generic_calc_QSub32S ( UInt, UInt );
+
extern UInt h_generic_calc_CmpNEZ16x2 ( UInt );
extern UInt h_generic_calc_CmpNEZ8x4 ( UInt );
-extern ULong h_DPBtoBCD( ULong dpb );
-extern ULong h_BCDtoDPB( ULong bcd );
+extern ULong h_DPBtoBCD ( ULong dpb );
+extern ULong h_BCDtoDPB ( ULong bcd );
ULong dpb_to_bcd(ULong chunk); // helper for h_DPBtoBCD
ULong bcd_to_dpb(ULong chunk); // helper for h_BCDtoDPB
diff --git a/priv/ir_defs.c b/priv/ir_defs.c
index c689c2b..dc0fc33 100644
--- a/priv/ir_defs.c
+++ b/priv/ir_defs.c
@@ -334,6 +334,8 @@
case Iop_TruncF64asF32: vex_printf("TruncF64asF32"); return;
case Iop_CalcFPRF: vex_printf("CalcFPRF"); return;
+ case Iop_QAdd32S: vex_printf("QAdd32S"); return;
+ case Iop_QSub32S: vex_printf("QSub32S"); return;
case Iop_Add16x2: vex_printf("Add16x2"); return;
case Iop_Sub16x2: vex_printf("Sub16x2"); return;
case Iop_QAdd16Sx2: vex_printf("QAdd16Sx2"); return;
@@ -2142,6 +2144,7 @@
case Iop_Add32: case Iop_Sub32: case Iop_Mul32:
case Iop_Or32: case Iop_And32: case Iop_Xor32:
case Iop_Max32U:
+ case Iop_QAdd32S: case Iop_QSub32S:
case Iop_Add16x2: case Iop_Sub16x2:
case Iop_QAdd16Sx2: case Iop_QAdd16Ux2:
case Iop_QSub16Sx2: case Iop_QSub16Ux2:
diff --git a/pub/libvex_ir.h b/pub/libvex_ir.h
index 860b08b..f0af9f0 100644
--- a/pub/libvex_ir.h
+++ b/pub/libvex_ir.h
@@ -746,6 +746,10 @@
/* ------------------ 32-bit SIMD Integer ------------------ */
+ /* 32x1 saturating add/sub (ok, well, not really SIMD :) */
+ Iop_QAdd32S,
+ Iop_QSub32S,
+
/* 16x2 add/sub, also signed/unsigned saturating variants */
Iop_Add16x2, Iop_Sub16x2,
Iop_QAdd16Sx2, Iop_QAdd16Ux2,