ARM: Implement QADD and QSUB.  Fixes #286917.


git-svn-id: svn://svn.valgrind.org/vex/trunk@2424 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_arm_toIR.c b/priv/guest_arm_toIR.c
index a7b5590..0426b40 100644
--- a/priv/guest_arm_toIR.c
+++ b/priv/guest_arm_toIR.c
@@ -1687,6 +1687,21 @@
              mkU8(31) );
 }
 
+/* Similarly .. also from HD p27 .. */
+static
+IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE,
+                                      IRTemp argL, IRTemp argR )
+{
+   IRTemp res = newTemp(Ity_I32);
+   assign(res, resE);
+   return
+      binop( Iop_Shr32, 
+             binop( Iop_And32,
+                    binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) ),
+                    binop( Iop_Xor32, mkexpr(res),  mkexpr(argL) )), 
+             mkU8(31) );
+}
+
 
 /*------------------------------------------------------------*/
 /*--- Larger helpers                                       ---*/
@@ -10255,6 +10270,108 @@
      /* fall through */
    }
 
+   /* ------------------ qadd<c> <Rd>,<Rn>,<Rm> ------------------- */
+   {
+     UInt regD = 99, regN = 99, regM = 99;
+     Bool gate = False;
+
+     if (isT) {
+        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
+           regN = INSNT0(3,0);
+           regD = INSNT1(11,8);
+           regM = INSNT1(3,0);
+           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+              gate = True;
+        }
+     } else {
+        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
+            INSNA(11,8)  == BITS4(0,0,0,0)         &&
+            INSNA(7,4)   == BITS4(0,1,0,1)) {
+           regD = INSNA(15,12);
+           regN = INSNA(19,16);
+           regM = INSNA(3,0);
+           if (regD != 15 && regN != 15 && regM != 15)
+              gate = True;
+        }
+     }
+
+     if (gate) {
+        IRTemp rNt   = newTemp(Ity_I32);
+        IRTemp rMt   = newTemp(Ity_I32);
+        IRTemp res_q = newTemp(Ity_I32);
+
+        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+        assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rNt)));
+        if (isT)
+           putIRegT( regD, mkexpr(res_q), condT );
+        else
+           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
+
+        or_into_QFLAG32(
+           signed_overflow_after_Add32(
+              binop(Iop_Add32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
+           condT
+        );
+
+        DIP("qadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
+        return True;
+     }
+     /* fall through */
+   }
+
+   /* ------------------ qsub<c> <Rd>,<Rn>,<Rm> ------------------- */
+   {
+     UInt regD = 99, regN = 99, regM = 99;
+     Bool gate = False;
+
+     if (isT) {
+        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0A0) {
+           regN = INSNT0(3,0);
+           regD = INSNT1(11,8);
+           regM = INSNT1(3,0);
+           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+              gate = True;
+        }
+     } else {
+        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
+            INSNA(11,8)  == BITS4(0,0,0,0)         &&
+            INSNA(7,4)   == BITS4(0,1,0,1)) {
+           regD = INSNA(15,12);
+           regN = INSNA(19,16);
+           regM = INSNA(3,0);
+           if (regD != 15 && regN != 15 && regM != 15)
+              gate = True;
+        }
+     }
+
+     if (gate) {
+        IRTemp rNt   = newTemp(Ity_I32);
+        IRTemp rMt   = newTemp(Ity_I32);
+        IRTemp res_q = newTemp(Ity_I32);
+
+        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+        assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rNt)));
+        if (isT)
+           putIRegT( regD, mkexpr(res_q), condT );
+        else
+           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
+
+        or_into_QFLAG32(
+           signed_overflow_after_Sub32(
+              binop(Iop_Sub32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
+           condT
+        );
+
+        DIP("qsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
+        return True;
+     }
+     /* fall through */
+   }
+
    /* ---------- Doesn't match anything. ---------- */
    return False;
 
diff --git a/priv/host_arm_isel.c b/priv/host_arm_isel.c
index f40aa6e..059006b 100644
--- a/priv/host_arm_isel.c
+++ b/priv/host_arm_isel.c
@@ -1362,6 +1362,10 @@
             fn = &h_generic_calc_QSub8Ux4; break;
          case Iop_Sad8Ux4:
             fn = &h_generic_calc_Sad8Ux4; break;
+         case Iop_QAdd32S:
+            fn = &h_generic_calc_QAdd32S; break;
+         case Iop_QSub32S:
+            fn = &h_generic_calc_QSub32S; break;
          default:
             break;
       }
diff --git a/priv/host_generic_simd64.c b/priv/host_generic_simd64.c
index 3bebe90..e3e1975 100644
--- a/priv/host_generic_simd64.c
+++ b/priv/host_generic_simd64.c
@@ -139,6 +139,16 @@
 
 /* Scalar helpers. */
 
+static inline Int qadd32S ( Int xx, Int yy ) 
+{
+   Long t = ((Long)xx) + ((Long)yy);
+   const Long loLim = -0x80000000LL;
+   const Long hiLim =  0x7FFFFFFFLL;
+   if (t < loLim) t = loLim;
+   if (t > hiLim) t = hiLim;
+   return (Int)t;
+}
+
 static inline Short qadd16S ( Short xx, Short yy ) 
 {
    Int t = ((Int)xx) + ((Int)yy);
@@ -169,6 +179,16 @@
    return (UChar)t;
 }
 
+static inline Int qsub32S ( Int xx, Int yy ) 
+{
+   Long t = ((Long)xx) - ((Long)yy);
+   const Long loLim = -0x80000000LL;
+   const Long hiLim =  0x7FFFFFFFLL;
+   if (t < loLim) t = loLim;
+   if (t > hiLim) t = hiLim;
+   return (Int)t;
+}
+
 static inline Short qsub16S ( Short xx, Short yy )
 {
    Int t = ((Int)xx) - ((Int)yy);
@@ -1379,6 +1399,17 @@
           + absdiff8U( sel8x4_0(xx), sel8x4_0(yy) );
 }
 
+UInt h_generic_calc_QAdd32S ( UInt xx, UInt yy )
+{
+   return qadd32S( xx, yy );
+}
+
+UInt h_generic_calc_QSub32S ( UInt xx, UInt yy )
+{
+   return qsub32S( xx, yy );
+}
+
+
 /*------------------------------------------------------------------*/
 /* Decimal Floating Point (DFP) externally visible helper functions */
 /* that implement Iop_BCDtoDPB and Iop_DPBtoBCD                     */
diff --git a/priv/host_generic_simd64.h b/priv/host_generic_simd64.h
index 1492ad0..4a5fa7a 100644
--- a/priv/host_generic_simd64.h
+++ b/priv/host_generic_simd64.h
@@ -153,11 +153,14 @@
 
 extern UInt h_generic_calc_Sad8Ux4  ( UInt, UInt );
 
+extern UInt h_generic_calc_QAdd32S  ( UInt, UInt );
+extern UInt h_generic_calc_QSub32S  ( UInt, UInt );
+
 extern UInt h_generic_calc_CmpNEZ16x2 ( UInt );
 extern UInt h_generic_calc_CmpNEZ8x4  ( UInt );
 
-extern ULong h_DPBtoBCD( ULong dpb );
-extern ULong h_BCDtoDPB( ULong bcd );
+extern ULong h_DPBtoBCD ( ULong dpb );
+extern ULong h_BCDtoDPB ( ULong bcd );
 
 ULong dpb_to_bcd(ULong chunk);  // helper for h_DPBtoBCD
 ULong bcd_to_dpb(ULong chunk);  // helper for h_BCDtoDPB
diff --git a/priv/ir_defs.c b/priv/ir_defs.c
index c689c2b..dc0fc33 100644
--- a/priv/ir_defs.c
+++ b/priv/ir_defs.c
@@ -334,6 +334,8 @@
       case Iop_TruncF64asF32: vex_printf("TruncF64asF32"); return;
       case Iop_CalcFPRF:      vex_printf("CalcFPRF"); return;
 
+      case Iop_QAdd32S: vex_printf("QAdd32S"); return;
+      case Iop_QSub32S: vex_printf("QSub32S"); return; 
       case Iop_Add16x2:   vex_printf("Add16x2"); return;
       case Iop_Sub16x2:   vex_printf("Sub16x2"); return;
       case Iop_QAdd16Sx2: vex_printf("QAdd16Sx2"); return;
@@ -2142,6 +2144,7 @@
       case Iop_Add32: case Iop_Sub32: case Iop_Mul32:
       case Iop_Or32:  case Iop_And32: case Iop_Xor32:
       case Iop_Max32U:
+      case Iop_QAdd32S: case Iop_QSub32S:
       case Iop_Add16x2: case Iop_Sub16x2:
       case Iop_QAdd16Sx2: case Iop_QAdd16Ux2:
       case Iop_QSub16Sx2: case Iop_QSub16Ux2: