Memcheck fixes for new IR ops introduced by r2702 (which added support
for AVX2, BMI1, BMI2 and FMA instructions). Part of #305728.
(Jakub Jelinek, jakub@redhat.com)
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@13338 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
index 76e54c7..21cf3c4 100644
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ -627,6 +627,7 @@
case Ity_I64: return mkUifU64(mce, a1, a2);
case Ity_I128: return mkUifU128(mce, a1, a2);
case Ity_V128: return mkUifUV128(mce, a1, a2);
+ case Ity_V256: return mkUifUV256(mce, a1, a2);
default:
VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
VG_(tool_panic)("memcheck:mkUifU");
@@ -872,6 +873,13 @@
tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
return tmp1;
+ case Ity_V256:
+ tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
+ tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128,
+ tmp1, tmp1));
+ tmp1 = assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256,
+ tmp1, tmp1));
+ return tmp1;
default:
ppIRType(dst_ty);
VG_(tool_panic)("mkPCastTo(2)");
@@ -2024,11 +2032,21 @@
return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
}
+static IRAtom* mkPCast16x16 ( MCEnv* mce, IRAtom* at )
+{
+ return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ16x16, at));
+}
+
static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
{
return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
}
+static IRAtom* mkPCast8x32 ( MCEnv* mce, IRAtom* at )
+{
+ return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ8x32, at));
+}
+
static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
{
return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
@@ -2433,6 +2451,44 @@
/* Simple ... UifU the args and per-lane pessimise the results. */
+/* --- V256-bit versions --- */
+
+static
+IRAtom* binary8Ix32 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
+{
+ IRAtom* at;
+ at = mkUifUV256(mce, vatom1, vatom2);
+ at = mkPCast8x32(mce, at);
+ return at;
+}
+
+static
+IRAtom* binary16Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
+{
+ IRAtom* at;
+ at = mkUifUV256(mce, vatom1, vatom2);
+ at = mkPCast16x16(mce, at);
+ return at;
+}
+
+static
+IRAtom* binary32Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
+{
+ IRAtom* at;
+ at = mkUifUV256(mce, vatom1, vatom2);
+ at = mkPCast32x8(mce, at);
+ return at;
+}
+
+static
+IRAtom* binary64Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
+{
+ IRAtom* at;
+ at = mkUifUV256(mce, vatom1, vatom2);
+ at = mkPCast64x4(mce, at);
+ return at;
+}
+
/* --- V128-bit versions --- */
static
@@ -3697,6 +3753,82 @@
case Iop_XorV256:
return mkUifUV256(mce, vatom1, vatom2);
+ /* V256-bit SIMD */
+
+ case Iop_ShrN16x16:
+ case Iop_ShrN32x8:
+ case Iop_ShrN64x4:
+ case Iop_SarN16x16:
+ case Iop_SarN32x8:
+ case Iop_ShlN16x16:
+ case Iop_ShlN32x8:
+ case Iop_ShlN64x4:
+         /* Same scheme as with all other shifts.  Note (22 Oct 05):
+            this scheme is now out of date — scalar shifts are handled
+            properly (lazily); vector shifts should be fixed likewise. */
+ complainIfUndefined(mce, atom2);
+ return assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2));
+
+ case Iop_QSub8Ux32:
+ case Iop_QSub8Sx32:
+ case Iop_Sub8x32:
+ case Iop_Min8Ux32:
+ case Iop_Min8Sx32:
+ case Iop_Max8Ux32:
+ case Iop_Max8Sx32:
+ case Iop_CmpGT8Sx32:
+ case Iop_CmpEQ8x32:
+ case Iop_Avg8Ux32:
+ case Iop_QAdd8Ux32:
+ case Iop_QAdd8Sx32:
+ case Iop_Add8x32:
+ return binary8Ix32(mce, vatom1, vatom2);
+
+ case Iop_QSub16Ux16:
+ case Iop_QSub16Sx16:
+ case Iop_Sub16x16:
+ case Iop_Mul16x16:
+ case Iop_MulHi16Sx16:
+ case Iop_MulHi16Ux16:
+ case Iop_Min16Sx16:
+ case Iop_Min16Ux16:
+ case Iop_Max16Sx16:
+ case Iop_Max16Ux16:
+ case Iop_CmpGT16Sx16:
+ case Iop_CmpEQ16x16:
+ case Iop_Avg16Ux16:
+ case Iop_QAdd16Ux16:
+ case Iop_QAdd16Sx16:
+ case Iop_Add16x16:
+ return binary16Ix16(mce, vatom1, vatom2);
+
+ case Iop_Sub32x8:
+ case Iop_CmpGT32Sx8:
+ case Iop_CmpEQ32x8:
+ case Iop_Add32x8:
+ case Iop_Max32Ux8:
+ case Iop_Max32Sx8:
+ case Iop_Min32Ux8:
+ case Iop_Min32Sx8:
+ case Iop_Mul32x8:
+ return binary32Ix8(mce, vatom1, vatom2);
+
+ case Iop_Sub64x4:
+ case Iop_Add64x4:
+ case Iop_CmpEQ64x4:
+ case Iop_CmpGT64Sx4:
+ return binary64Ix4(mce, vatom1, vatom2);
+
+ /* Perm32x8: rearrange values in left arg using steering values
+ from right arg. So rearrange the vbits in the same way but
+ pessimise wrt steering values. */
+ case Iop_Perm32x8:
+ return mkUifUV256(
+ mce,
+ assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2)),
+ mkPCast32x8(mce, vatom2)
+ );
+
default:
ppIROp(op);
VG_(tool_panic)("memcheck:expr2vbits_Binop");