Merge from branches/THUMB: new IR primops and associated
infrastructure, needed to represent NEON instructions. Way more new
ones than I would like, but I can't see a way to avoid having them.
git-svn-id: svn://svn.valgrind.org/vex/trunk@2016 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/ir_defs.c b/priv/ir_defs.c
index 4d35401..48519e4 100644
--- a/priv/ir_defs.c
+++ b/priv/ir_defs.c
@@ -313,48 +313,125 @@
case Iop_I32UtoFx4: vex_printf("I32UtoFx4"); return;
case Iop_I32StoFx4: vex_printf("I32StoFx4"); return;
+ case Iop_F32toF16x4: vex_printf("F32toF16x4"); return;
+ case Iop_F16toF32x4: vex_printf("F16toF32x4"); return;
+
+ case Iop_Rsqrte32Fx4: vex_printf("VRsqrte32Fx4"); return;
+ case Iop_Rsqrte32x4: vex_printf("VRsqrte32x4"); return;
+ case Iop_Rsqrte32Fx2: vex_printf("VRsqrte32Fx2"); return;
+ case Iop_Rsqrte32x2: vex_printf("VRsqrte32x2"); return;
+
case Iop_QFtoI32Ux4_RZ: vex_printf("QFtoI32Ux4_RZ"); return;
case Iop_QFtoI32Sx4_RZ: vex_printf("QFtoI32Sx4_RZ"); return;
+ case Iop_FtoI32Ux4_RZ: vex_printf("FtoI32Ux4_RZ"); return;
+ case Iop_FtoI32Sx4_RZ: vex_printf("FtoI32Sx4_RZ"); return;
+
+ case Iop_I32UtoFx2: vex_printf("I32UtoFx2"); return;
+ case Iop_I32StoFx2: vex_printf("I32StoFx2"); return;
+
+ case Iop_FtoI32Ux2_RZ: vex_printf("FtoI32Ux2_RZ"); return;
+ case Iop_FtoI32Sx2_RZ: vex_printf("FtoI32Sx2_RZ"); return;
+
case Iop_RoundF32x4_RM: vex_printf("RoundF32x4_RM"); return;
case Iop_RoundF32x4_RP: vex_printf("RoundF32x4_RP"); return;
case Iop_RoundF32x4_RN: vex_printf("RoundF32x4_RN"); return;
case Iop_RoundF32x4_RZ: vex_printf("RoundF32x4_RZ"); return;
+ case Iop_Abs8x8: vex_printf("Abs8x8"); return;
+ case Iop_Abs16x4: vex_printf("Abs16x4"); return;
+ case Iop_Abs32x2: vex_printf("Abs32x2"); return;
case Iop_Add8x8: vex_printf("Add8x8"); return;
case Iop_Add16x4: vex_printf("Add16x4"); return;
case Iop_Add32x2: vex_printf("Add32x2"); return;
case Iop_QAdd8Ux8: vex_printf("QAdd8Ux8"); return;
case Iop_QAdd16Ux4: vex_printf("QAdd16Ux4"); return;
+ case Iop_QAdd32Ux2: vex_printf("QAdd32Ux2"); return;
+ case Iop_QAdd64Ux1: vex_printf("QAdd64Ux1"); return;
case Iop_QAdd8Sx8: vex_printf("QAdd8Sx8"); return;
case Iop_QAdd16Sx4: vex_printf("QAdd16Sx4"); return;
+ case Iop_QAdd32Sx2: vex_printf("QAdd32Sx2"); return;
+ case Iop_QAdd64Sx1: vex_printf("QAdd64Sx1"); return;
+ case Iop_PwAdd8x8: vex_printf("PwAdd8x8"); return;
+ case Iop_PwAdd16x4: vex_printf("PwAdd16x4"); return;
+ case Iop_PwAdd32x2: vex_printf("PwAdd32x2"); return;
+ case Iop_PwAdd32Fx2: vex_printf("PwAdd32Fx2"); return;
+ case Iop_PwAddL8Ux8: vex_printf("PwAddL8Ux8"); return;
+ case Iop_PwAddL16Ux4: vex_printf("PwAddL16Ux4"); return;
+ case Iop_PwAddL32Ux2: vex_printf("PwAddL32Ux2"); return;
+ case Iop_PwAddL8Sx8: vex_printf("PwAddL8Sx8"); return;
+ case Iop_PwAddL16Sx4: vex_printf("PwAddL16Sx4"); return;
+ case Iop_PwAddL32Sx2: vex_printf("PwAddL32Sx2"); return;
case Iop_Sub8x8: vex_printf("Sub8x8"); return;
case Iop_Sub16x4: vex_printf("Sub16x4"); return;
case Iop_Sub32x2: vex_printf("Sub32x2"); return;
case Iop_QSub8Ux8: vex_printf("QSub8Ux8"); return;
case Iop_QSub16Ux4: vex_printf("QSub16Ux4"); return;
+ case Iop_QSub32Ux2: vex_printf("QSub32Ux2"); return;
+ case Iop_QSub64Ux1: vex_printf("QSub64Ux1"); return;
case Iop_QSub8Sx8: vex_printf("QSub8Sx8"); return;
case Iop_QSub16Sx4: vex_printf("QSub16Sx4"); return;
+ case Iop_QSub32Sx2: vex_printf("QSub32Sx2"); return;
+ case Iop_QSub64Sx1: vex_printf("QSub64Sx1"); return;
+ case Iop_Mul8x8: vex_printf("Mul8x8"); return;
case Iop_Mul16x4: vex_printf("Mul16x4"); return;
case Iop_Mul32x2: vex_printf("Mul32x2"); return;
- case Iop_Mul32x4: vex_printf("Mul32x4"); return;
+ case Iop_Mul32Fx2: vex_printf("Mul32Fx2"); return;
+ case Iop_PolynomialMul8x8: vex_printf("PolynomialMul8x8"); return;
case Iop_MulHi16Ux4: vex_printf("MulHi16Ux4"); return;
case Iop_MulHi16Sx4: vex_printf("MulHi16Sx4"); return;
+ case Iop_QDMulHi16Sx4: vex_printf("QDMulHi16Sx4"); return;
+ case Iop_QDMulHi32Sx2: vex_printf("QDMulHi32Sx2"); return;
+ case Iop_QRDMulHi16Sx4: vex_printf("QRDMulHi16Sx4"); return;
+ case Iop_QRDMulHi32Sx2: vex_printf("QRDMulHi32Sx2"); return;
+ case Iop_QDMulLong16Sx4: vex_printf("QDMulLong16Sx4"); return;
+ case Iop_QDMulLong32Sx2: vex_printf("QDMulLong32Sx2"); return;
case Iop_Avg8Ux8: vex_printf("Avg8Ux8"); return;
case Iop_Avg16Ux4: vex_printf("Avg16Ux4"); return;
+ case Iop_Max8Sx8: vex_printf("Max8Sx8"); return;
case Iop_Max16Sx4: vex_printf("Max16Sx4"); return;
+ case Iop_Max32Sx2: vex_printf("Max32Sx2"); return;
case Iop_Max8Ux8: vex_printf("Max8Ux8"); return;
+ case Iop_Max16Ux4: vex_printf("Max16Ux4"); return;
+ case Iop_Max32Ux2: vex_printf("Max32Ux2"); return;
+ case Iop_Min8Sx8: vex_printf("Min8Sx8"); return;
case Iop_Min16Sx4: vex_printf("Min16Sx4"); return;
+ case Iop_Min32Sx2: vex_printf("Min32Sx2"); return;
case Iop_Min8Ux8: vex_printf("Min8Ux8"); return;
+ case Iop_Min16Ux4: vex_printf("Min16Ux4"); return;
+ case Iop_Min32Ux2: vex_printf("Min32Ux2"); return;
+ case Iop_PwMax8Sx8: vex_printf("PwMax8Sx8"); return;
+ case Iop_PwMax16Sx4: vex_printf("PwMax16Sx4"); return;
+ case Iop_PwMax32Sx2: vex_printf("PwMax32Sx2"); return;
+ case Iop_PwMax8Ux8: vex_printf("PwMax8Ux8"); return;
+ case Iop_PwMax16Ux4: vex_printf("PwMax16Ux4"); return;
+ case Iop_PwMax32Ux2: vex_printf("PwMax32Ux2"); return;
+ case Iop_PwMin8Sx8: vex_printf("PwMin8Sx8"); return;
+ case Iop_PwMin16Sx4: vex_printf("PwMin16Sx4"); return;
+ case Iop_PwMin32Sx2: vex_printf("PwMin32Sx2"); return;
+ case Iop_PwMin8Ux8: vex_printf("PwMin8Ux8"); return;
+ case Iop_PwMin16Ux4: vex_printf("PwMin16Ux4"); return;
+ case Iop_PwMin32Ux2: vex_printf("PwMin32Ux2"); return;
case Iop_CmpEQ8x8: vex_printf("CmpEQ8x8"); return;
case Iop_CmpEQ16x4: vex_printf("CmpEQ16x4"); return;
case Iop_CmpEQ32x2: vex_printf("CmpEQ32x2"); return;
+ case Iop_CmpGT8Ux8: vex_printf("CmpGT8Ux8"); return;
+ case Iop_CmpGT16Ux4: vex_printf("CmpGT16Ux4"); return;
+ case Iop_CmpGT32Ux2: vex_printf("CmpGT32Ux2"); return;
case Iop_CmpGT8Sx8: vex_printf("CmpGT8Sx8"); return;
case Iop_CmpGT16Sx4: vex_printf("CmpGT16Sx4"); return;
case Iop_CmpGT32Sx2: vex_printf("CmpGT32Sx2"); return;
+ case Iop_Cnt8x8: vex_printf("Cnt8x8"); return;
+ case Iop_Clz8Sx8: vex_printf("Clz8Sx8"); return;
+ case Iop_Clz16Sx4: vex_printf("Clz16Sx4"); return;
+ case Iop_Clz32Sx2: vex_printf("Clz32Sx2"); return;
+ case Iop_Cls8Sx8: vex_printf("Cls8Sx8"); return;
+ case Iop_Cls16Sx4: vex_printf("Cls16Sx4"); return;
+ case Iop_Cls32Sx2: vex_printf("Cls32Sx2"); return;
case Iop_ShlN8x8: vex_printf("ShlN8x8"); return;
case Iop_ShlN16x4: vex_printf("ShlN16x4"); return;
case Iop_ShlN32x2: vex_printf("ShlN32x2"); return;
+ case Iop_ShrN8x8: vex_printf("ShrN8x8"); return;
case Iop_ShrN16x4: vex_printf("ShrN16x4"); return;
case Iop_ShrN32x2: vex_printf("ShrN32x2"); return;
case Iop_SarN8x8: vex_printf("SarN8x8"); return;
@@ -369,15 +446,62 @@
case Iop_InterleaveLO8x8: vex_printf("InterleaveLO8x8"); return;
case Iop_InterleaveLO16x4: vex_printf("InterleaveLO16x4"); return;
case Iop_InterleaveLO32x2: vex_printf("InterleaveLO32x2"); return;
+ case Iop_CatOddLanes8x8: vex_printf("CatOddLanes8x8"); return;
case Iop_CatOddLanes16x4: vex_printf("CatOddLanes16x4"); return;
+ case Iop_CatEvenLanes8x8: vex_printf("CatEvenLanes8x8"); return;
case Iop_CatEvenLanes16x4: vex_printf("CatEvenLanes16x4"); return;
+ case Iop_InterleaveOddLanes8x8: vex_printf("InterleaveOddLanes8x8"); return;
+ case Iop_InterleaveOddLanes16x4: vex_printf("InterleaveOddLanes16x4"); return;
+ case Iop_InterleaveEvenLanes8x8: vex_printf("InterleaveEvenLanes8x8"); return;
+ case Iop_InterleaveEvenLanes16x4: vex_printf("InterleaveEvenLanes16x4"); return;
+ case Iop_Shl8x8: vex_printf("Shl8x8"); return;
+ case Iop_Shl16x4: vex_printf("Shl16x4"); return;
+ case Iop_Shl32x2: vex_printf("Shl32x2"); return;
+ case Iop_Shr8x8: vex_printf("Shr8x8"); return;
+ case Iop_Shr16x4: vex_printf("Shr16x4"); return;
+ case Iop_Shr32x2: vex_printf("Shr32x2"); return;
+ case Iop_QShl8x8: vex_printf("QShl8x8"); return;
+ case Iop_QShl16x4: vex_printf("QShl16x4"); return;
+ case Iop_QShl32x2: vex_printf("QShl32x2"); return;
+ case Iop_QShl64x1: vex_printf("QShl64x1"); return;
+ case Iop_QSal8x8: vex_printf("QSal8x8"); return;
+ case Iop_QSal16x4: vex_printf("QSal16x4"); return;
+ case Iop_QSal32x2: vex_printf("QSal32x2"); return;
+ case Iop_QSal64x1: vex_printf("QSal64x1"); return;
+ case Iop_QShlN8x8: vex_printf("QShlN8x8"); return;
+ case Iop_QShlN16x4: vex_printf("QShlN16x4"); return;
+ case Iop_QShlN32x2: vex_printf("QShlN32x2"); return;
+ case Iop_QShlN64x1: vex_printf("QShlN64x1"); return;
+ case Iop_QShlN8Sx8: vex_printf("QShlN8Sx8"); return;
+ case Iop_QShlN16Sx4: vex_printf("QShlN16Sx4"); return;
+ case Iop_QShlN32Sx2: vex_printf("QShlN32Sx2"); return;
+ case Iop_QShlN64Sx1: vex_printf("QShlN64Sx1"); return;
+ case Iop_QSalN8x8: vex_printf("QSalN8x8"); return;
+ case Iop_QSalN16x4: vex_printf("QSalN16x4"); return;
+ case Iop_QSalN32x2: vex_printf("QSalN32x2"); return;
+ case Iop_QSalN64x1: vex_printf("QSalN64x1"); return;
+ case Iop_Sar8x8: vex_printf("Sar8x8"); return;
+ case Iop_Sar16x4: vex_printf("Sar16x4"); return;
+ case Iop_Sar32x2: vex_printf("Sar32x2"); return;
+ case Iop_Sal8x8: vex_printf("Sal8x8"); return;
+ case Iop_Sal16x4: vex_printf("Sal16x4"); return;
+ case Iop_Sal32x2: vex_printf("Sal32x2"); return;
+ case Iop_Sal64x1: vex_printf("Sal64x1"); return;
case Iop_Perm8x8: vex_printf("Perm8x8"); return;
+ case Iop_Reverse16_8x8: vex_printf("Reverse16_8x8"); return;
+ case Iop_Reverse32_8x8: vex_printf("Reverse32_8x8"); return;
+ case Iop_Reverse32_16x4: vex_printf("Reverse32_16x4"); return;
+ case Iop_Reverse64_8x8: vex_printf("Reverse64_8x8"); return;
+ case Iop_Reverse64_16x4: vex_printf("Reverse64_16x4"); return;
+ case Iop_Reverse64_32x2: vex_printf("Reverse64_32x2"); return;
+ case Iop_Abs32Fx2: vex_printf("Abs32Fx2"); return;
case Iop_CmpNEZ32x2: vex_printf("CmpNEZ32x2"); return;
case Iop_CmpNEZ16x4: vex_printf("CmpNEZ16x4"); return;
case Iop_CmpNEZ8x8: vex_printf("CmpNEZ8x8"); return;
case Iop_Add32Fx4: vex_printf("Add32Fx4"); return;
+ case Iop_Add32Fx2: vex_printf("Add32Fx2"); return;
case Iop_Add32F0x4: vex_printf("Add32F0x4"); return;
case Iop_Add64Fx2: vex_printf("Add64Fx2"); return;
case Iop_Add64F0x2: vex_printf("Add64F0x2"); return;
@@ -388,11 +512,17 @@
case Iop_Div64F0x2: vex_printf("Div64F0x2"); return;
case Iop_Max32Fx4: vex_printf("Max32Fx4"); return;
+ case Iop_Max32Fx2: vex_printf("Max32Fx2"); return;
+ case Iop_PwMax32Fx4: vex_printf("PwMax32Fx4"); return;
+ case Iop_PwMax32Fx2: vex_printf("PwMax32Fx2"); return;
case Iop_Max32F0x4: vex_printf("Max32F0x4"); return;
case Iop_Max64Fx2: vex_printf("Max64Fx2"); return;
case Iop_Max64F0x2: vex_printf("Max64F0x2"); return;
case Iop_Min32Fx4: vex_printf("Min32Fx4"); return;
+ case Iop_Min32Fx2: vex_printf("Min32Fx2"); return;
+ case Iop_PwMin32Fx4: vex_printf("PwMin32Fx4"); return;
+ case Iop_PwMin32Fx2: vex_printf("PwMin32Fx2"); return;
case Iop_Min32F0x4: vex_printf("Min32F0x4"); return;
case Iop_Min64Fx2: vex_printf("Min64Fx2"); return;
case Iop_Min64F0x2: vex_printf("Min64F0x2"); return;
@@ -402,10 +532,18 @@
case Iop_Mul64Fx2: vex_printf("Mul64Fx2"); return;
case Iop_Mul64F0x2: vex_printf("Mul64F0x2"); return;
+ case Iop_Recip32x2: vex_printf("Recip32x2"); return;
+ case Iop_Recip32Fx2: vex_printf("Recip32Fx2"); return;
case Iop_Recip32Fx4: vex_printf("Recip32Fx4"); return;
+ case Iop_Recip32x4: vex_printf("Recip32x4"); return;
case Iop_Recip32F0x4: vex_printf("Recip32F0x4"); return;
case Iop_Recip64Fx2: vex_printf("Recip64Fx2"); return;
case Iop_Recip64F0x2: vex_printf("Recip64F0x2"); return;
+ case Iop_Recps32Fx2: vex_printf("VRecps32Fx2"); return;
+ case Iop_Recps32Fx4: vex_printf("VRecps32Fx4"); return;
+ case Iop_Abs32Fx4: vex_printf("Abs32Fx4"); return;
+ case Iop_Rsqrts32Fx4: vex_printf("VRsqrts32Fx4"); return;
+ case Iop_Rsqrts32Fx2: vex_printf("VRsqrts32Fx2"); return;
case Iop_RSqrt32Fx4: vex_printf("RSqrt32Fx4"); return;
case Iop_RSqrt32F0x4: vex_printf("RSqrt32F0x4"); return;
@@ -418,6 +556,7 @@
case Iop_Sqrt64F0x2: vex_printf("Sqrt64F0x2"); return;
case Iop_Sub32Fx4: vex_printf("Sub32Fx4"); return;
+ case Iop_Sub32Fx2: vex_printf("Sub32Fx2"); return;
case Iop_Sub32F0x4: vex_printf("Sub32F0x4"); return;
case Iop_Sub64Fx2: vex_printf("Sub64Fx2"); return;
case Iop_Sub64F0x2: vex_printf("Sub64F0x2"); return;
@@ -432,6 +571,9 @@
case Iop_CmpLT64Fx2: vex_printf("CmpLT64Fx2"); return;
case Iop_CmpLE64Fx2: vex_printf("CmpLE64Fx2"); return;
case Iop_CmpUN64Fx2: vex_printf("CmpUN64Fx2"); return;
+ case Iop_CmpGT32Fx2: vex_printf("CmpGT32Fx2"); return;
+ case Iop_CmpEQ32Fx2: vex_printf("CmpEQ32Fx2"); return;
+ case Iop_CmpGE32Fx2: vex_printf("CmpGE32Fx2"); return;
case Iop_CmpEQ32F0x4: vex_printf("CmpEQ32F0x4"); return;
case Iop_CmpLT32F0x4: vex_printf("CmpLT32F0x4"); return;
@@ -442,6 +584,9 @@
case Iop_CmpLE64F0x2: vex_printf("CmpLE64F0x2"); return;
case Iop_CmpUN64F0x2: vex_printf("CmpUN64F0x2"); return;
+ case Iop_Neg32Fx4: vex_printf("Neg32Fx4"); return;
+ case Iop_Neg32Fx2: vex_printf("Neg32Fx2"); return;
+
case Iop_V128to64: vex_printf("V128to64"); return;
case Iop_V128HIto64: vex_printf("V128HIto64"); return;
case Iop_64HLtoV128: vex_printf("64HLtoV128"); return;
@@ -456,6 +601,9 @@
case Iop_Dup8x16: vex_printf("Dup8x16"); return;
case Iop_Dup16x8: vex_printf("Dup16x8"); return;
case Iop_Dup32x4: vex_printf("Dup32x4"); return;
+ case Iop_Dup8x8: vex_printf("Dup8x8"); return;
+ case Iop_Dup16x4: vex_printf("Dup16x4"); return;
+ case Iop_Dup32x2: vex_printf("Dup32x2"); return;
case Iop_NotV128: vex_printf("NotV128"); return;
case Iop_AndV128: vex_printf("AndV128"); return;
@@ -467,6 +615,10 @@
case Iop_CmpNEZ32x4: vex_printf("CmpNEZ32x4"); return;
case Iop_CmpNEZ64x2: vex_printf("CmpNEZ64x2"); return;
+ case Iop_Abs8x16: vex_printf("Abs8x16"); return;
+ case Iop_Abs16x8: vex_printf("Abs16x8"); return;
+ case Iop_Abs32x4: vex_printf("Abs32x4"); return;
+
case Iop_Add8x16: vex_printf("Add8x16"); return;
case Iop_Add16x8: vex_printf("Add16x8"); return;
case Iop_Add32x4: vex_printf("Add32x4"); return;
@@ -477,6 +629,17 @@
case Iop_QAdd8Sx16: vex_printf("QAdd8Sx16"); return;
case Iop_QAdd16Sx8: vex_printf("QAdd16Sx8"); return;
case Iop_QAdd32Sx4: vex_printf("QAdd32Sx4"); return;
+ case Iop_QAdd64Ux2: vex_printf("QAdd64Ux2"); return;
+ case Iop_QAdd64Sx2: vex_printf("QAdd64Sx2"); return;
+ case Iop_PwAdd8x16: vex_printf("PwAdd8x16"); return;
+ case Iop_PwAdd16x8: vex_printf("PwAdd16x8"); return;
+ case Iop_PwAdd32x4: vex_printf("PwAdd32x4"); return;
+ case Iop_PwAddL8Ux16: vex_printf("PwAddL8Ux16"); return;
+ case Iop_PwAddL16Ux8: vex_printf("PwAddL16Ux8"); return;
+ case Iop_PwAddL32Ux4: vex_printf("PwAddL32Ux4"); return;
+ case Iop_PwAddL8Sx16: vex_printf("PwAddL8Sx16"); return;
+ case Iop_PwAddL16Sx8: vex_printf("PwAddL16Sx8"); return;
+ case Iop_PwAddL32Sx4: vex_printf("PwAddL32Sx4"); return;
case Iop_Sub8x16: vex_printf("Sub8x16"); return;
case Iop_Sub16x8: vex_printf("Sub16x8"); return;
@@ -488,12 +651,28 @@
case Iop_QSub8Sx16: vex_printf("QSub8Sx16"); return;
case Iop_QSub16Sx8: vex_printf("QSub16Sx8"); return;
case Iop_QSub32Sx4: vex_printf("QSub32Sx4"); return;
+ case Iop_QSub64Ux2: vex_printf("QSub64Ux2"); return;
+ case Iop_QSub64Sx2: vex_printf("QSub64Sx2"); return;
+ case Iop_Mul8x16: vex_printf("Mul8x16"); return;
case Iop_Mul16x8: vex_printf("Mul16x8"); return;
+ case Iop_Mul32x4: vex_printf("Mul32x4"); return;
+ case Iop_Mull8Ux8: vex_printf("Mull8Ux8"); return;
+ case Iop_Mull8Sx8: vex_printf("Mull8Sx8"); return;
+ case Iop_Mull16Ux4: vex_printf("Mull16Ux4"); return;
+ case Iop_Mull16Sx4: vex_printf("Mull16Sx4"); return;
+ case Iop_Mull32Ux2: vex_printf("Mull32Ux2"); return;
+ case Iop_Mull32Sx2: vex_printf("Mull32Sx2"); return;
+ case Iop_PolynomialMul8x16: vex_printf("PolynomialMul8x16"); return;
+ case Iop_PolynomialMull8x8: vex_printf("PolynomialMull8x8"); return;
case Iop_MulHi16Ux8: vex_printf("MulHi16Ux8"); return;
case Iop_MulHi32Ux4: vex_printf("MulHi32Ux4"); return;
case Iop_MulHi16Sx8: vex_printf("MulHi16Sx8"); return;
case Iop_MulHi32Sx4: vex_printf("MulHi32Sx4"); return;
+ case Iop_QDMulHi16Sx8: vex_printf("QDMulHi16Sx8"); return;
+ case Iop_QDMulHi32Sx4: vex_printf("QDMulHi32Sx4"); return;
+ case Iop_QRDMulHi16Sx8: vex_printf("QRDMulHi16Sx8"); return;
+ case Iop_QRDMulHi32Sx4: vex_printf("QRDMulHi32Sx4"); return;
case Iop_MullEven8Ux16: vex_printf("MullEven8Ux16"); return;
case Iop_MullEven16Ux8: vex_printf("MullEven16Ux8"); return;
@@ -532,6 +711,14 @@
case Iop_CmpGT16Ux8: vex_printf("CmpGT16Ux8"); return;
case Iop_CmpGT32Ux4: vex_printf("CmpGT32Ux4"); return;
+ case Iop_Cnt8x16: vex_printf("Cnt8x16"); return;
+ case Iop_Clz8Sx16: vex_printf("Clz8Sx16"); return;
+ case Iop_Clz16Sx8: vex_printf("Clz16Sx8"); return;
+ case Iop_Clz32Sx4: vex_printf("Clz32Sx4"); return;
+ case Iop_Cls8Sx16: vex_printf("Cls8Sx16"); return;
+ case Iop_Cls16Sx8: vex_printf("Cls16Sx8"); return;
+ case Iop_Cls32Sx4: vex_printf("Cls32Sx4"); return;
+
case Iop_ShlV128: vex_printf("ShlV128"); return;
case Iop_ShrV128: vex_printf("ShrV128"); return;
@@ -546,16 +733,44 @@
case Iop_SarN8x16: vex_printf("SarN8x16"); return;
case Iop_SarN16x8: vex_printf("SarN16x8"); return;
case Iop_SarN32x4: vex_printf("SarN32x4"); return;
+ case Iop_SarN64x2: vex_printf("SarN64x2"); return;
case Iop_Shl8x16: vex_printf("Shl8x16"); return;
case Iop_Shl16x8: vex_printf("Shl16x8"); return;
case Iop_Shl32x4: vex_printf("Shl32x4"); return;
+ case Iop_Shl64x2: vex_printf("Shl64x2"); return;
+ case Iop_QSal8x16: vex_printf("QSal8x16"); return;
+ case Iop_QSal16x8: vex_printf("QSal16x8"); return;
+ case Iop_QSal32x4: vex_printf("QSal32x4"); return;
+ case Iop_QSal64x2: vex_printf("QSal64x2"); return;
+ case Iop_QShl8x16: vex_printf("QShl8x16"); return;
+ case Iop_QShl16x8: vex_printf("QShl16x8"); return;
+ case Iop_QShl32x4: vex_printf("QShl32x4"); return;
+ case Iop_QShl64x2: vex_printf("QShl64x2"); return;
+ case Iop_QSalN8x16: vex_printf("QSalN8x16"); return;
+ case Iop_QSalN16x8: vex_printf("QSalN16x8"); return;
+ case Iop_QSalN32x4: vex_printf("QSalN32x4"); return;
+ case Iop_QSalN64x2: vex_printf("QSalN64x2"); return;
+ case Iop_QShlN8x16: vex_printf("QShlN8x16"); return;
+ case Iop_QShlN16x8: vex_printf("QShlN16x8"); return;
+ case Iop_QShlN32x4: vex_printf("QShlN32x4"); return;
+ case Iop_QShlN64x2: vex_printf("QShlN64x2"); return;
+ case Iop_QShlN8Sx16: vex_printf("QShlN8Sx16"); return;
+ case Iop_QShlN16Sx8: vex_printf("QShlN16Sx8"); return;
+ case Iop_QShlN32Sx4: vex_printf("QShlN32Sx4"); return;
+ case Iop_QShlN64Sx2: vex_printf("QShlN64Sx2"); return;
case Iop_Shr8x16: vex_printf("Shr8x16"); return;
case Iop_Shr16x8: vex_printf("Shr16x8"); return;
case Iop_Shr32x4: vex_printf("Shr32x4"); return;
+ case Iop_Shr64x2: vex_printf("Shr64x2"); return;
case Iop_Sar8x16: vex_printf("Sar8x16"); return;
case Iop_Sar16x8: vex_printf("Sar16x8"); return;
case Iop_Sar32x4: vex_printf("Sar32x4"); return;
+ case Iop_Sar64x2: vex_printf("Sar64x2"); return;
+ case Iop_Sal8x16: vex_printf("Sal8x16"); return;
+ case Iop_Sal16x8: vex_printf("Sal16x8"); return;
+ case Iop_Sal32x4: vex_printf("Sal32x4"); return;
+ case Iop_Sal64x2: vex_printf("Sal64x2"); return;
case Iop_Rol8x16: vex_printf("Rol8x16"); return;
case Iop_Rol16x8: vex_printf("Rol16x8"); return;
case Iop_Rol32x4: vex_printf("Rol32x4"); return;
@@ -566,6 +781,24 @@
case Iop_QNarrow32Ux4: vex_printf("QNarrow32Ux4"); return;
case Iop_QNarrow16Sx8: vex_printf("QNarrow16Sx8"); return;
case Iop_QNarrow32Sx4: vex_printf("QNarrow32Sx4"); return;
+ case Iop_Shorten16x8: vex_printf("Shorten16x8"); return;
+ case Iop_Shorten32x4: vex_printf("Shorten32x4"); return;
+ case Iop_Shorten64x2: vex_printf("Shorten64x2"); return;
+ case Iop_QShortenU16Ux8: vex_printf("QShortenU16Ux8"); return;
+ case Iop_QShortenU32Ux4: vex_printf("QShortenU32Ux4"); return;
+ case Iop_QShortenU64Ux2: vex_printf("QShortenU64Ux2"); return;
+ case Iop_QShortenS16Sx8: vex_printf("QShortenS16Sx8"); return;
+ case Iop_QShortenS32Sx4: vex_printf("QShortenS32Sx4"); return;
+ case Iop_QShortenS64Sx2: vex_printf("QShortenS64Sx2"); return;
+ case Iop_QShortenU16Sx8: vex_printf("QShortenU16Sx8"); return;
+ case Iop_QShortenU32Sx4: vex_printf("QShortenU32Sx4"); return;
+ case Iop_QShortenU64Sx2: vex_printf("QShortenU64Sx2"); return;
+ case Iop_Longen8Ux8: vex_printf("Longen8Ux8"); return;
+ case Iop_Longen16Ux4: vex_printf("Longen16Ux4"); return;
+ case Iop_Longen32Ux2: vex_printf("Longen32Ux2"); return;
+ case Iop_Longen8Sx8: vex_printf("Longen8Sx8"); return;
+ case Iop_Longen16Sx4: vex_printf("Longen16Sx4"); return;
+ case Iop_Longen32Sx2: vex_printf("Longen32Sx2"); return;
case Iop_InterleaveHI8x16: vex_printf("InterleaveHI8x16"); return;
case Iop_InterleaveHI16x8: vex_printf("InterleaveHI16x8"); return;
@@ -576,7 +809,51 @@
case Iop_InterleaveLO32x4: vex_printf("InterleaveLO32x4"); return;
case Iop_InterleaveLO64x2: vex_printf("InterleaveLO64x2"); return;
+ case Iop_CatOddLanes8x16: vex_printf("CatOddLanes8x16"); return;
+ case Iop_CatOddLanes16x8: vex_printf("CatOddLanes16x8"); return;
+ case Iop_CatOddLanes32x4: vex_printf("CatOddLanes32x4"); return;
+ case Iop_CatEvenLanes8x16: vex_printf("CatEvenLanes8x16"); return;
+ case Iop_CatEvenLanes16x8: vex_printf("CatEvenLanes16x8"); return;
+ case Iop_CatEvenLanes32x4: vex_printf("CatEvenLanes32x4"); return;
+
+ case Iop_InterleaveOddLanes8x16: vex_printf("InterleaveOddLanes8x16"); return;
+ case Iop_InterleaveOddLanes16x8: vex_printf("InterleaveOddLanes16x8"); return;
+ case Iop_InterleaveOddLanes32x4: vex_printf("InterleaveOddLanes32x4"); return;
+ case Iop_InterleaveEvenLanes8x16: vex_printf("InterleaveEvenLanes8x16"); return;
+ case Iop_InterleaveEvenLanes16x8: vex_printf("InterleaveEvenLanes16x8"); return;
+ case Iop_InterleaveEvenLanes32x4: vex_printf("InterleaveEvenLanes32x4"); return;
+
+ case Iop_GetElem8x16: vex_printf("GetElem8x16"); return;
+ case Iop_GetElem16x8: vex_printf("GetElem16x8"); return;
+ case Iop_GetElem32x4: vex_printf("GetElem32x4"); return;
+ case Iop_GetElem64x2: vex_printf("GetElem64x2"); return;
+
+ case Iop_GetElem8x8: vex_printf("GetElem8x8"); return;
+ case Iop_GetElem16x4: vex_printf("GetElem16x4"); return;
+ case Iop_GetElem32x2: vex_printf("GetElem32x2"); return;
+ case Iop_SetElem8x8: vex_printf("SetElem8x8"); return;
+ case Iop_SetElem16x4: vex_printf("SetElem16x4"); return;
+ case Iop_SetElem32x2: vex_printf("SetElem32x2"); return;
+
+ case Iop_Extract64: vex_printf("Extract64"); return;
+ case Iop_ExtractV128: vex_printf("ExtractV128"); return;
+
case Iop_Perm8x16: vex_printf("Perm8x16"); return;
+ case Iop_Reverse16_8x16: vex_printf("Reverse16_8x16"); return;
+ case Iop_Reverse32_8x16: vex_printf("Reverse32_8x16"); return;
+ case Iop_Reverse32_16x8: vex_printf("Reverse32_16x8"); return;
+ case Iop_Reverse64_8x16: vex_printf("Reverse64_8x16"); return;
+ case Iop_Reverse64_16x8: vex_printf("Reverse64_16x8"); return;
+ case Iop_Reverse64_32x4: vex_printf("Reverse64_32x4"); return;
+
+ case Iop_F32ToFixed32Ux4_RZ: vex_printf("F32ToFixed32Ux4_RZ"); return;
+ case Iop_F32ToFixed32Sx4_RZ: vex_printf("F32ToFixed32Sx4_RZ"); return;
+ case Iop_Fixed32UToF32x4_RN: vex_printf("Fixed32UToF32x4_RN"); return;
+ case Iop_Fixed32SToF32x4_RN: vex_printf("Fixed32SToF32x4_RN"); return;
+ case Iop_F32ToFixed32Ux2_RZ: vex_printf("F32ToFixed32Ux2_RZ"); return;
+ case Iop_F32ToFixed32Sx2_RZ: vex_printf("F32ToFixed32Sx2_RZ"); return;
+ case Iop_Fixed32UToF32x2_RN: vex_printf("Fixed32UToF32x2_RN"); return;
+ case Iop_Fixed32SToF32x2_RN: vex_printf("Fixed32SToF32x2_RN"); return;
default: vpanic("ppIROp(1)");
}
@@ -1182,6 +1459,21 @@
vec[7] = NULL;
return vec;
}
+IRExpr** mkIRExprVec_8 ( IRExpr* arg1, IRExpr* arg2, IRExpr* arg3,
+ IRExpr* arg4, IRExpr* arg5, IRExpr* arg6,
+ IRExpr* arg7, IRExpr* arg8 ) {
+ IRExpr** vec = LibVEX_Alloc(9 * sizeof(IRExpr*));
+ vec[0] = arg1;
+ vec[1] = arg2;
+ vec[2] = arg3;
+ vec[3] = arg4;
+ vec[4] = arg5;
+ vec[5] = arg6;
+ vec[6] = arg7;
+ vec[7] = arg8;
+ vec[8] = NULL;
+ return vec;
+}
/* Constructors -- IRDirty */
@@ -1625,29 +1917,67 @@
case Iop_CmpORD64S:
case Iop_Avg8Ux8: case Iop_Avg16Ux4:
case Iop_Add8x8: case Iop_Add16x4: case Iop_Add32x2:
+ case Iop_Add32Fx2: case Iop_Sub32Fx2:
case Iop_CmpEQ8x8: case Iop_CmpEQ16x4: case Iop_CmpEQ32x2:
case Iop_CmpGT8Sx8: case Iop_CmpGT16Sx4: case Iop_CmpGT32Sx2:
+ case Iop_CmpGT8Ux8: case Iop_CmpGT16Ux4: case Iop_CmpGT32Ux2:
+ case Iop_CmpGT32Fx2: case Iop_CmpEQ32Fx2: case Iop_CmpGE32Fx2:
case Iop_InterleaveHI8x8: case Iop_InterleaveLO8x8:
case Iop_InterleaveHI16x4: case Iop_InterleaveLO16x4:
case Iop_InterleaveHI32x2: case Iop_InterleaveLO32x2:
+ case Iop_CatOddLanes8x8: case Iop_CatEvenLanes8x8:
case Iop_CatOddLanes16x4: case Iop_CatEvenLanes16x4:
+ case Iop_InterleaveOddLanes8x8: case Iop_InterleaveEvenLanes8x8:
+ case Iop_InterleaveOddLanes16x4: case Iop_InterleaveEvenLanes16x4:
case Iop_Perm8x8:
- case Iop_Max8Ux8: case Iop_Max16Sx4:
- case Iop_Min8Ux8: case Iop_Min16Sx4:
- case Iop_Mul16x4: case Iop_Mul32x2:
+ case Iop_Max8Ux8: case Iop_Max16Ux4: case Iop_Max32Ux2:
+ case Iop_Max8Sx8: case Iop_Max16Sx4: case Iop_Max32Sx2:
+ case Iop_Max32Fx2: case Iop_Min32Fx2:
+ case Iop_PwMax32Fx2: case Iop_PwMin32Fx2:
+ case Iop_Min8Ux8: case Iop_Min16Ux4: case Iop_Min32Ux2:
+ case Iop_Min8Sx8: case Iop_Min16Sx4: case Iop_Min32Sx2:
+ case Iop_PwMax8Ux8: case Iop_PwMax16Ux4: case Iop_PwMax32Ux2:
+ case Iop_PwMax8Sx8: case Iop_PwMax16Sx4: case Iop_PwMax32Sx2:
+ case Iop_PwMin8Ux8: case Iop_PwMin16Ux4: case Iop_PwMin32Ux2:
+ case Iop_PwMin8Sx8: case Iop_PwMin16Sx4: case Iop_PwMin32Sx2:
+ case Iop_Mul8x8: case Iop_Mul16x4: case Iop_Mul32x2:
+ case Iop_Mul32Fx2:
+ case Iop_PolynomialMul8x8:
case Iop_MulHi16Sx4: case Iop_MulHi16Ux4:
+ case Iop_QDMulHi16Sx4: case Iop_QDMulHi32Sx2:
+ case Iop_QRDMulHi16Sx4: case Iop_QRDMulHi32Sx2:
case Iop_QAdd8Sx8: case Iop_QAdd16Sx4:
+ case Iop_QAdd32Sx2: case Iop_QAdd64Sx1:
case Iop_QAdd8Ux8: case Iop_QAdd16Ux4:
+ case Iop_QAdd32Ux2: case Iop_QAdd64Ux1:
+ case Iop_PwAdd8x8: case Iop_PwAdd16x4: case Iop_PwAdd32x2:
+ case Iop_PwAdd32Fx2:
case Iop_QNarrow32Sx2:
case Iop_QNarrow16Sx4: case Iop_QNarrow16Ux4:
case Iop_Sub8x8: case Iop_Sub16x4: case Iop_Sub32x2:
case Iop_QSub8Sx8: case Iop_QSub16Sx4:
+ case Iop_QSub32Sx2: case Iop_QSub64Sx1:
case Iop_QSub8Ux8: case Iop_QSub16Ux4:
+ case Iop_QSub32Ux2: case Iop_QSub64Ux1:
+ case Iop_Shl8x8: case Iop_Shl16x4: case Iop_Shl32x2:
+ case Iop_Shr8x8: case Iop_Shr16x4: case Iop_Shr32x2:
+ case Iop_Sar8x8: case Iop_Sar16x4: case Iop_Sar32x2:
+ case Iop_Sal8x8: case Iop_Sal16x4: case Iop_Sal32x2: case Iop_Sal64x1:
+ case Iop_QShl8x8: case Iop_QShl16x4: case Iop_QShl32x2: case Iop_QShl64x1:
+ case Iop_QSal8x8: case Iop_QSal16x4: case Iop_QSal32x2: case Iop_QSal64x1:
+ case Iop_Recps32Fx2:
+ case Iop_Rsqrts32Fx2:
BINARY(Ity_I64,Ity_I64, Ity_I64);
case Iop_ShlN32x2: case Iop_ShlN16x4: case Iop_ShlN8x8:
- case Iop_ShrN32x2: case Iop_ShrN16x4:
+ case Iop_ShrN32x2: case Iop_ShrN16x4: case Iop_ShrN8x8:
case Iop_SarN32x2: case Iop_SarN16x4: case Iop_SarN8x8:
+ case Iop_QShlN8x8: case Iop_QShlN16x4:
+ case Iop_QShlN32x2: case Iop_QShlN64x1:
+ case Iop_QShlN8Sx8: case Iop_QShlN16Sx4:
+ case Iop_QShlN32Sx2: case Iop_QShlN64Sx1:
+ case Iop_QSalN8x8: case Iop_QSalN16x4:
+ case Iop_QSalN32x2: case Iop_QSalN64x1:
BINARY(Ity_I64,Ity_I8, Ity_I64);
case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
@@ -1668,6 +1998,22 @@
case Iop_Not64:
case Iop_CmpNEZ32x2: case Iop_CmpNEZ16x4: case Iop_CmpNEZ8x8:
+ case Iop_Cnt8x8:
+ case Iop_Clz8Sx8: case Iop_Clz16Sx4: case Iop_Clz32Sx2:
+ case Iop_Cls8Sx8: case Iop_Cls16Sx4: case Iop_Cls32Sx2:
+ case Iop_PwAddL8Ux8: case Iop_PwAddL16Ux4: case Iop_PwAddL32Ux2:
+ case Iop_PwAddL8Sx8: case Iop_PwAddL16Sx4: case Iop_PwAddL32Sx2:
+ case Iop_Reverse64_8x8: case Iop_Reverse64_16x4: case Iop_Reverse64_32x2:
+ case Iop_Reverse32_8x8: case Iop_Reverse32_16x4:
+ case Iop_Reverse16_8x8:
+ case Iop_FtoI32Sx2_RZ: case Iop_FtoI32Ux2_RZ:
+ case Iop_I32StoFx2: case Iop_I32UtoFx2:
+ case Iop_Recip32x2: case Iop_Recip32Fx2:
+ case Iop_Abs32Fx2:
+ case Iop_Rsqrte32Fx2:
+ case Iop_Rsqrte32x2:
+ case Iop_Neg32Fx2:
+ case Iop_Abs8x8: case Iop_Abs16x4: case Iop_Abs32x2:
UNARY(Ity_I64, Ity_I64);
case Iop_CmpEQ8: case Iop_CmpNE8:
@@ -1853,16 +2199,31 @@
case Iop_I32StoFx4:
case Iop_QFtoI32Ux4_RZ:
case Iop_QFtoI32Sx4_RZ:
+ case Iop_FtoI32Ux4_RZ:
+ case Iop_FtoI32Sx4_RZ:
case Iop_RoundF32x4_RM:
case Iop_RoundF32x4_RP:
case Iop_RoundF32x4_RN:
case Iop_RoundF32x4_RZ:
+ case Iop_Abs32Fx4:
+ case Iop_Rsqrte32Fx4:
+ case Iop_Rsqrte32x4:
UNARY(Ity_V128, Ity_V128);
case Iop_64HLtoV128: BINARY(Ity_I64,Ity_I64, Ity_V128);
- case Iop_V128to64: case Iop_V128HIto64:
+ case Iop_V128to64: case Iop_V128HIto64:
+ case Iop_Shorten16x8: case Iop_Shorten32x4: case Iop_Shorten64x2:
+ case Iop_QShortenU16Ux8: case Iop_QShortenU32Ux4: case Iop_QShortenU64Ux2:
+ case Iop_QShortenS16Sx8: case Iop_QShortenS32Sx4: case Iop_QShortenS64Sx2:
+ case Iop_QShortenU16Sx8: case Iop_QShortenU32Sx4: case Iop_QShortenU64Sx2:
+ case Iop_F32toF16x4:
UNARY(Ity_V128, Ity_I64);
+ case Iop_Longen8Ux8: case Iop_Longen16Ux4: case Iop_Longen32Ux2:
+ case Iop_Longen8Sx8: case Iop_Longen16Sx4: case Iop_Longen32Sx2:
+ case Iop_F16toF32x4:
+ UNARY(Ity_I64, Ity_V128);
+
case Iop_V128to32: UNARY(Ity_V128, Ity_I32);
case Iop_32UtoV128: UNARY(Ity_I32, Ity_V128);
case Iop_64UtoV128: UNARY(Ity_I64, Ity_V128);
@@ -1872,6 +2233,9 @@
case Iop_Dup8x16: UNARY(Ity_I8, Ity_V128);
case Iop_Dup16x8: UNARY(Ity_I16, Ity_V128);
case Iop_Dup32x4: UNARY(Ity_I32, Ity_V128);
+ case Iop_Dup8x8: UNARY(Ity_I8, Ity_I64);
+ case Iop_Dup16x4: UNARY(Ity_I16, Ity_I64);
+ case Iop_Dup32x2: UNARY(Ity_I32, Ity_I64);
case Iop_CmpEQ32Fx4: case Iop_CmpLT32Fx4:
case Iop_CmpEQ64Fx2: case Iop_CmpLT64Fx2:
@@ -1887,6 +2251,7 @@
case Iop_Div32Fx4: case Iop_Div32F0x4:
case Iop_Div64Fx2: case Iop_Div64F0x2:
case Iop_Max32Fx4: case Iop_Max32F0x4:
+ case Iop_PwMax32Fx4: case Iop_PwMin32Fx4:
case Iop_Max64Fx2: case Iop_Max64F0x2:
case Iop_Min32Fx4: case Iop_Min32F0x4:
case Iop_Min64Fx2: case Iop_Min64F0x2:
@@ -1897,15 +2262,23 @@
case Iop_AndV128: case Iop_OrV128: case Iop_XorV128:
case Iop_Add8x16: case Iop_Add16x8:
case Iop_Add32x4: case Iop_Add64x2:
- case Iop_QAdd8Ux16: case Iop_QAdd16Ux8: case Iop_QAdd32Ux4:
- case Iop_QAdd8Sx16: case Iop_QAdd16Sx8: case Iop_QAdd32Sx4:
+ case Iop_QAdd8Ux16: case Iop_QAdd16Ux8:
+      case Iop_QAdd32Ux4: case Iop_QAdd64Ux2:
+ case Iop_QAdd8Sx16: case Iop_QAdd16Sx8:
+ case Iop_QAdd32Sx4: case Iop_QAdd64Sx2:
+ case Iop_PwAdd8x16: case Iop_PwAdd16x8: case Iop_PwAdd32x4:
case Iop_Sub8x16: case Iop_Sub16x8:
case Iop_Sub32x4: case Iop_Sub64x2:
- case Iop_QSub8Ux16: case Iop_QSub16Ux8: case Iop_QSub32Ux4:
- case Iop_QSub8Sx16: case Iop_QSub16Sx8: case Iop_QSub32Sx4:
- case Iop_Mul16x8: case Iop_Mul32x4:
+ case Iop_QSub8Ux16: case Iop_QSub16Ux8:
+      case Iop_QSub32Ux4: case Iop_QSub64Ux2:
+ case Iop_QSub8Sx16: case Iop_QSub16Sx8:
+ case Iop_QSub32Sx4: case Iop_QSub64Sx2:
+ case Iop_Mul8x16: case Iop_Mul16x8: case Iop_Mul32x4:
+ case Iop_PolynomialMul8x16:
case Iop_MulHi16Ux8: case Iop_MulHi32Ux4:
case Iop_MulHi16Sx8: case Iop_MulHi32Sx4:
+ case Iop_QDMulHi16Sx8: case Iop_QDMulHi32Sx4:
+ case Iop_QRDMulHi16Sx8: case Iop_QRDMulHi32Sx4:
case Iop_MullEven8Ux16: case Iop_MullEven16Ux8:
case Iop_MullEven8Sx16: case Iop_MullEven16Sx8:
case Iop_Avg8Ux16: case Iop_Avg16Ux8: case Iop_Avg32Ux4:
@@ -1918,22 +2291,40 @@
case Iop_CmpGT8Sx16: case Iop_CmpGT16Sx8: case Iop_CmpGT32Sx4:
case Iop_CmpGT64Sx2:
case Iop_CmpGT8Ux16: case Iop_CmpGT16Ux8: case Iop_CmpGT32Ux4:
- case Iop_Shl8x16: case Iop_Shl16x8: case Iop_Shl32x4:
- case Iop_Shr8x16: case Iop_Shr16x8: case Iop_Shr32x4:
- case Iop_Sar8x16: case Iop_Sar16x8: case Iop_Sar32x4:
+ case Iop_Shl8x16: case Iop_Shl16x8: case Iop_Shl32x4: case Iop_Shl64x2:
+ case Iop_QShl8x16: case Iop_QShl16x8: case Iop_QShl32x4: case Iop_QShl64x2:
+ case Iop_QSal8x16: case Iop_QSal16x8: case Iop_QSal32x4: case Iop_QSal64x2:
+ case Iop_Shr8x16: case Iop_Shr16x8: case Iop_Shr32x4: case Iop_Shr64x2:
+ case Iop_Sar8x16: case Iop_Sar16x8: case Iop_Sar32x4: case Iop_Sar64x2:
+ case Iop_Sal8x16: case Iop_Sal16x8: case Iop_Sal32x4: case Iop_Sal64x2:
case Iop_Rol8x16: case Iop_Rol16x8: case Iop_Rol32x4:
case Iop_QNarrow16Ux8: case Iop_QNarrow32Ux4:
case Iop_QNarrow16Sx8: case Iop_QNarrow32Sx4:
case Iop_Narrow16x8: case Iop_Narrow32x4:
case Iop_InterleaveHI8x16: case Iop_InterleaveHI16x8:
case Iop_InterleaveHI32x4: case Iop_InterleaveHI64x2:
- case Iop_InterleaveLO8x16: case Iop_InterleaveLO16x8:
+ case Iop_InterleaveLO8x16: case Iop_InterleaveLO16x8:
case Iop_InterleaveLO32x4: case Iop_InterleaveLO64x2:
+ case Iop_CatOddLanes8x16: case Iop_CatEvenLanes8x16:
+ case Iop_CatOddLanes16x8: case Iop_CatEvenLanes16x8:
+ case Iop_CatOddLanes32x4: case Iop_CatEvenLanes32x4:
+ case Iop_InterleaveOddLanes8x16: case Iop_InterleaveEvenLanes8x16:
+ case Iop_InterleaveOddLanes16x8: case Iop_InterleaveEvenLanes16x8:
+ case Iop_InterleaveOddLanes32x4: case Iop_InterleaveEvenLanes32x4:
case Iop_Perm8x16:
+ case Iop_Recps32Fx4:
+ case Iop_Rsqrts32Fx4:
BINARY(Ity_V128,Ity_V128, Ity_V128);
+ case Iop_PolynomialMull8x8:
+ case Iop_Mull8Ux8: case Iop_Mull8Sx8:
+ case Iop_Mull16Ux4: case Iop_Mull16Sx4:
+ case Iop_Mull32Ux2: case Iop_Mull32Sx2:
+ BINARY(Ity_I64, Ity_I64, Ity_V128);
+
case Iop_NotV128:
case Iop_Recip32Fx4: case Iop_Recip32F0x4:
+ case Iop_Recip32x4:
case Iop_Recip64Fx2: case Iop_Recip64F0x2:
case Iop_RSqrt32Fx4: case Iop_RSqrt32F0x4:
case Iop_RSqrt64Fx2: case Iop_RSqrt64F0x2:
@@ -1941,6 +2332,16 @@
case Iop_Sqrt64Fx2: case Iop_Sqrt64F0x2:
case Iop_CmpNEZ8x16: case Iop_CmpNEZ16x8:
case Iop_CmpNEZ32x4: case Iop_CmpNEZ64x2:
+ case Iop_Cnt8x16:
+ case Iop_Clz8Sx16: case Iop_Clz16Sx8: case Iop_Clz32Sx4:
+ case Iop_Cls8Sx16: case Iop_Cls16Sx8: case Iop_Cls32Sx4:
+ case Iop_PwAddL8Ux16: case Iop_PwAddL16Ux8: case Iop_PwAddL32Ux4:
+ case Iop_PwAddL8Sx16: case Iop_PwAddL16Sx8: case Iop_PwAddL32Sx4:
+ case Iop_Reverse64_8x16: case Iop_Reverse64_16x8: case Iop_Reverse64_32x4:
+ case Iop_Reverse32_8x16: case Iop_Reverse32_16x8:
+ case Iop_Reverse16_8x16:
+ case Iop_Neg32Fx4:
+ case Iop_Abs8x16: case Iop_Abs16x8: case Iop_Abs32x4:
UNARY(Ity_V128, Ity_V128);
case Iop_ShlV128: case Iop_ShrV128:
@@ -1948,9 +2349,57 @@
case Iop_ShlN32x4: case Iop_ShlN64x2:
case Iop_ShrN8x16: case Iop_ShrN16x8:
case Iop_ShrN32x4: case Iop_ShrN64x2:
- case Iop_SarN8x16: case Iop_SarN16x8: case Iop_SarN32x4:
+ case Iop_SarN8x16: case Iop_SarN16x8:
+ case Iop_SarN32x4: case Iop_SarN64x2:
+ case Iop_QShlN8x16: case Iop_QShlN16x8:
+ case Iop_QShlN32x4: case Iop_QShlN64x2:
+ case Iop_QShlN8Sx16: case Iop_QShlN16Sx8:
+ case Iop_QShlN32Sx4: case Iop_QShlN64Sx2:
+ case Iop_QSalN8x16: case Iop_QSalN16x8:
+ case Iop_QSalN32x4: case Iop_QSalN64x2:
BINARY(Ity_V128,Ity_I8, Ity_V128);
+ case Iop_F32ToFixed32Ux4_RZ:
+ case Iop_F32ToFixed32Sx4_RZ:
+ case Iop_Fixed32UToF32x4_RN:
+ case Iop_Fixed32SToF32x4_RN:
+ BINARY(Ity_V128, Ity_I8, Ity_V128);
+
+ case Iop_F32ToFixed32Ux2_RZ:
+ case Iop_F32ToFixed32Sx2_RZ:
+ case Iop_Fixed32UToF32x2_RN:
+ case Iop_Fixed32SToF32x2_RN:
+ BINARY(Ity_I64, Ity_I8, Ity_I64);
+
+ case Iop_GetElem8x16:
+ BINARY(Ity_V128, Ity_I8, Ity_I8);
+ case Iop_GetElem16x8:
+ BINARY(Ity_V128, Ity_I8, Ity_I16);
+ case Iop_GetElem32x4:
+ BINARY(Ity_V128, Ity_I8, Ity_I32);
+ case Iop_GetElem64x2:
+ BINARY(Ity_V128, Ity_I8, Ity_I64);
+ case Iop_GetElem8x8:
+ BINARY(Ity_I64, Ity_I8, Ity_I8);
+ case Iop_GetElem16x4:
+ BINARY(Ity_I64, Ity_I8, Ity_I16);
+ case Iop_GetElem32x2:
+ BINARY(Ity_I64, Ity_I8, Ity_I32);
+ case Iop_SetElem8x8:
+ TERNARY(Ity_I64, Ity_I8, Ity_I8, Ity_I64);
+ case Iop_SetElem16x4:
+ TERNARY(Ity_I64, Ity_I8, Ity_I16, Ity_I64);
+ case Iop_SetElem32x2:
+ TERNARY(Ity_I64, Ity_I8, Ity_I32, Ity_I64);
+
+ case Iop_Extract64:
+ TERNARY(Ity_I64, Ity_I64, Ity_I8, Ity_I64);
+ case Iop_ExtractV128:
+ TERNARY(Ity_V128, Ity_V128, Ity_I8, Ity_V128);
+
+ case Iop_QDMulLong16Sx4: case Iop_QDMulLong32Sx2:
+ BINARY(Ity_I64, Ity_I64, Ity_V128);
+
default:
ppIROp(op);
vpanic("typeOfPrimop");
diff --git a/pub/libvex_ir.h b/pub/libvex_ir.h
index 0e291eb..ee4df18 100644
--- a/pub/libvex_ir.h
+++ b/pub/libvex_ir.h
@@ -675,6 +675,49 @@
Iop_CalcFPRF, /* Calc 5 fpscr[FPRF] bits (Class, <, =, >, Unord)
from FP result */
+ /* ------------------ 64-bit SIMD FP ------------------------ */
+
+ /* Conversion to/from int */
+ Iop_I32UtoFx2, Iop_I32StoFx2, /* I32x2 -> F32x2 */
+ Iop_FtoI32Ux2_RZ, Iop_FtoI32Sx2_RZ, /* F32x2 -> I32x2 */
+ /* Fixed32 format is floating-point number with fixed number of fraction
+ bits. The number of fraction bits is passed as a second argument of
+ type I8. */
+ Iop_F32ToFixed32Ux2_RZ, Iop_F32ToFixed32Sx2_RZ, /* fp -> fixed-point */
+ Iop_Fixed32UToF32x2_RN, Iop_Fixed32SToF32x2_RN, /* fixed-point -> fp */
+
+ /* Binary operations */
+ Iop_Max32Fx2, Iop_Min32Fx2,
+ /* Pairwise Min and Max. See integer pairwise operations for more
+ details. */
+ Iop_PwMax32Fx2, Iop_PwMin32Fx2,
+ /* Note: For the following compares, the arm front-end assumes a
+ nan in a lane of either argument returns zero for that lane. */
+ Iop_CmpEQ32Fx2, Iop_CmpGT32Fx2, Iop_CmpGE32Fx2,
+
+ /* Vector Reciprocal Estimate finds an approximate reciprocal of each
+ element in the operand vector, and places the results in the destination
+ vector. */
+ Iop_Recip32Fx2,
+
+ /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
+ Note that if one of the arguments is zero and the other one is infinity
+ of arbitrary sign, the result of the operation is 2.0. */
+ Iop_Recps32Fx2,
+
+ /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
+ square root of each element in the operand vector. */
+ Iop_Rsqrte32Fx2,
+
+ /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
+ Note that if one of the arguments is zero and the other one is infinity
+ of arbitrary sign, the result of the operation is 1.5. */
+ Iop_Rsqrts32Fx2,
+
+ /* Unary */
+ Iop_Neg32Fx2, Iop_Abs32Fx2,
+
+
/* ------------------ 64-bit SIMD Integer. ------------------ */
/* MISC (vector integer cmp != 0) */
@@ -682,54 +725,142 @@
/* ADDITION (normal / unsigned sat / signed sat) */
Iop_Add8x8, Iop_Add16x4, Iop_Add32x2,
- Iop_QAdd8Ux8, Iop_QAdd16Ux4,
- Iop_QAdd8Sx8, Iop_QAdd16Sx4,
+ Iop_QAdd8Ux8, Iop_QAdd16Ux4, Iop_QAdd32Ux2, Iop_QAdd64Ux1,
+ Iop_QAdd8Sx8, Iop_QAdd16Sx4, Iop_QAdd32Sx2, Iop_QAdd64Sx1,
+
+ /* PAIRWISE operations */
+ /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
+ [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
+ Iop_PwAdd8x8, Iop_PwAdd16x4, Iop_PwAdd32x2,
+ Iop_PwMax8Sx8, Iop_PwMax16Sx4, Iop_PwMax32Sx2,
+ Iop_PwMax8Ux8, Iop_PwMax16Ux4, Iop_PwMax32Ux2,
+ Iop_PwMin8Sx8, Iop_PwMin16Sx4, Iop_PwMin32Sx2,
+ Iop_PwMin8Ux8, Iop_PwMin16Ux4, Iop_PwMin32Ux2,
+ /* The lengthening variant is unary. The resulting vector contains half as
+ many elements as the operand, but they are two times wider.
+ Example:
+ Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
+ where a+b and c+d are unsigned 32-bit values. */
+ Iop_PwAddL8Ux8, Iop_PwAddL16Ux4, Iop_PwAddL32Ux2,
+ Iop_PwAddL8Sx8, Iop_PwAddL16Sx4, Iop_PwAddL32Sx2,
/* SUBTRACTION (normal / unsigned sat / signed sat) */
Iop_Sub8x8, Iop_Sub16x4, Iop_Sub32x2,
- Iop_QSub8Ux8, Iop_QSub16Ux4,
- Iop_QSub8Sx8, Iop_QSub16Sx4,
+ Iop_QSub8Ux8, Iop_QSub16Ux4, Iop_QSub32Ux2, Iop_QSub64Ux1,
+ Iop_QSub8Sx8, Iop_QSub16Sx4, Iop_QSub32Sx2, Iop_QSub64Sx1,
- /* MULTIPLICATION (normal / high half of signed/unsigned) */
- Iop_Mul16x4, Iop_Mul32x2,
+ /* ABSOLUTE VALUE */
+ Iop_Abs8x8, Iop_Abs16x4, Iop_Abs32x2,
+
+ /* MULTIPLICATION (normal / high half of signed/unsigned / polynomial ) */
+ Iop_Mul8x8, Iop_Mul16x4, Iop_Mul32x2,
+ Iop_Mul32Fx2,
Iop_MulHi16Ux4,
Iop_MulHi16Sx4,
+ /* Polynomial multiplication treats its arguments as coefficients of
+ polynomials over {0, 1}. */
+ Iop_PolynomialMul8x8,
+
+ /* Vector Saturating Doubling Multiply Returning High Half and
+ Vector Saturating Rounding Doubling Multiply Returning High Half */
+ /* These IROp's multiply corresponding elements in two vectors, double
+ the results, and place the most significant half of the final results
+ in the destination vector. The results are truncated or rounded. If
+ any of the results overflow, they are saturated. */
+ Iop_QDMulHi16Sx4, Iop_QDMulHi32Sx2,
+ Iop_QRDMulHi16Sx4, Iop_QRDMulHi32Sx2,
/* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
Iop_Avg8Ux8,
Iop_Avg16Ux4,
/* MIN/MAX */
- Iop_Max16Sx4,
- Iop_Max8Ux8,
- Iop_Min16Sx4,
- Iop_Min8Ux8,
+ Iop_Max8Sx8, Iop_Max16Sx4, Iop_Max32Sx2,
+ Iop_Max8Ux8, Iop_Max16Ux4, Iop_Max32Ux2,
+ Iop_Min8Sx8, Iop_Min16Sx4, Iop_Min32Sx2,
+ Iop_Min8Ux8, Iop_Min16Ux4, Iop_Min32Ux2,
/* COMPARISON */
Iop_CmpEQ8x8, Iop_CmpEQ16x4, Iop_CmpEQ32x2,
+ Iop_CmpGT8Ux8, Iop_CmpGT16Ux4, Iop_CmpGT32Ux2,
Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2,
+ /* COUNT ones / leading zeroes / leading sign bits (not including topmost
+ bit) */
+ Iop_Cnt8x8,
+ Iop_Clz8Sx8, Iop_Clz16Sx4, Iop_Clz32Sx2,
+ Iop_Cls8Sx8, Iop_Cls16Sx4, Iop_Cls32Sx2,
+
+ /* VECTOR x VECTOR SHIFT / ROTATE */
+ Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2,
+ Iop_Shr8x8, Iop_Shr16x4, Iop_Shr32x2,
+ Iop_Sar8x8, Iop_Sar16x4, Iop_Sar32x2,
+ Iop_Sal8x8, Iop_Sal16x4, Iop_Sal32x2, Iop_Sal64x1,
+
/* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
Iop_ShlN8x8, Iop_ShlN16x4, Iop_ShlN32x2,
- Iop_ShrN16x4, Iop_ShrN32x2,
+ Iop_ShrN8x8, Iop_ShrN16x4, Iop_ShrN32x2,
Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2,
+ /* VECTOR x VECTOR SATURATING SHIFT */
+ Iop_QShl8x8, Iop_QShl16x4, Iop_QShl32x2, Iop_QShl64x1,
+ Iop_QSal8x8, Iop_QSal16x4, Iop_QSal32x2, Iop_QSal64x1,
+ /* VECTOR x INTEGER SATURATING SHIFT */
+ Iop_QShlN8Sx8, Iop_QShlN16Sx4, Iop_QShlN32Sx2, Iop_QShlN64Sx1,
+ Iop_QShlN8x8, Iop_QShlN16x4, Iop_QShlN32x2, Iop_QShlN64x1,
+ Iop_QSalN8x8, Iop_QSalN16x4, Iop_QSalN32x2, Iop_QSalN64x1,
+
/* NARROWING -- narrow 2xI64 into 1xI64, hi half from left arg */
Iop_QNarrow16Ux4,
Iop_QNarrow16Sx4,
Iop_QNarrow32Sx2,
- /* INTERLEAVING -- interleave lanes from low or high halves of
+ /* INTERLEAVING */
+ /* Interleave lanes from low or high halves of
operands. Most-significant result lane is from the left
arg. */
Iop_InterleaveHI8x8, Iop_InterleaveHI16x4, Iop_InterleaveHI32x2,
Iop_InterleaveLO8x8, Iop_InterleaveLO16x4, Iop_InterleaveLO32x2,
+ /* Interleave odd/even lanes of operands. Most-significant result lane
+ is from the left arg. Note that Interleave{Odd,Even}Lanes32x2 are
+ identical to Interleave{HI,LO}32x2 and so are omitted.*/
+ Iop_InterleaveOddLanes8x8, Iop_InterleaveEvenLanes8x8,
+ Iop_InterleaveOddLanes16x4, Iop_InterleaveEvenLanes16x4,
+
/* CONCATENATION -- build a new value by concatenating either
the even or odd lanes of both operands. Note that
Cat{Odd,Even}Lanes32x2 are identical to Interleave{HI,LO}32x2
and so are omitted. */
- Iop_CatOddLanes16x4, Iop_CatEvenLanes16x4,
+ Iop_CatOddLanes8x8, Iop_CatOddLanes16x4,
+ Iop_CatEvenLanes8x8, Iop_CatEvenLanes16x4,
+
+ /* GET / SET elements of VECTOR
+ GET is binop (I64, I8) -> I<elem_size>
+ SET is triop (I64, I8, I<elem_size>) -> I64 */
+ /* Note: the arm back-end handles only constant second argument */
+ Iop_GetElem8x8, Iop_GetElem16x4, Iop_GetElem32x2,
+ Iop_SetElem8x8, Iop_SetElem16x4, Iop_SetElem32x2,
+
+ /* DUPLICATING -- copy value to all lanes */
+ Iop_Dup8x8, Iop_Dup16x4, Iop_Dup32x2,
+
+ /* EXTRACT -- copy 8-arg3 highest bytes from arg1 to 8-arg3 lowest bytes
+ of result and arg3 lowest bytes of arg2 to arg3 highest bytes of
+ result.
+ It is a triop: (I64, I64, I8) -> I64 */
+ /* Note: the arm back-end handles only constant third argument. */
+ Iop_Extract64,
+
+ /* REVERSE the order of elements in each Half-words, Words,
+ Double-words */
+ /* Examples:
+ Reverse16_8x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
+ Reverse32_8x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e]
+ Reverse64_8x8([a,b,c,d,e,f,g,h]) = [h,g,f,e,d,c,b,a] */
+ Iop_Reverse16_8x8,
+ Iop_Reverse32_8x8, Iop_Reverse32_16x4,
+ Iop_Reverse64_8x8, Iop_Reverse64_16x4, Iop_Reverse64_32x2,
/* PERMUTING -- copy src bytes to dst,
as indexed by control vector bytes:
@@ -738,6 +869,10 @@
is undefined. */
Iop_Perm8x8,
+ /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
+ See floating-point equivalents for details. */
+ Iop_Recip32x2, Iop_Rsqrte32x2,
+
/* ------------------ 128-bit SIMD FP. ------------------ */
/* --- 32x4 vector FP --- */
@@ -745,22 +880,59 @@
/* binary */
Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4,
Iop_Max32Fx4, Iop_Min32Fx4,
- /* Note: For the following compares, the ppc front-end assumes a
+ Iop_Add32Fx2, Iop_Sub32Fx2,
+ /* Note: For the following compares, the ppc and arm front-ends assume a
nan in a lane of either argument returns zero for that lane. */
- Iop_CmpEQ32Fx4, Iop_CmpLT32Fx4, Iop_CmpLE32Fx4, Iop_CmpUN32Fx4,
+ Iop_CmpEQ32Fx4, Iop_CmpLT32Fx4, Iop_CmpLE32Fx4, Iop_CmpUN32Fx4,
Iop_CmpGT32Fx4, Iop_CmpGE32Fx4,
+ /* Vector Absolute */
+ Iop_Abs32Fx4,
+
+ /* Pairwise Max and Min. See integer pairwise operations for details. */
+ Iop_PwMax32Fx4, Iop_PwMin32Fx4,
+
/* unary */
- Iop_Recip32Fx4, Iop_Sqrt32Fx4, Iop_RSqrt32Fx4,
+ Iop_Sqrt32Fx4, Iop_RSqrt32Fx4,
+ Iop_Neg32Fx4,
+
+ /* Vector Reciprocal Estimate finds an approximate reciprocal of each
+ element in the operand vector, and places the results in the destination
+ vector. */
+ Iop_Recip32Fx4,
+
+ /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
+ Note that if one of the arguments is zero and the other one is infinity
+ of arbitrary sign, the result of the operation is 2.0. */
+ Iop_Recps32Fx4,
+
+ /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
+ square root of each element in the operand vector. */
+ Iop_Rsqrte32Fx4,
+
+ /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
+ Note that if one of the arguments is zero and the other one is infinity
+ of arbitrary sign, the result of the operation is 1.5. */
+ Iop_Rsqrts32Fx4,
+
/* --- Int to/from FP conversion --- */
/* Unlike the standard fp conversions, these irops take no
rounding mode argument. Instead the irop trailers _R{M,P,N,Z}
indicate the mode: {-inf, +inf, nearest, zero} respectively. */
- Iop_I32UtoFx4, Iop_I32StoFx4, /* I32x4 -> F32x4 */
- Iop_QFtoI32Ux4_RZ, Iop_QFtoI32Sx4_RZ, /* F32x4 -> I32x4 */
+ Iop_I32UtoFx4, Iop_I32StoFx4, /* I32x4 -> F32x4 */
+ Iop_FtoI32Ux4_RZ, Iop_FtoI32Sx4_RZ, /* F32x4 -> I32x4 */
+ Iop_QFtoI32Ux4_RZ, Iop_QFtoI32Sx4_RZ, /* F32x4 -> I32x4 (with saturation) */
Iop_RoundF32x4_RM, Iop_RoundF32x4_RP, /* round to fp integer */
Iop_RoundF32x4_RN, Iop_RoundF32x4_RZ, /* round to fp integer */
+ /* Fixed32 format is floating-point number with fixed number of fraction
+ bits. The number of fraction bits is passed as a second argument of
+ type I8. */
+ Iop_F32ToFixed32Ux4_RZ, Iop_F32ToFixed32Sx4_RZ, /* fp -> fixed-point */
+ Iop_Fixed32UToF32x4_RN, Iop_Fixed32SToF32x4_RN, /* fixed-point -> fp */
+
+ /* --- Single to/from half conversion --- */
+ Iop_F32toF16x4, Iop_F16toF32x4, /* F32x4 <-> F16x4 */
/* --- 32x4 lowest-lane-only scalar FP --- */
@@ -826,22 +998,56 @@
Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2,
/* ADDITION (normal / unsigned sat / signed sat) */
- Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2,
- Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4,
- Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4,
+ Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2,
+ Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2,
+ Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2,
/* SUBTRACTION (normal / unsigned sat / signed sat) */
- Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2,
- Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4,
- Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4,
+ Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2,
+ Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2,
+ Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2,
/* MULTIPLICATION (normal / high half of signed/unsigned) */
- Iop_Mul16x8, Iop_Mul32x4,
- Iop_MulHi16Ux8, Iop_MulHi32Ux4,
- Iop_MulHi16Sx8, Iop_MulHi32Sx4,
+ Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4,
+ Iop_MulHi16Ux8, Iop_MulHi32Ux4,
+ Iop_MulHi16Sx8, Iop_MulHi32Sx4,
/* (widening signed/unsigned of even lanes, with lowest lane=zero) */
Iop_MullEven8Ux16, Iop_MullEven16Ux8,
Iop_MullEven8Sx16, Iop_MullEven16Sx8,
+ /* FIXME: document these */
+ Iop_Mull8Ux8, Iop_Mull8Sx8,
+ Iop_Mull16Ux4, Iop_Mull16Sx4,
+ Iop_Mull32Ux2, Iop_Mull32Sx2,
+ /* Vector Saturating Doubling Multiply Returning High Half and
+ Vector Saturating Rounding Doubling Multiply Returning High Half */
+ /* These IROp's multiply corresponding elements in two vectors, double
+ the results, and place the most significant half of the final results
+ in the destination vector. The results are truncated or rounded. If
+ any of the results overflow, they are saturated. */
+ Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4,
+ Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4,
+ /* Doubling saturating multiplication (long) (I64, I64) -> V128 */
+ Iop_QDMulLong16Sx4, Iop_QDMulLong32Sx2,
+ /* Polynomial multiplication treats its arguments as coefficients of
+ polynomials over {0, 1}. */
+ Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */
+ Iop_PolynomialMull8x8, /* (I64, I64) -> V128 */
+
+ /* PAIRWISE operations */
+ /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
+ [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
+ Iop_PwAdd8x16, Iop_PwAdd16x8, Iop_PwAdd32x4,
+ Iop_PwAdd32Fx2,
+ /* The lengthening variant is unary. The resulting vector contains half as
+ many elements as the operand, but they are two times wider.
+ Example:
+ Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
+ where a+b and c+d are unsigned 32-bit values. */
+ Iop_PwAddL8Ux16, Iop_PwAddL16Ux8, Iop_PwAddL32Ux4,
+ Iop_PwAddL8Sx16, Iop_PwAddL16Sx8, Iop_PwAddL32Sx4,
+
+ /* ABSOLUTE VALUE */
+ Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4,
/* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4,
@@ -858,40 +1064,110 @@
Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2,
Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4,
+ /* COUNT ones / leading zeroes / leading sign bits (not including topmost
+ bit) */
+ Iop_Cnt8x16,
+ Iop_Clz8Sx16, Iop_Clz16Sx8, Iop_Clz32Sx4,
+ Iop_Cls8Sx16, Iop_Cls16Sx8, Iop_Cls32Sx4,
+
/* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2,
Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2,
- Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4,
+ Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2,
/* VECTOR x VECTOR SHIFT / ROTATE */
- Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4,
- Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4,
- Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4,
+ Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2,
+ Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2,
+ Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2,
+ Iop_Sal8x16, Iop_Sal16x8, Iop_Sal32x4, Iop_Sal64x2,
Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4,
+ /* VECTOR x VECTOR SATURATING SHIFT */
+ Iop_QShl8x16, Iop_QShl16x8, Iop_QShl32x4, Iop_QShl64x2,
+ Iop_QSal8x16, Iop_QSal16x8, Iop_QSal32x4, Iop_QSal64x2,
+ /* VECTOR x INTEGER SATURATING SHIFT */
+ Iop_QShlN8Sx16, Iop_QShlN16Sx8, Iop_QShlN32Sx4, Iop_QShlN64Sx2,
+ Iop_QShlN8x16, Iop_QShlN16x8, Iop_QShlN32x4, Iop_QShlN64x2,
+ Iop_QSalN8x16, Iop_QSalN16x8, Iop_QSalN32x4, Iop_QSalN64x2,
+
/* NARROWING -- narrow 2xV128 into 1xV128, hi half from left arg */
/* Note: the 16{U,S} and 32{U,S} are the pre-narrow lane widths. */
Iop_QNarrow16Ux8, Iop_QNarrow32Ux4,
Iop_QNarrow16Sx8, Iop_QNarrow32Sx4,
Iop_Narrow16x8, Iop_Narrow32x4,
+ /* Shortening V128->I64, lo half from each element */
+ Iop_Shorten16x8, Iop_Shorten32x4, Iop_Shorten64x2,
+ /* Saturating shortening from signed source to signed/unsigned destination */
+ Iop_QShortenS16Sx8, Iop_QShortenS32Sx4, Iop_QShortenS64Sx2,
+ Iop_QShortenU16Sx8, Iop_QShortenU32Sx4, Iop_QShortenU64Sx2,
+ /* Saturating shortening from unsigned source to unsigned destination */
+ Iop_QShortenU16Ux8, Iop_QShortenU32Ux4, Iop_QShortenU64Ux2,
- /* INTERLEAVING -- interleave lanes from low or high halves of
+ /* WIDENING */
+ /* Longening --- sign or zero extends each element of the argument
+ vector to the twice original size. The resulting vector consists of
+ the same number of elements but each element and the vector itself
+ are two times wider.
+ All operations are I64->V128.
+ Example
+ Iop_Longen32Sx2( [a, b] ) = [c, d]
+ where c = Iop_32Sto64(a) and d = Iop_32Sto64(b) */
+ Iop_Longen8Ux8, Iop_Longen16Ux4, Iop_Longen32Ux2,
+ Iop_Longen8Sx8, Iop_Longen16Sx4, Iop_Longen32Sx2,
+
+ /* INTERLEAVING */
+ /* Interleave lanes from low or high halves of
operands. Most-significant result lane is from the left
arg. */
Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
Iop_InterleaveHI32x4, Iop_InterleaveHI64x2,
- Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
+ Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
Iop_InterleaveLO32x4, Iop_InterleaveLO64x2,
+ /* Interleave odd/even lanes of operands. Most-significant result lane
+ is from the left arg. */
+ Iop_InterleaveOddLanes8x16, Iop_InterleaveEvenLanes8x16,
+ Iop_InterleaveOddLanes16x8, Iop_InterleaveEvenLanes16x8,
+ Iop_InterleaveOddLanes32x4, Iop_InterleaveEvenLanes32x4,
+
+ /* CONCATENATION -- build a new value by concatenating either
+ the even or odd lanes of both operands. */
+ Iop_CatOddLanes8x16, Iop_CatOddLanes16x8, Iop_CatOddLanes32x4,
+ Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, Iop_CatEvenLanes32x4,
+
+ /* GET elements of VECTOR
+ GET is binop (V128, I8) -> I<elem_size> */
+ /* Note: the arm back-end handles only constant second argument. */
+ Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4, Iop_GetElem64x2,
/* DUPLICATING -- copy value to all lanes */
- Iop_Dup8x16, Iop_Dup16x8, Iop_Dup32x4,
+ Iop_Dup8x16, Iop_Dup16x8, Iop_Dup32x4,
+
+ /* EXTRACT -- copy 16-arg3 highest bytes from arg1 to 16-arg3 lowest bytes
+ of result and arg3 lowest bytes of arg2 to arg3 highest bytes of
+ result.
+ It is a triop: (V128, V128, I8) -> V128 */
+ /* Note: the ARM back end handles only constant arg3 in this operation. */
+ Iop_ExtractV128,
+
+ /* REVERSE the order of elements in each Half-words, Words,
+ Double-words */
+ /* Examples:
+ Reverse32_16x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
+ Reverse64_16x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e] */
+ Iop_Reverse16_8x16,
+ Iop_Reverse32_8x16, Iop_Reverse32_16x8,
+ Iop_Reverse64_8x16, Iop_Reverse64_16x8, Iop_Reverse64_32x4,
/* PERMUTING -- copy src bytes to dst,
as indexed by control vector bytes:
for i in 0 .. 15 . result[i] = argL[ argR[i] ]
argR[i] values may only be in the range 0 .. 15, else behaviour
is undefined. */
- Iop_Perm8x16
+ Iop_Perm8x16,
+
+ /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
+ See floating-point equivalents for details. */
+ Iop_Recip32x4, Iop_Rsqrte32x4
}
IROp;
@@ -1178,6 +1454,8 @@
IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_7 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
IRExpr*, IRExpr*, IRExpr* );
+extern IRExpr** mkIRExprVec_8 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
+ IRExpr*, IRExpr*, IRExpr*, IRExpr*);
/* IRExpr copiers:
- shallowCopy: shallow-copy (ie. create a new vector that shares the
diff --git a/test_main.c b/test_main.c
index dad1270..2fc41a6 100644
--- a/test_main.c
+++ b/test_main.c
@@ -1647,13 +1647,21 @@
return binary16Ix8(mce, vatom1, vatom2);
case Iop_Sub32x4:
+ case Iop_QSub32Sx4:
+ case Iop_QSub32Ux4:
case Iop_CmpGT32Sx4:
case Iop_CmpEQ32x4:
case Iop_Add32x4:
+ case Iop_QAdd32Ux4:
+ case Iop_QAdd32Sx4:
return binary32Ix4(mce, vatom1, vatom2);
case Iop_Sub64x2:
+ case Iop_QSub64Ux2:
+ case Iop_QSub64Sx2:
case Iop_Add64x2:
+ case Iop_QAdd64Ux2:
+ case Iop_QAdd64Sx2:
return binary64Ix2(mce, vatom1, vatom2);
case Iop_QNarrow32Sx4: