More AVX insns:
VMOVDDUP ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 12 /r
VMOVLPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 12 /r
VMOVLPS xmm1, m64 = VEX.128.0F.WIG 13 /r
VRCPSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 53 /r
VRCPPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 53 /r
VRCPPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 53 /r
VPSADBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F6 /r
VPSIGNB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 08 /r
VPSIGNW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 09 /r
VPSIGND xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0A /r
VPMULHRSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0B /r
VBROADCASTF128 m128, ymm1 = VEX.256.66.0F38.WIG 1A /r
VPEXTRW = VEX.128.66.0F3A.W0 15 /r ib
(Jakub Jelinek, jakub@redhat.com), #273475 comment 137.
git-svn-id: svn://svn.valgrind.org/vex/trunk@2409 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_amd64_toIR.c b/priv/guest_amd64_toIR.c
index 4ae6316..f4c5625 100644
--- a/priv/guest_amd64_toIR.c
+++ b/priv/guest_amd64_toIR.c
@@ -1493,6 +1493,11 @@
return IRExpr_Get( ymmGuestRegLane128offset(ymmreg,laneno), Ity_V128 );
}
+static IRExpr* getYMMRegLane64 ( UInt ymmreg, Int laneno )
+{
+ return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_I64 );
+}
+
static IRExpr* getYMMRegLane32 ( UInt ymmreg, Int laneno )
{
return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_I32 );
@@ -1516,6 +1521,12 @@
stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
}
+static void putYMMRegLane64 ( UInt ymmreg, Int laneno, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
+ stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
+}
+
static void putYMMRegLane32F ( UInt ymmreg, Int laneno, IRExpr* e )
{
vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
@@ -10866,6 +10877,29 @@
}
+static IRTemp math_PSADBW_128 ( IRTemp dV, IRTemp sV )
+{
+ IRTemp s1, s0, d1, d0;
+ s1 = s0 = d1 = d0 = IRTemp_INVALID;
+
+ breakupV128to64s( sV, &s1, &s0 );
+ breakupV128to64s( dV, &d1, &d0 );
+
+ IRTemp res = newTemp(Ity_V128);
+ assign( res,
+ binop(Iop_64HLtoV128,
+ mkIRExprCCall(Ity_I64, 0/*regparms*/,
+ "amd64g_calculate_mmx_psadbw",
+ &amd64g_calculate_mmx_psadbw,
+ mkIRExprVec_2( mkexpr(s1), mkexpr(d1))),
+ mkIRExprCCall(Ity_I64, 0/*regparms*/,
+ "amd64g_calculate_mmx_psadbw",
+ &amd64g_calculate_mmx_psadbw,
+ mkIRExprVec_2( mkexpr(s0), mkexpr(d0)))) );
+ return res;
+}
+
+
static Long dis_MASKMOVDQU ( VexAbiInfo* vbi, Prefix pfx,
Long delta, Bool isAvx )
{
@@ -13818,47 +13852,24 @@
/* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
from E(xmm or mem) to G(xmm) */
if (have66noF2noF3(pfx) && sz == 2) {
- IRTemp s1V = newTemp(Ity_V128);
- IRTemp s2V = newTemp(Ity_V128);
- IRTemp dV = newTemp(Ity_V128);
- IRTemp s1Hi = newTemp(Ity_I64);
- IRTemp s1Lo = newTemp(Ity_I64);
- IRTemp s2Hi = newTemp(Ity_I64);
- IRTemp s2Lo = newTemp(Ity_I64);
- IRTemp dHi = newTemp(Ity_I64);
- IRTemp dLo = newTemp(Ity_I64);
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
modrm = getUChar(delta);
+ UInt rG = gregOfRexRM(pfx,modrm);
if (epartIsReg(modrm)) {
- assign( s1V, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ UInt rE = eregOfRexRM(pfx,modrm);
+ assign( sV, getXMMReg(rE) );
delta += 1;
- DIP("psadbw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
- nameXMMReg(gregOfRexRM(pfx,modrm)));
+ DIP("psadbw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
} else {
addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
- assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
delta += alen;
- DIP("psadbw %s,%s\n", dis_buf,
- nameXMMReg(gregOfRexRM(pfx,modrm)));
+ DIP("psadbw %s,%s\n", dis_buf, nameXMMReg(rG));
}
- assign( s2V, getXMMReg(gregOfRexRM(pfx,modrm)) );
- assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
- assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
- assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
- assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
- assign( dHi, mkIRExprCCall(
- Ity_I64, 0/*regparms*/,
- "amd64g_calculate_mmx_psadbw",
- &amd64g_calculate_mmx_psadbw,
- mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
- ));
- assign( dLo, mkIRExprCCall(
- Ity_I64, 0/*regparms*/,
- "amd64g_calculate_mmx_psadbw",
- &amd64g_calculate_mmx_psadbw,
- mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
- ));
- assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
- putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
+ assign( dV, getXMMReg(rG) );
+ putXMMReg( rG, mkexpr( math_PSADBW_128 ( dV, sV ) ) );
+
goto decode_success;
}
break;
@@ -14000,6 +14011,38 @@
}
+static Long dis_MOVDDUP_256 ( VexAbiInfo* vbi, Prefix pfx,
+ Long delta )
+{
+ IRTemp addr = IRTemp_INVALID;
+ Int alen = 0;
+ HChar dis_buf[50];
+ IRTemp d0 = newTemp(Ity_I64);
+ IRTemp d1 = newTemp(Ity_I64);
+ UChar modrm = getUChar(delta);
+ UInt rG = gregOfRexRM(pfx,modrm);
+ if (epartIsReg(modrm)) {
+ UInt rE = eregOfRexRM(pfx,modrm);
+ DIP("vmovddup %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
+ delta += 1;
+ assign ( d0, getYMMRegLane64(rE, 0) );
+ assign ( d1, getYMMRegLane64(rE, 2) );
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
+ assign( d1, loadLE(Ity_I64, binop(Iop_Add64,
+ mkexpr(addr), mkU64(16))) );
+ DIP("vmovddup %s,%s\n", dis_buf, nameYMMReg(rG));
+ delta += alen;
+ }
+ putYMMRegLane64( rG, 0, mkexpr(d0) );
+ putYMMRegLane64( rG, 1, mkexpr(d0) );
+ putYMMRegLane64( rG, 2, mkexpr(d1) );
+ putYMMRegLane64( rG, 3, mkexpr(d1) );
+ return delta;
+}
+
+
static Long dis_MOVSxDUP_128 ( VexAbiInfo* vbi, Prefix pfx,
Long delta, Bool isAvx, Bool isL )
{
@@ -16544,6 +16587,61 @@
/*--- ---*/
/*------------------------------------------------------------*/
+static Long dis_PEXTRW ( VexAbiInfo* vbi, Prefix pfx,
+ Long delta, Bool isAvx )
+{
+ IRTemp addr = IRTemp_INVALID;
+ IRTemp t0 = IRTemp_INVALID;
+ IRTemp t1 = IRTemp_INVALID;
+ IRTemp t2 = IRTemp_INVALID;
+ IRTemp t3 = IRTemp_INVALID;
+ UChar modrm = getUChar(delta);
+ Int alen = 0;
+ HChar dis_buf[50];
+ UInt rG = gregOfRexRM(pfx,modrm);
+ Int imm8_20;
+ IRTemp xmm_vec = newTemp(Ity_V128);
+ IRTemp d16 = newTemp(Ity_I16);
+ HChar* mbV = isAvx ? "v" : "";
+
+ vassert(0==getRexW(pfx)); /* ensured by caller */
+ assign( xmm_vec, getXMMReg(rG) );
+ breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
+
+ if ( epartIsReg( modrm ) ) {
+ imm8_20 = (Int)(getUChar(delta+1) & 7);
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
+ imm8_20 = (Int)(getUChar(delta+alen) & 7);
+ }
+
+ switch (imm8_20) {
+ case 0: assign(d16, unop(Iop_32to16, mkexpr(t0))); break;
+ case 1: assign(d16, unop(Iop_32HIto16, mkexpr(t0))); break;
+ case 2: assign(d16, unop(Iop_32to16, mkexpr(t1))); break;
+ case 3: assign(d16, unop(Iop_32HIto16, mkexpr(t1))); break;
+ case 4: assign(d16, unop(Iop_32to16, mkexpr(t2))); break;
+ case 5: assign(d16, unop(Iop_32HIto16, mkexpr(t2))); break;
+ case 6: assign(d16, unop(Iop_32to16, mkexpr(t3))); break;
+ case 7: assign(d16, unop(Iop_32HIto16, mkexpr(t3))); break;
+ default: vassert(0);
+ }
+
+ if ( epartIsReg( modrm ) ) {
+ UInt rE = eregOfRexRM(pfx,modrm);
+ putIReg32( rE, unop(Iop_16Uto32, mkexpr(d16)) );
+ delta += 1+1;
+ DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20,
+ nameXMMReg( rG ), nameIReg32( rE ) );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(d16) );
+ delta += alen+1;
+ DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20, nameXMMReg( rG ), dis_buf );
+ }
+ return delta;
+}
+
+
static Long dis_PEXTRD ( VexAbiInfo* vbi, Prefix pfx,
Long delta, Bool isAvx )
{
@@ -17423,48 +17521,7 @@
Extract Word from xmm, store in mem or zero-extend + store in gen.reg.
(XMM) */
if (have66noF2noF3(pfx) && sz == 2) {
-
- Int imm8_20;
- IRTemp xmm_vec = newTemp(Ity_V128);
- IRTemp src_word = newTemp(Ity_I16);
-
- modrm = getUChar(delta);
- assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
- breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
-
- if ( epartIsReg( modrm ) ) {
- imm8_20 = (Int)(getUChar(delta+1) & 7);
- } else {
- addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
- imm8_20 = (Int)(getUChar(delta+alen) & 7);
- }
-
- switch ( imm8_20 ) {
- case 0: assign( src_word, unop(Iop_32to16, mkexpr(t0)) ); break;
- case 1: assign( src_word, unop(Iop_32HIto16, mkexpr(t0)) ); break;
- case 2: assign( src_word, unop(Iop_32to16, mkexpr(t1)) ); break;
- case 3: assign( src_word, unop(Iop_32HIto16, mkexpr(t1)) ); break;
- case 4: assign( src_word, unop(Iop_32to16, mkexpr(t2)) ); break;
- case 5: assign( src_word, unop(Iop_32HIto16, mkexpr(t2)) ); break;
- case 6: assign( src_word, unop(Iop_32to16, mkexpr(t3)) ); break;
- case 7: assign( src_word, unop(Iop_32HIto16, mkexpr(t3)) ); break;
- default: vassert(0);
- }
-
- if ( epartIsReg( modrm ) ) {
- putIReg64( eregOfRexRM(pfx,modrm),
- unop(Iop_16Uto64, mkexpr(src_word)) );
- delta += 1+1;
- DIP( "pextrw $%d, %s,%s\n", imm8_20,
- nameXMMReg( gregOfRexRM(pfx, modrm) ),
- nameIReg64( eregOfRexRM(pfx, modrm) ) );
- } else {
- storeLE( mkexpr(addr), mkexpr(src_word) );
- delta += alen+1;
- DIP( "pextrw $%d, %s,%s\n",
- imm8_20, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
- }
-
+ delta = dis_PEXTRW( vbi, pfx, delta, False/*!isAvx*/ );
goto decode_success;
}
break;
@@ -21519,6 +21576,11 @@
delta = dis_MOVDDUP_128( vbi, pfx, delta, True/*isAvx*/ );
goto decode_success;
}
+ /* VMOVDDUP ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 12 /r */
+ if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
+ delta = dis_MOVDDUP_256( vbi, pfx, delta );
+ goto decode_success;
+ }
/* VMOVHLPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 12 /r */
/* Insn only exists in reg form */
if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
@@ -21538,10 +21600,12 @@
*uses_vvvv = True;
goto decode_success;
}
+ /* VMOVLPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 12 /r */
+ /* Insn exists only in mem form, it appears. */
/* VMOVLPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 12 /r */
/* Insn exists only in mem form, it appears. */
- if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
- && !epartIsReg(getUChar(delta))) {
+ if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
+ && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
UChar modrm = getUChar(delta);
UInt rG = gregOfRexRM(pfx, modrm);
UInt rV = getVexNvvvv(pfx);
@@ -21571,10 +21635,12 @@
break;
case 0x13:
+ /* VMOVLPS xmm1, m64 = VEX.128.0F.WIG 13 /r */
+ /* Insn exists only in mem form, it appears. */
/* VMOVLPD xmm1, m64 = VEX.128.66.0F.WIG 13 /r */
/* Insn exists only in mem form, it appears. */
- if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
- && !epartIsReg(getUChar(delta))) {
+ if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
+ && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
UChar modrm = getUChar(delta);
UInt rG = gregOfRexRM(pfx, modrm);
addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
@@ -22224,6 +22290,27 @@
}
break;
+ case 0x53:
+ /* VRCPSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 53 /r */
+ if (haveF3no66noF2(pfx)) {
+ delta = dis_AVX128_E_V_to_G_lo32_unary(
+ uses_vvvv, vbi, pfx, delta, "vrcpss", Iop_Recip32F0x4 );
+ goto decode_success;
+ }
+ /* VRCPPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 53 /r */
+ if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
+ delta = dis_AVX128_E_to_G_unary_all(
+ uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_Recip32Fx4 );
+ goto decode_success;
+ }
+ /* VRCPPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 53 /r */
+ if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
+ delta = dis_AVX256_E_to_G_unary_all(
+ uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_Recip32Fx8 );
+ goto decode_success;
+ }
+ break;
+
case 0x54:
/* VANDPD r/m, rV, r ::: r = rV & r/m */
/* VANDPD = VEX.NDS.128.66.0F.WIG 54 /r */
@@ -23301,14 +23388,20 @@
/* Moves from G to E, so is a store-form insn */
/* Intel docs list this in the VMOVD entry for some reason. */
if (have66noF2noF3(pfx)
- && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/
- && epartIsReg(getUChar(delta))) {
+ && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
UChar modrm = getUChar(delta);
UInt rG = gregOfRexRM(pfx,modrm);
- UInt rE = eregOfRexRM(pfx,modrm);
- DIP("vmovq %s,%s\n", nameXMMReg(rG), nameIReg64(rE));
- putIReg64(rE, getXMMRegLane64(rG, 0));
- delta += 1;
+ if (epartIsReg(modrm)) {
+ UInt rE = eregOfRexRM(pfx,modrm);
+ DIP("vmovq %s,%s\n", nameXMMReg(rG), nameIReg64(rE));
+ putIReg64(rE, getXMMRegLane64(rG, 0));
+ delta += 1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ storeLE( mkexpr(addr), getXMMRegLane64(rG, 0) );
+ DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
+ delta += alen;
+ }
goto decode_success;
}
/* VMOVD xmm1, m32/r32 = VEX.128.66.0F.W0 7E /r (reg case only) */
@@ -24100,6 +24193,16 @@
}
break;
+ case 0xF6:
+ /* VPSADBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F6 /r */
+ if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
+ delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
+ uses_vvvv, vbi, pfx, delta,
+ "vpsadbw", math_PSADBW_128 );
+ goto decode_success;
+ }
+ break;
+
case 0xF7:
/* VMASKMOVDQU xmm2, xmm1 = VEX.128.66.0F.WIG F7 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
@@ -24327,6 +24430,103 @@
}
break;
+ case 0x08:
+ case 0x09:
+ case 0x0A:
+ /* VPSIGNB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 08 /r */
+ /* VPSIGNW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 09 /r */
+ /* VPSIGND xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0A /r */
+ if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi, sLo, dHi, dLo;
+ sHi = sLo = dHi = dLo = IRTemp_INVALID;
+ UChar ch = '?';
+ Int laneszB = 0;
+ UChar modrm = getUChar(delta);
+ UInt rG = gregOfRexRM(pfx,modrm);
+ UInt rV = getVexNvvvv(pfx);
+
+ switch (opc) {
+ case 0x08: laneszB = 1; ch = 'b'; break;
+ case 0x09: laneszB = 2; ch = 'w'; break;
+ case 0x0A: laneszB = 4; ch = 'd'; break;
+ default: vassert(0);
+ }
+
+ assign( dV, getXMMReg(rV) );
+
+ if (epartIsReg(modrm)) {
+ UInt rE = eregOfRexRM(pfx,modrm);
+ assign( sV, getXMMReg(rE) );
+ delta += 1;
+ DIP("vpsign%c %s,%s,%s\n", ch, nameXMMReg(rE),
+ nameXMMReg(rV), nameXMMReg(rG));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += alen;
+ DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
+ nameXMMReg(rV), nameXMMReg(rG));
+ }
+
+ breakupV128to64s( dV, &dHi, &dLo );
+ breakupV128to64s( sV, &sHi, &sLo );
+
+ putYMMRegLoAndZU(
+ rG,
+ binop(Iop_64HLtoV128,
+ dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
+ dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
+ )
+ );
+ *uses_vvvv = True;
+ goto decode_success;
+ }
+ break;
+
+ case 0x0B:
+ /* VPMULHRSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0B /r */
+ if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi, sLo, dHi, dLo;
+ sHi = sLo = dHi = dLo = IRTemp_INVALID;
+ UChar modrm = getUChar(delta);
+ UInt rG = gregOfRexRM(pfx,modrm);
+ UInt rV = getVexNvvvv(pfx);
+
+ assign( dV, getXMMReg(rV) );
+
+ if (epartIsReg(modrm)) {
+ UInt rE = eregOfRexRM(pfx,modrm);
+ assign( sV, getXMMReg(rE) );
+ delta += 1;
+ DIP("vpmulhrsw %s,%s,%s\n", nameXMMReg(rE),
+ nameXMMReg(rV), nameXMMReg(rG));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += alen;
+ DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
+ nameXMMReg(rV), nameXMMReg(rG));
+ }
+
+ breakupV128to64s( dV, &dHi, &dLo );
+ breakupV128to64s( sV, &sHi, &sLo );
+
+ putYMMRegLoAndZU(
+ rG,
+ binop(Iop_64HLtoV128,
+ dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
+ dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
+ )
+ );
+ *uses_vvvv = True;
+ goto decode_success;
+ }
+ break;
+
case 0x0C:
/* VPERMILPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0C /r */
if (have66noF2noF3(pfx)
@@ -24497,7 +24697,7 @@
IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
putYMMRegLoAndZU(rG, res);
goto decode_success;
- }
+ }
/* VBROADCASTSS m32, ymm1 = VEX.256.66.0F38.WIG 18 /r */
if (have66noF2noF3(pfx)
&& 1==getVexL(pfx)/*256*/
@@ -24515,8 +24715,8 @@
mkexpr(t64), mkexpr(t64));
putYMMReg(rG, res);
goto decode_success;
- }
- break;
+ }
+ break;
case 0x19:
/* VBROADCASTSD m64, ymm1 = VEX.256.66.0F38.WIG 19 /r */
@@ -24534,8 +24734,25 @@
mkexpr(t64), mkexpr(t64));
putYMMReg(rG, res);
goto decode_success;
- }
- break;
+ }
+ break;
+
+ case 0x1A:
+ /* VBROADCASTF128 m128, ymm1 = VEX.256.66.0F38.WIG 1A /r */
+ if (have66noF2noF3(pfx)
+ && 1==getVexL(pfx)/*256*/
+ && !epartIsReg(getUChar(delta))) {
+ UChar modrm = getUChar(delta);
+ UInt rG = gregOfRexRM(pfx, modrm);
+ addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
+ delta += alen;
+ DIP("vbroadcastf128 %s,%s\n", dis_buf, nameYMMReg(rG));
+ IRTemp t128 = newTemp(Ity_V128);
+ assign(t128, loadLE(Ity_V128, mkexpr(addr)));
+ putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
+ goto decode_success;
+ }
+ break;
case 0x1C:
/* VPABSB xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1C /r */
@@ -25534,6 +25751,16 @@
}
break;
+ case 0x15:
+ /* VPEXTRW imm8, reg/m16, xmm2 */
+ /* VPEXTRW = VEX.128.66.0F3A.W0 15 /r ib */
+ if (have66noF2noF3(pfx)
+ && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
+ delta = dis_PEXTRW( vbi, pfx, delta, True/*isAvx*/ );
+ goto decode_success;
+ }
+ break;
+
case 0x16:
/* VPEXTRD imm8, r32/m32, xmm2 */
/* VPEXTRD = VEX.128.66.0F3A.W0 16 /r ib */
diff --git a/priv/host_amd64_isel.c b/priv/host_amd64_isel.c
index b299348..86eb2d8 100644
--- a/priv/host_amd64_isel.c
+++ b/priv/host_amd64_isel.c
@@ -3444,6 +3444,7 @@
return;
}
+ case Iop_Recip32Fx8: op = Asse_RCPF; goto do_32Fx8_unary;
case Iop_Sqrt32Fx8: op = Asse_SQRTF; goto do_32Fx8_unary;
case Iop_RSqrt32Fx8: op = Asse_RSQRTF; goto do_32Fx8_unary;
do_32Fx8_unary:
diff --git a/priv/ir_defs.c b/priv/ir_defs.c
index 3c93aa9..71a8e27 100644
--- a/priv/ir_defs.c
+++ b/priv/ir_defs.c
@@ -619,6 +619,7 @@
case Iop_Recip32x2: vex_printf("Recip32x2"); return;
case Iop_Recip32Fx2: vex_printf("Recip32Fx2"); return;
case Iop_Recip32Fx4: vex_printf("Recip32Fx4"); return;
+ case Iop_Recip32Fx8: vex_printf("Recip32Fx8"); return;
case Iop_Recip32x4: vex_printf("Recip32x4"); return;
case Iop_Recip32F0x4: vex_printf("Recip32F0x4"); return;
case Iop_Recip64Fx2: vex_printf("Recip64Fx2"); return;
@@ -2826,6 +2827,7 @@
case Iop_RSqrt32Fx8:
case Iop_Sqrt32Fx8:
case Iop_Sqrt64Fx4:
+ case Iop_Recip32Fx8:
UNARY(Ity_V256, Ity_V256);
default:
diff --git a/pub/libvex_ir.h b/pub/libvex_ir.h
index b1fd1f4..acac2c3 100644
--- a/pub/libvex_ir.h
+++ b/pub/libvex_ir.h
@@ -1454,6 +1454,7 @@
Iop_Sqrt32Fx8,
Iop_Sqrt64Fx4,
Iop_RSqrt32Fx8,
+ Iop_Recip32Fx8,
Iop_Max32Fx8, Iop_Min32Fx8,
Iop_Max64Fx4, Iop_Min64Fx4