More support for memchecking 128-bit SIMD code.
git-svn-id: svn://svn.valgrind.org/vex/trunk@644 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/host-x86/hdefs.c b/priv/host-x86/hdefs.c
index d61f216..c6d472f 100644
--- a/priv/host-x86/hdefs.c
+++ b/priv/host-x86/hdefs.c
@@ -1855,6 +1855,8 @@
subopc_imm = 0; opc_imma = 0x05; break;
case Xalu_SUB: opc = 0x2B; opc_rr = 0x29;
subopc_imm = 5; opc_imma = 0x2D; break;
+ case Xalu_SBB: opc = 0x1B; opc_rr = 0x19;
+ subopc_imm = 3; opc_imma = 0x1D; break;
case Xalu_AND: opc = 0x23; opc_rr = 0x21;
subopc_imm = 4; opc_imma = 0x25; break;
case Xalu_XOR: opc = 0x33; opc_rr = 0x31;
diff --git a/priv/host-x86/isel.c b/priv/host-x86/isel.c
index a913691..7d14df2 100644
--- a/priv/host-x86/isel.c
+++ b/priv/host-x86/isel.c
@@ -2450,6 +2450,33 @@
if (e->tag == Iex_Unop) {
switch (e->Iex.Unop.op) {
+ case Iop_CmpNEZ32x4: {
+ /* Sigh, we have to generate crappy code for SSE1. */
+ /* The vector is spilled to the stack; then, for each 32-bit lane:
+ movl lane, %r ; negl %r (now CF = lane==0 ? 0 : 1)
+ sbbl %r, %r (now %r = 0 - CF = 1Sto32(CF), i.e. 0 or all ones)
+ movl %r, lane
+ */
+ Int i;
+ X86AMode* am;
+ X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegV(env);
+ HReg r32 = newVRegI(env);
+ sub_from_esp(env, 16);
+ addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
+ for (i = 0; i < 4; i++) {
+ am = X86AMode_IR(i*4, hregX86_ESP());
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
+ addInstr(env, X86Instr_Unary32(Xun_NEG, X86RM_Reg(r32)));
+ addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
+ addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
+ }
+ addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
+ add_to_esp(env, 16);
+ return dst;
+ }
+
case Iop_Recip32Fx4: op = Xsse_RCPF; goto do_32Fx4_unary;
case Iop_RSqrt32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
case Iop_Sqrt32Fx4: op = Xsse_SQRTF; goto do_32Fx4_unary;
diff --git a/priv/ir/irdefs.c b/priv/ir/irdefs.c
index 1308c64..0f0306c 100644
--- a/priv/ir/irdefs.c
+++ b/priv/ir/irdefs.c
@@ -295,9 +295,10 @@
case Iop_Set128lo32: vex_printf("Set128lo32"); return;
case Iop_Set128lo64: vex_printf("Set128lo64"); return;
- case Iop_And128: vex_printf("And128"); return;
- case Iop_Or128: vex_printf("Or128"); return;
- case Iop_Xor128: vex_printf("Xor128"); return;
+ case Iop_And128: vex_printf("And128"); return;
+ case Iop_Or128: vex_printf("Or128"); return;
+ case Iop_Xor128: vex_printf("Xor128"); return;
+ case Iop_CmpNEZ32x4: vex_printf("CmpNEZ32x4"); return;
case Iop_Add8x16: vex_printf("Add8x16"); return;
case Iop_Add16x8: vex_printf("Add16x8"); return;
@@ -1275,6 +1276,7 @@
case Iop_RSqrt64Fx2: case Iop_RSqrt64F0x2:
case Iop_Sqrt32Fx4: case Iop_Sqrt32F0x4:
case Iop_Sqrt64Fx2: case Iop_Sqrt64F0x2:
+ case Iop_CmpNEZ32x4:
UNARY(Ity_V128, Ity_V128);
case Iop_ShlN16x8: case Iop_ShlN32x4: case Iop_ShlN64x2:
diff --git a/pub/libvex_guest_x86.h b/pub/libvex_guest_x86.h
index 2201194..ac95b04 100644
--- a/pub/libvex_guest_x86.h
+++ b/pub/libvex_guest_x86.h
@@ -124,34 +124,34 @@
typedef
struct {
- UInt guest_EAX;
+ UInt guest_EAX; /* 0 */
UInt guest_ECX;
UInt guest_EDX;
UInt guest_EBX;
UInt guest_ESP;
UInt guest_EBP;
UInt guest_ESI;
- UInt guest_EDI;
+ UInt guest_EDI; /* 28 */
/* 4-word thunk used to calculate O S Z A C P flags. */
- UInt guest_CC_OP;
+ UInt guest_CC_OP; /* 32 */
UInt guest_CC_DEP1;
UInt guest_CC_DEP2;
- UInt guest_CC_NDEP;
+ UInt guest_CC_NDEP; /* 44 */
/* The D flag is stored here, encoded as either -1 or +1 */
- UInt guest_DFLAG;
+ UInt guest_DFLAG; /* 48 */
/* Bit 21 (ID) of eflags stored here, as either 0 or 1. */
- UInt guest_IDFLAG;
+ UInt guest_IDFLAG; /* 52 */
/* EIP */
- UInt guest_EIP;
+ UInt guest_EIP; /* 56 */
/* FPU */
- UInt guest_FTOP;
- ULong guest_FPREG[8];
- UChar guest_FPTAG[8];
- UInt guest_FPROUND;
- UInt guest_FC3210;
+ UInt guest_FTOP; /* 60 */
+ ULong guest_FPREG[8]; /* 64 */
+ UChar guest_FPTAG[8]; /* 128 */
+ UInt guest_FPROUND; /* 136 */
+ UInt guest_FC3210; /* 140 */
/* SSE */
- UInt guest_SSEROUND;
- U128 guest_XMM0;
+ UInt guest_SSEROUND; /* 144 */
+ U128 guest_XMM0; /* 148 */
U128 guest_XMM1;
U128 guest_XMM2;
U128 guest_XMM3;
diff --git a/pub/libvex_ir.h b/pub/libvex_ir.h
index d8d8cc5..d923962 100644
--- a/pub/libvex_ir.h
+++ b/pub/libvex_ir.h
@@ -377,21 +377,27 @@
/* --- pack / unpack --- */
- /* 64 <-> 128 bit pack/unpack */
+ /* 64 <-> 128 bit */
Iop_128to64, // :: V128 -> I64, low half
Iop_128HIto64, // :: V128 -> I64, high half
Iop_64HLto128, // :: (I64,I64) -> V128
- Iop_32Uto128,
Iop_64Uto128,
- Iop_Set128lo32,
Iop_Set128lo64,
+ /* 32 <-> 128 bit */
+ Iop_32Uto128,
+ Iop_128to32, // :: V128 -> I32, lowest lane
+ Iop_Set128lo32, // :: (V128,I32) -> V128
+
/* ------------------ 128-bit SIMD Integer. ------------------ */
/* BITWISE OPS */
Iop_And128, Iop_Or128, Iop_Xor128,
+ /* MISC (32x4 integer cmp != 0) */
+ Iop_CmpNEZ32x4,
+
/* ADDITION (normal / unsigned sat / signed sat) */
Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2,
Iop_QAdd8Ux16, Iop_QAdd16Ux8,