Make a start on SSE for x86 guest.
git-svn-id: svn://svn.valgrind.org/vex/trunk@602 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest-x86/ghelpers.c b/priv/guest-x86/ghelpers.c
index 855d753..06b6088 100644
--- a/priv/guest-x86/ghelpers.c
+++ b/priv/guest-x86/ghelpers.c
@@ -1581,6 +1581,20 @@
vex_state->guest_FPROUND = (UInt)Irrm_NEAREST;
vex_state->guest_FC3210 = 0;
+   /* Zero all four 32-bit words of a 128-bit guest register.  The
+      argument is parenthesised and the body wrapped in do/while(0)
+      so that "SSEZERO(x);" is exactly one statement. */
+#  define SSEZERO(_xmm) \
+      do { (_xmm)[0] = (_xmm)[1] = (_xmm)[2] = (_xmm)[3] = 0; } while (0)
+
+   vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
+   SSEZERO(vex_state->guest_XMM0);
+   SSEZERO(vex_state->guest_XMM1);
+   SSEZERO(vex_state->guest_XMM2);
+   SSEZERO(vex_state->guest_XMM3);
+   SSEZERO(vex_state->guest_XMM4);
+   SSEZERO(vex_state->guest_XMM5);
+   SSEZERO(vex_state->guest_XMM6);
+   SSEZERO(vex_state->guest_XMM7);
+
+#  undef SSEZERO
+
vex_state->guest_CS = 0;
vex_state->guest_DS = 0;
vex_state->guest_ES = 0;
diff --git a/priv/guest-x86/toIR.c b/priv/guest-x86/toIR.c
index 6dd1aed..409cc24 100644
--- a/priv/guest-x86/toIR.c
+++ b/priv/guest-x86/toIR.c
@@ -103,26 +103,48 @@
/*--- Offsets of various parts of the x86 guest state. ---*/
/*------------------------------------------------------------*/
-#define OFFB_FPREGS offsetof(VexGuestX86State,guest_FPREG[0])
-#define OFFB_FPTAGS offsetof(VexGuestX86State,guest_FPTAG[0])
-#define OFFB_EAX offsetof(VexGuestX86State,guest_EAX)
-#define OFFB_EBX offsetof(VexGuestX86State,guest_EBX)
-#define OFFB_ECX offsetof(VexGuestX86State,guest_ECX)
-#define OFFB_EDX offsetof(VexGuestX86State,guest_EDX)
-#define OFFB_EIP offsetof(VexGuestX86State,guest_EIP)
+#define OFFB_EAX offsetof(VexGuestX86State,guest_EAX)
+#define OFFB_EBX offsetof(VexGuestX86State,guest_EBX)
+#define OFFB_ECX offsetof(VexGuestX86State,guest_ECX)
+#define OFFB_EDX offsetof(VexGuestX86State,guest_EDX)
+#define OFFB_ESP offsetof(VexGuestX86State,guest_ESP)
+#define OFFB_EBP offsetof(VexGuestX86State,guest_EBP)
+#define OFFB_ESI offsetof(VexGuestX86State,guest_ESI)
+#define OFFB_EDI offsetof(VexGuestX86State,guest_EDI)
-#define OFFB_CC_OP offsetof(VexGuestX86State,guest_CC_OP)
-#define OFFB_CC_DEP1 offsetof(VexGuestX86State,guest_CC_DEP1)
-#define OFFB_CC_DEP2 offsetof(VexGuestX86State,guest_CC_DEP2)
-#define OFFB_CC_NDEP offsetof(VexGuestX86State,guest_CC_NDEP)
+#define OFFB_EIP offsetof(VexGuestX86State,guest_EIP)
-#define OFFB_DFLAG offsetof(VexGuestX86State,guest_DFLAG)
-#define OFFB_IDFLAG offsetof(VexGuestX86State,guest_IDFLAG)
-#define OFFB_FTOP offsetof(VexGuestX86State,guest_FTOP)
-#define OFFB_FC3210 offsetof(VexGuestX86State,guest_FC3210)
-#define OFFB_FPROUND offsetof(VexGuestX86State,guest_FPROUND)
+#define OFFB_CC_OP offsetof(VexGuestX86State,guest_CC_OP)
+#define OFFB_CC_DEP1 offsetof(VexGuestX86State,guest_CC_DEP1)
+#define OFFB_CC_DEP2 offsetof(VexGuestX86State,guest_CC_DEP2)
+#define OFFB_CC_NDEP offsetof(VexGuestX86State,guest_CC_NDEP)
-#define OFFB_EMWARN offsetof(VexGuestX86State,guest_EMWARN)
+#define OFFB_FPREGS offsetof(VexGuestX86State,guest_FPREG[0])
+#define OFFB_FPTAGS offsetof(VexGuestX86State,guest_FPTAG[0])
+#define OFFB_DFLAG offsetof(VexGuestX86State,guest_DFLAG)
+#define OFFB_IDFLAG offsetof(VexGuestX86State,guest_IDFLAG)
+#define OFFB_FTOP offsetof(VexGuestX86State,guest_FTOP)
+#define OFFB_FC3210 offsetof(VexGuestX86State,guest_FC3210)
+#define OFFB_FPROUND offsetof(VexGuestX86State,guest_FPROUND)
+
+#define OFFB_CS offsetof(VexGuestX86State,guest_CS)
+#define OFFB_DS offsetof(VexGuestX86State,guest_DS)
+#define OFFB_ES offsetof(VexGuestX86State,guest_ES)
+#define OFFB_FS offsetof(VexGuestX86State,guest_FS)
+#define OFFB_GS offsetof(VexGuestX86State,guest_GS)
+#define OFFB_SS offsetof(VexGuestX86State,guest_SS)
+
+#define OFFB_SSEROUND offsetof(VexGuestX86State,guest_SSEROUND)
+#define OFFB_XMM0 offsetof(VexGuestX86State,guest_XMM0)
+#define OFFB_XMM1 offsetof(VexGuestX86State,guest_XMM1)
+#define OFFB_XMM2 offsetof(VexGuestX86State,guest_XMM2)
+#define OFFB_XMM3 offsetof(VexGuestX86State,guest_XMM3)
+#define OFFB_XMM4 offsetof(VexGuestX86State,guest_XMM4)
+#define OFFB_XMM5 offsetof(VexGuestX86State,guest_XMM5)
+#define OFFB_XMM6 offsetof(VexGuestX86State,guest_XMM6)
+#define OFFB_XMM7 offsetof(VexGuestX86State,guest_XMM7)
+
+#define OFFB_EMWARN offsetof(VexGuestX86State,guest_EMWARN)
/*------------------------------------------------------------*/
@@ -461,24 +483,24 @@
if (sz == 4 || sz == 2 || (sz == 1 && archreg < 4)) {
switch (archreg) {
- case R_EAX: return offsetof(VexGuestX86State,guest_EAX);
- case R_EBX: return offsetof(VexGuestX86State,guest_EBX);
- case R_ECX: return offsetof(VexGuestX86State,guest_ECX);
- case R_EDX: return offsetof(VexGuestX86State,guest_EDX);
- case R_ESI: return offsetof(VexGuestX86State,guest_ESI);
- case R_EDI: return offsetof(VexGuestX86State,guest_EDI);
- case R_ESP: return offsetof(VexGuestX86State,guest_ESP);
- case R_EBP: return offsetof(VexGuestX86State,guest_EBP);
+ case R_EAX: return OFFB_EAX;
+ case R_EBX: return OFFB_EBX;
+ case R_ECX: return OFFB_ECX;
+ case R_EDX: return OFFB_EDX;
+ case R_ESI: return OFFB_ESI;
+ case R_EDI: return OFFB_EDI;
+ case R_ESP: return OFFB_ESP;
+ case R_EBP: return OFFB_EBP;
default: vpanic("integerGuestRegOffset(x86,le)(4,2)");
}
}
vassert(archreg >= 4 && archreg < 8 && sz == 1);
switch (archreg-4) {
- case R_EAX: return 1+ offsetof(VexGuestX86State,guest_EAX);
- case R_EBX: return 1+ offsetof(VexGuestX86State,guest_EBX);
- case R_ECX: return 1+ offsetof(VexGuestX86State,guest_ECX);
- case R_EDX: return 1+ offsetof(VexGuestX86State,guest_EDX);
+ case R_EAX: return 1+ OFFB_EAX;
+ case R_EBX: return 1+ OFFB_EBX;
+ case R_ECX: return 1+ OFFB_ECX;
+ case R_EDX: return 1+ OFFB_EDX;
default: vpanic("integerGuestRegOffset(x86,le)(1h)");
}
@@ -489,16 +511,31 @@
static Int segmentGuestRegOffset ( UInt sreg )
{
switch (sreg) {
- case R_ES: return offsetof(VexGuestX86State,guest_ES);
- case R_CS: return offsetof(VexGuestX86State,guest_CS);
- case R_SS: return offsetof(VexGuestX86State,guest_SS);
- case R_DS: return offsetof(VexGuestX86State,guest_DS);
- case R_FS: return offsetof(VexGuestX86State,guest_FS);
- case R_GS: return offsetof(VexGuestX86State,guest_GS);
+ case R_ES: return OFFB_ES;
+ case R_CS: return OFFB_CS;
+ case R_SS: return OFFB_SS;
+ case R_DS: return OFFB_DS;
+ case R_FS: return OFFB_FS;
+ case R_GS: return OFFB_GS;
default: vpanic("segmentGuestRegOffset(x86)");
}
}
+/* Map an XMM register number (0 .. 7) to the byte offset of that
+   register inside VexGuestX86State.  Any other number panics. */
+static Int xmmGuestRegOffset ( UInt xmmreg )
+{
+   static const Int xmm_offsets[8]
+      = { OFFB_XMM0, OFFB_XMM1, OFFB_XMM2, OFFB_XMM3,
+          OFFB_XMM4, OFFB_XMM5, OFFB_XMM6, OFFB_XMM7 };
+   if (xmmreg > 7) vpanic("xmmGuestRegOffset");
+   return xmm_offsets[xmmreg];
+}
+
static IRExpr* getIReg ( Int sz, UInt archreg )
{
vassert(sz == 1 || sz == 2 || sz == 4);
@@ -510,8 +547,10 @@
/* Ditto, but write to a reg instead. */
static void putIReg ( Int sz, UInt archreg, IRExpr* e )
{
+ IRType ty = typeOfIRExpr(irbb->tyenv, e);
vassert(sz == 1 || sz == 2 || sz == 4);
vassert(archreg < 8);
+ vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
stmt( IRStmt_Put(integerGuestRegOffset(sz,archreg), e) );
}
@@ -528,6 +567,17 @@
}
#endif
+/* Build an IR expression that reads all 128 bits of the given XMM
+   register from the guest state. */
+static IRExpr* getXMMReg ( UInt xmmreg )
+{
+   Int offset = xmmGuestRegOffset(xmmreg);
+   return IRExpr_Get( offset, Ity_V128 );
+}
+
+/* Emit a statement that writes e, which must have type Ity_V128, to
+   the given XMM register in the guest state. */
+static void putXMMReg ( UInt xmmreg, IRExpr* e )
+{
+   Int offset;
+   vassert(typeOfIRExpr(irbb->tyenv,e) == Ity_V128);
+   offset = xmmGuestRegOffset(xmmreg);
+   stmt( IRStmt_Put( offset, e ) );
+}
+
static void assign ( IRTemp dst, IRExpr* e )
{
stmt( IRStmt_Tmp(dst, e) );
@@ -1258,33 +1308,33 @@
-static Char* nameGrp1 ( Int opc_aux )
+static HChar* nameGrp1 ( Int opc_aux )
{
- static Char* grp1_names[8]
+ static HChar* grp1_names[8]
= { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(x86)");
return grp1_names[opc_aux];
}
-static Char* nameGrp2 ( Int opc_aux )
+static HChar* nameGrp2 ( Int opc_aux )
{
- static Char* grp2_names[8]
+ static HChar* grp2_names[8]
= { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(x86)");
return grp2_names[opc_aux];
}
-static Char* nameGrp4 ( Int opc_aux )
+static HChar* nameGrp4 ( Int opc_aux )
{
- static Char* grp4_names[8]
+ static HChar* grp4_names[8]
= { "inc", "dec", "???", "???", "???", "???", "???", "???" };
if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(x86)");
return grp4_names[opc_aux];
}
-static Char* nameGrp5 ( Int opc_aux )
+static HChar* nameGrp5 ( Int opc_aux )
{
- static Char* grp5_names[8]
+ static HChar* grp5_names[8]
= { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(x86)");
return grp5_names[opc_aux];
@@ -1298,14 +1348,14 @@
//-- return grp8_names[opc_aux];
//-- }
-static Char* nameIReg ( Int size, Int reg )
+static HChar* nameIReg ( Int size, Int reg )
{
- static Char* ireg32_names[8]
+ static HChar* ireg32_names[8]
= { "%eax", "%ecx", "%edx", "%ebx",
"%esp", "%ebp", "%esi", "%edi" };
- static Char* ireg16_names[8]
+ static HChar* ireg16_names[8]
= { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" };
- static Char* ireg8_names[8]
+ static HChar* ireg8_names[8]
= { "%al", "%cl", "%dl", "%bl",
"%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" };
if (reg < 0 || reg > 7) goto bad;
@@ -1319,7 +1369,7 @@
return NULL; /*notreached*/
}
-static Char* nameSReg ( UInt sreg )
+static HChar* nameSReg ( UInt sreg )
{
switch (sreg) {
case R_ES: return "%es";
@@ -1332,21 +1382,22 @@
}
}
-static Char* nameMMXReg ( Int mmxreg )
+static HChar* nameMMXReg ( Int mmxreg )
{
- static Char* mmx_names[8]
+ static HChar* mmx_names[8]
= { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(x86,guest)");
return mmx_names[mmxreg];
}
-//-- const Char* VG_(name_of_xmm_reg) ( Int xmmreg )
-//-- {
-//-- static const Char* xmm_names[8]
-//-- = { "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7" };
-//-- if (xmmreg < 0 || xmmreg > 7) VG_(core_panic)("name_of_xmm_reg");
-//-- return xmm_names[xmmreg];
-//-- }
+/* Return the printable name ("%xmm0" .. "%xmm7") of an XMM register,
+   as used when printing decoded instructions.  Panics if xmmreg is
+   outside 0 .. 7. */
+static HChar* nameXMMReg ( Int xmmreg )
+{
+   static HChar* xmm_names[8] 
+     = { "%xmm0", "%xmm1", "%xmm2", "%xmm3", 
+         "%xmm4", "%xmm5", "%xmm6", "%xmm7" };
+   /* Panic text names this function, matching nameMMXReg's style. */
+   if (xmmreg < 0 || xmmreg > 7) vpanic("nameXMMReg(x86,guest)");
+   return xmm_names[xmmreg];
+}
static Char* nameMMXGran ( UChar gran )
{
@@ -1855,7 +1906,7 @@
UInt delta0,
Char* t_x86opc )
{
- UChar dis_buf[50];
+ HChar dis_buf[50];
Int len;
IRType ty = szToITy(size);
IRTemp dst1 = newTemp(ty);
@@ -1964,7 +2015,7 @@
UInt delta0,
Char* t_x86opc )
{
- UChar dis_buf[50];
+ HChar dis_buf[50];
Int len;
IRType ty = szToITy(size);
IRTemp dst1 = newTemp(ty);
@@ -2068,7 +2119,7 @@
{
Int len;
UChar rm = getIByte(delta0);
- UChar dis_buf[50];
+ HChar dis_buf[50];
if (epartIsReg(rm)) {
putIReg(size, gregOfRM(rm), getIReg(size, eregOfRM(rm)));
@@ -2112,7 +2163,7 @@
{
Int len;
UChar rm = getIByte(delta0);
- UChar dis_buf[50];
+ HChar dis_buf[50];
if (epartIsReg(rm)) {
putIReg(size, eregOfRM(rm), getIReg(size, gregOfRM(rm)));
@@ -2186,7 +2237,7 @@
/* E refers to memory */
{
Int len;
- UChar dis_buf[50];
+ HChar dis_buf[50];
IRTemp addr = disAMode ( &len, sorb, delta, dis_buf );
putIReg(szd, gregOfRM(rm),
@@ -2299,7 +2350,7 @@
Int am_sz, Int d_sz, Int sz, UInt d32 )
{
Int len;
- UChar dis_buf[50];
+ HChar dis_buf[50];
IRType ty = szToITy(sz);
IRTemp dst1 = newTemp(ty);
IRTemp src = newTemp(ty);
@@ -2382,7 +2433,7 @@
Char* shift_expr_txt )
{
/* delta on entry points at the modrm byte. */
- UChar dis_buf[50];
+ HChar dis_buf[50];
Int len;
Bool isShift, isRotate, isRotateRC;
IRType ty = szToITy(sz);
@@ -2635,7 +2686,7 @@
//-- And eip on entry points at the modrm byte. */
//-- Int t1, t2, t_fetched, t_mask;
//-- UInt pair;
-//-- Char dis_buf[50];
+//-- HChar dis_buf[50];
//-- UInt v_mask;
//--
//-- /* There is no 1-byte form of this instruction, AFAICS. */
@@ -2776,10 +2827,10 @@
static
UInt dis_Grp3 ( UChar sorb, Int sz, UInt delta )
{
- UInt d32;
- UChar modrm;
- UChar dis_buf[50];
- Int len;
+ UInt d32;
+ UChar modrm;
+ HChar dis_buf[50];
+ Int len;
IRTemp addr;
IRType ty = szToITy(sz);
IRTemp t1 = newTemp(ty);
@@ -2906,9 +2957,9 @@
static
UInt dis_Grp4 ( UChar sorb, UInt delta )
{
- Int alen;
+ Int alen;
UChar modrm;
- UChar dis_buf[50];
+ HChar dis_buf[50];
IRType ty = Ity_I8;
IRTemp t1 = newTemp(ty);
IRTemp t2 = newTemp(ty);
@@ -2967,7 +3018,7 @@
{
Int len;
UChar modrm;
- UChar dis_buf[50];
+ HChar dis_buf[50];
IRTemp addr = IRTemp_INVALID;
IRType ty = szToITy(sz);
IRTemp t1 = newTemp(ty);
@@ -3267,7 +3318,7 @@
UInt delta0 )
{
Int alen;
- UChar dis_buf[50];
+ HChar dis_buf[50];
UChar rm = getIByte(delta0);
IRType ty = szToITy(size);
IRTemp te = newTemp(ty);
@@ -3309,7 +3360,7 @@
Int litsize )
{
Int d32, alen;
- Char dis_buf[50];
+ HChar dis_buf[50];
UChar rm = getIByte(delta);
IRType ty = szToITy(size);
IRTemp te = newTemp(ty);
@@ -3638,7 +3689,7 @@
{
Int len;
UInt r_src, r_dst;
- UChar dis_buf[50];
+ HChar dis_buf[50];
IRTemp t1, t2;
/* On entry, delta points at the second byte of the insn (the modrm
@@ -4452,23 +4503,23 @@
d->nFxState = 5;
d->fxState[0].fx = Ifx_Write;
- d->fxState[0].offset = offsetof(VexGuestX86State,guest_FTOP);
+ d->fxState[0].offset = OFFB_FTOP;
d->fxState[0].size = sizeof(UInt);
d->fxState[1].fx = Ifx_Write;
- d->fxState[1].offset = offsetof(VexGuestX86State,guest_FPREG);
+ d->fxState[1].offset = OFFB_FPREGS;
d->fxState[1].size = 8 * sizeof(ULong);
d->fxState[2].fx = Ifx_Write;
- d->fxState[2].offset = offsetof(VexGuestX86State,guest_FPTAG);
+ d->fxState[2].offset = OFFB_FPTAGS;
d->fxState[2].size = 8 * sizeof(UChar);
d->fxState[3].fx = Ifx_Write;
- d->fxState[3].offset = offsetof(VexGuestX86State,guest_FPROUND);
+ d->fxState[3].offset = OFFB_FPROUND;
d->fxState[3].size = sizeof(UInt);
d->fxState[4].fx = Ifx_Write;
- d->fxState[4].offset = offsetof(VexGuestX86State,guest_FC3210);
+ d->fxState[4].offset = OFFB_FC3210;
d->fxState[4].size = sizeof(UInt);
stmt( IRStmt_Dirty(d) );
@@ -4509,23 +4560,23 @@
d->nFxState = 5;
d->fxState[0].fx = Ifx_Read;
- d->fxState[0].offset = offsetof(VexGuestX86State,guest_FTOP);
+ d->fxState[0].offset = OFFB_FTOP;
d->fxState[0].size = sizeof(UInt);
d->fxState[1].fx = Ifx_Read;
- d->fxState[1].offset = offsetof(VexGuestX86State,guest_FPREG);
+ d->fxState[1].offset = OFFB_FPREGS;
d->fxState[1].size = 8 * sizeof(ULong);
d->fxState[2].fx = Ifx_Read;
- d->fxState[2].offset = offsetof(VexGuestX86State,guest_FPTAG);
+ d->fxState[2].offset = OFFB_FPTAGS;
d->fxState[2].size = 8 * sizeof(UChar);
d->fxState[3].fx = Ifx_Read;
- d->fxState[3].offset = offsetof(VexGuestX86State,guest_FPROUND);
+ d->fxState[3].offset = OFFB_FPROUND;
d->fxState[3].size = sizeof(UInt);
d->fxState[4].fx = Ifx_Read;
- d->fxState[4].offset = offsetof(VexGuestX86State,guest_FC3210);
+ d->fxState[4].offset = OFFB_FC3210;
d->fxState[4].size = sizeof(UInt);
stmt( IRStmt_Dirty(d) );
@@ -4879,7 +4930,7 @@
Char* name,
Bool show_granularity )
{
- Char dis_buf[50];
+ HChar dis_buf[50];
UChar modrm = getIByte(delta);
Bool isReg = epartIsReg(modrm);
IRExpr* argL = NULL;
@@ -5014,7 +5065,7 @@
{
Int len;
UChar modrm;
- UChar dis_buf[50];
+ HChar dis_buf[50];
UChar opc = getIByte(delta);
delta++;
@@ -5337,7 +5388,7 @@
/* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
for printing it. And eip on entry points at the modrm byte. */
Int len;
- UChar dis_buf[50];
+ HChar dis_buf[50];
IRType ty = szToITy(sz);
IRTemp gsrc = newTemp(ty);
@@ -5463,7 +5514,7 @@
static
UInt dis_bt_G_E ( UChar sorb, Int sz, UInt delta, BtOp op )
{
- Char dis_buf[50];
+ HChar dis_buf[50];
UChar modrm;
Int len;
IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
@@ -5594,7 +5645,7 @@
{
Bool isReg;
UChar modrm;
- Char dis_buf[50];
+ HChar dis_buf[50];
IRType ty = szToITy(sz);
IRTemp src = newTemp(ty);
@@ -5760,7 +5811,7 @@
Int size,
UInt delta0 )
{
- UChar dis_buf[50];
+ HChar dis_buf[50];
Int len;
IRType ty = szToITy(size);
@@ -5813,7 +5864,7 @@
//-- Addr eip0 )
//-- {
//-- Int tal, tah, junkl, junkh, destl, desth, srcl, srch, accl, acch;
-//-- UChar dis_buf[50];
+//-- HChar dis_buf[50];
//-- UChar rm;
//-- UInt pair;
//--
@@ -5899,7 +5950,7 @@
UInt delta0 )
{
UChar rm = getIByte(delta0);
- UChar dis_buf[50];
+ HChar dis_buf[50];
Int len;
IRType ty = szToITy(sz);
@@ -5950,7 +6001,7 @@
{
Int len;
UChar rm = getIByte(delta0);
- UChar dis_buf[50];
+ HChar dis_buf[50];
// Int tmpd = newTemp(cb);
//Int tmpt = newTemp(cb);
@@ -6009,7 +6060,7 @@
//-- Addr eip0 )
//-- {
//-- UChar rm = getUChar(eip0);
-//-- UChar dis_buf[50];
+//-- HChar dis_buf[50];
//--
//-- if (epartIsReg(rm)) {
//-- Int tmpv = newTemp(cb);
@@ -6054,7 +6105,7 @@
UInt delta0 )
{
UChar rm = getIByte(delta0);
- //UChar dis_buf[50];
+ //HChar dis_buf[50];
vassert(sz == 2 || sz == 4);
@@ -6099,7 +6150,7 @@
//-- Char* name,
//-- Bool show_granularity )
//-- {
-//-- Char dis_buf[50];
+//-- HChar dis_buf[50];
//-- UChar modrm = getUChar(eip);
//-- Bool isReg = epartIsReg(modrm);
//--
@@ -6141,7 +6192,7 @@
//-- Char* name,
//-- Bool show_granularity )
//-- {
-//-- Char dis_buf[50];
+//-- HChar dis_buf[50];
//-- UChar modrm = getUChar(eip);
//-- UChar imm8;
//-- Bool isReg = epartIsReg(modrm);
@@ -6196,7 +6247,7 @@
//-- UChar opc2,
//-- UChar opc3 )
//-- {
-//-- Char dis_buf[50];
+//-- HChar dis_buf[50];
//-- UChar modrm = getUChar(eip);
//-- Bool isReg = epartIsReg(modrm);
//--
@@ -6241,7 +6292,7 @@
//-- UChar opc1,
//-- UChar opc2 )
//-- {
-//-- Char dis_buf[50];
+//-- HChar dis_buf[50];
//-- UChar modrm = getUChar(eip);
//-- Bool isReg = epartIsReg(modrm);
//--
@@ -6285,7 +6336,7 @@
//-- UChar opc1,
//-- UChar opc2 )
//-- {
-//-- Char dis_buf[50];
+//-- HChar dis_buf[50];
//-- UChar modrm = getUChar(eip);
//-- UChar imm8;
//-- Bool isReg = epartIsReg(modrm);
@@ -6333,7 +6384,7 @@
//-- UChar opc2,
//-- UChar opc3 )
//-- {
-//-- Char dis_buf[50];
+//-- HChar dis_buf[50];
//-- UChar modrm = getUChar(eip);
//-- UChar imm8;
//-- Bool isReg = epartIsReg(modrm);
@@ -6381,7 +6432,7 @@
//-- UChar insn1,
//-- UChar insn2 )
//-- {
-//-- Char dis_buf[50];
+//-- HChar dis_buf[50];
//-- UChar modrm = getUChar(eip);
//-- Bool isReg = epartIsReg(modrm);
//-- UInt pair;
@@ -6431,7 +6482,7 @@
//-- UChar insn0,
//-- UChar insn1 )
//-- {
-//-- Char dis_buf[50];
+//-- HChar dis_buf[50];
//-- UChar modrm = getUChar(eip);
//-- Bool isReg = epartIsReg(modrm);
//-- UInt pair;
@@ -6484,7 +6535,7 @@
//-- UChar opc1,
//-- UChar opc2 )
//-- {
-//-- UChar dis_buf[50];
+//-- HChar dis_buf[50];
//-- UChar modrm = getUChar(eip);
//-- if (epartIsReg(modrm)) {
//-- /* Completely internal SSE insn. */
@@ -6524,7 +6575,7 @@
//-- UChar opc1,
//-- UChar opc2 )
//-- {
-//-- UChar dis_buf[50];
+//-- HChar dis_buf[50];
//-- UChar modrm = getUChar(eip);
//-- if (epartIsReg(modrm)) {
//-- /* Completely internal SSE insn. */
@@ -6565,7 +6616,7 @@
//-- UChar opc2,
//-- UChar opc3 )
//-- {
-//-- UChar dis_buf[50];
+//-- HChar dis_buf[50];
//-- UChar modrm = getUChar(eip);
//-- if (epartIsReg(modrm)) {
//-- /* Completely internal SSE insn. */
@@ -6606,7 +6657,7 @@
//-- UChar opc2,
//-- UChar opc3 )
//-- {
-//-- UChar dis_buf[50];
+//-- HChar dis_buf[50];
//-- UChar modrm = getUChar(eip);
//-- if (epartIsReg(modrm)) {
//-- /* Completely internal SSE insn. */
@@ -6668,6 +6719,47 @@
jmp_treg(Ijk_Ret,t2);
}
+/* ------ SSE/SSE2/SSE3 helpers ----- */
+
+/* Replace the low 64 bits of an XMM register with e64, keeping the
+   register's current high 64 bits. */
+static void putXMMRegLO64( Int xmmreg, IRExpr* e64 )
+{
+   IRExpr* oldHi  = unop(Iop_128HIto64, getXMMReg(xmmreg));
+   IRExpr* newVal = binop(Iop_64HLto128, oldHi, e64);
+   putXMMReg(xmmreg, newVal);
+}
+
+/* Replace the high 64 bits of an XMM register with e64, keeping the
+   register's current low 64 bits. */
+static void putXMMRegHI64( Int xmmreg, IRExpr* e64 )
+{
+   IRExpr* oldLo  = unop(Iop_128to64, getXMMReg(xmmreg));
+   IRExpr* newVal = binop(Iop_64HLto128, e64, oldLo);
+   putXMMReg(xmmreg, newVal);
+}
+
+/* Translate a two-operand SSE insn of the form "G = G `op` E", where
+   delta points at the modrm byte and opname is used only for
+   printing.  Only the register form of E is implemented; it returns
+   the delta of the byte after the modrm byte.  A memory operand is
+   not handled yet and panics, so sorb is currently unused. */
+static UInt dis_SSE_E_to_G ( UChar sorb, UInt delta, 
+                             HChar* opname, IROp op )
+{
+   UChar rm = getIByte(delta);
+
+   /* Explicit panic rather than vassert(0), so that no path falls
+      off the end of this non-void function. */
+   if (!epartIsReg(rm))
+      vpanic("dis_SSE_E_to_G(x86): memory operand not implemented");
+
+   putXMMReg( gregOfRM(rm), 
+              binop(op, getXMMReg(gregOfRM(rm)),
+                        getXMMReg(eregOfRM(rm))) );
+   DIP("%s %s,%s\n", opname,
+                     nameXMMReg(eregOfRM(rm)),
+                     nameXMMReg(gregOfRM(rm)) );
+   return delta+1;
+}
+
/*------------------------------------------------------------*/
/*--- Disassemble a single instruction ---*/
@@ -6691,9 +6783,10 @@
Int alen;
UChar opc, modrm, abyte;
UInt d32;
- UChar dis_buf[50];
+ HChar dis_buf[50];
Int am_sz, d_sz;
DisResult whatNext = Dis_Continue;
+ UChar* insn; /* used in SSE decoders */
//Char loc_buf[M_VG_ERRTXT];
@@ -6787,14 +6880,87 @@
break;
}
-//-- /* ---------------------------------------------------- */
-//-- /* --- The SSE/SSE2 decoder. --- */
-//-- /* ---------------------------------------------------- */
-//--
-//-- /* If it looks like this CPU might support SSE, try decoding SSE
-//-- insns. */
-//-- if (VG_(have_ssestate)) {
-//-- UChar* insn = (UChar*)eip;
+ /* ---------------------------------------------------- */
+ /* --- The SSE decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* Note, this doesn't handle SSE2 or SSE3. */
+
+ insn = (UChar*)&guest_code[delta];
+
+   /* 0F 12 /r, memory form = MOVLPS -- move from mem to low half of
+      XMM.  The register form of this opcode is a different insn
+      (MOVHLPS) which is not handled here, so match only when the E
+      part is memory: the body decodes E with disAMode as an address. */
+   if (insn[0] == 0x0F && insn[1] == 0x12 && !epartIsReg(insn[2])) {
+      delta += 2;
+      addr = disAMode ( &alen, sorb, delta, dis_buf );
+      delta += alen;
+
+      putXMMRegLO64( gregOfRM(insn[2]), 
+                     loadLE(Ity_I64, mkexpr(addr)) );
+
+      DIP("movlps %s, %s\n", 
+          dis_buf, nameXMMReg( gregOfRM(insn[2]) ));
+
+      goto decode_success;
+   }
+
+   /* 0F 13 /r = MOVLPS -- move from low half of XMM to mem.  This
+      insn only has a memory destination, so a register-form modrm is
+      not matched here and is left to the rest of the decoder. */
+   if (insn[0] == 0x0F && insn[1] == 0x13 && !epartIsReg(insn[2])) {
+      delta += 2;
+      addr = disAMode ( &alen, sorb, delta, dis_buf );
+      delta += alen;
+
+      storeLE( mkexpr(addr), 
+               unop(Iop_128to64, getXMMReg( gregOfRM(insn[2]) )) );
+
+      DIP("movlps %s, %s\n", 
+          nameXMMReg( gregOfRM(insn[2]) ),
+          dis_buf);
+
+      goto decode_success;
+   }
+
+   /* 0F 16 /r, memory form = MOVHPS -- move from mem to high half of
+      XMM.  The register form of this opcode is a different insn
+      (MOVLHPS) which is not handled here, so match only when the E
+      part is memory: the body decodes E with disAMode as an address. */
+   if (insn[0] == 0x0F && insn[1] == 0x16 && !epartIsReg(insn[2])) {
+      vassert(sz == 4);
+      delta += 2;
+      addr = disAMode ( &alen, sorb, delta, dis_buf );
+      delta += alen;
+
+      putXMMRegHI64( gregOfRM(insn[2]), 
+                     loadLE(Ity_I64, mkexpr(addr)) );
+
+      DIP("movhps %s, %s\n", 
+          dis_buf, nameXMMReg( gregOfRM(insn[2]) ));
+
+      goto decode_success;
+   }
+
+   /* 0F 17 /r = MOVHPS -- move from high half of XMM to mem.  This
+      insn only has a memory destination, so a register-form modrm is
+      not matched here and is left to the rest of the decoder. */
+   if (insn[0] == 0x0F && insn[1] == 0x17 && !epartIsReg(insn[2])) {
+      vassert(sz == 4);
+      delta += 2;
+      addr = disAMode ( &alen, sorb, delta, dis_buf );
+      delta += alen;
+
+      storeLE( mkexpr(addr), 
+               unop(Iop_128HIto64, getXMMReg( gregOfRM(insn[2]) )) );
+
+      DIP("movhps %s, %s\n", 
+          nameXMMReg( gregOfRM(insn[2]) ),
+          dis_buf);
+
+      goto decode_success;
+   }
+
+   /* 0F 58 = ADDPS -- packed 32Fx4 add of the R/M operand into R.
+      The two opcode bytes are skipped before handing the modrm byte
+      to the generic E-to-G helper. */
+   if (insn[0] == 0x0F) {
+      if (insn[1] == 0x58) {
+         vassert(sz == 4);
+         delta += 2;
+         delta = dis_SSE_E_to_G( sorb, delta, "addps", Iop_Add32Fx4 );
+         goto decode_success;
+      }
+   }
+
+
+
//--
//-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */
//-- if (insn[0] == 0x0F && insn[1] == 0xAE
diff --git a/priv/ir/irdefs.c b/priv/ir/irdefs.c
index 8105796..632fc8e 100644
--- a/priv/ir/irdefs.c
+++ b/priv/ir/irdefs.c
@@ -55,6 +55,7 @@
case Ity_I64: vex_printf( "I64"); break;
case Ity_F32: vex_printf( "F32"); break;
case Ity_F64: vex_printf( "F64"); break;
+ case Ity_V128: vex_printf( "V128"); break;
default: vex_printf("ty = 0x%x\n", (Int)ty);
vpanic("ppIRType");
}
@@ -217,7 +218,13 @@
case Iop_ReinterpF64asI64: vex_printf("ReinterpF64asI64"); return;
case Iop_ReinterpI64asF64: vex_printf("ReinterpI64asF64"); return;
- default: vpanic("ppIROp(1)");
+ case Iop_Add32Fx4: vex_printf("Add32Fx4"); return;
+
+ case Iop_64HLto128: vex_printf("64HLto128"); return;
+ case Iop_128to64: vex_printf("128to64"); return;
+ case Iop_128HIto64: vex_printf("128HIto64"); return;
+
+ default: vpanic("ppIROp(1)");
}
switch (op - base) {
@@ -237,9 +244,9 @@
vex_printf("BIND-%d", e->Iex.Binder.binder);
break;
case Iex_Get:
- vex_printf( "GET(%d,", e->Iex.Get.offset);
+ vex_printf( "GET:" );
ppIRType(e->Iex.Get.ty);
- vex_printf(")");
+ vex_printf("(%d)", e->Iex.Get.offset);
break;
case Iex_GetI:
vex_printf( "GETI" );
@@ -1059,6 +1066,13 @@
case Iop_F32toF64: UNARY(Ity_F64,Ity_F32);
case Iop_F64toF32: UNARY(Ity_F32,Ity_F64);
+ case Iop_64HLto128: BINARY(Ity_V128, Ity_I64,Ity_I64);
+ case Iop_128to64: case Iop_128HIto64:
+ UNARY(Ity_I64, Ity_V128);
+
+ case Iop_Add32Fx4:
+ BINARY(Ity_V128, Ity_V128,Ity_V128);
+
default:
ppIROp(op);
vpanic("typeOfPrimop");
@@ -1187,6 +1201,7 @@
case Ity_INVALID: case Ity_I1:
case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
case Ity_F32: case Ity_F64:
+ case Ity_V128:
return True;
default:
return False;
@@ -1713,12 +1728,13 @@
Int sizeofIRType ( IRType ty )
{
switch (ty) {
- case Ity_I8: return 1;
- case Ity_I16: return 2;
- case Ity_I32: return 4;
- case Ity_I64: return 8;
- case Ity_F32: return 4;
- case Ity_F64: return 8;
+ case Ity_I8: return 1;
+ case Ity_I16: return 2;
+ case Ity_I32: return 4;
+ case Ity_I64: return 8;
+ case Ity_F32: return 4;
+ case Ity_F64: return 8;
+ case Ity_V128: return 16;
default: vex_printf("\n"); ppIRType(ty); vex_printf("\n");
vpanic("sizeofIRType");
}
diff --git a/priv/main/vex_main.c b/priv/main/vex_main.c
index b6e70b6..4fb4860 100644
--- a/priv/main/vex_main.c
+++ b/priv/main/vex_main.c
@@ -125,6 +125,7 @@
vassert(1 == sizeof(Bool));
vassert(4 == sizeof(Addr32));
vassert(8 == sizeof(Addr64));
+ vassert(16 == sizeof(U128));
vassert(sizeof(void*) == 4 || sizeof(void*) == 8);
vassert(sizeof(void*) == sizeof(int*));
diff --git a/pub/libvex_basictypes.h b/pub/libvex_basictypes.h
index cf800c6..ef06af2 100644
--- a/pub/libvex_basictypes.h
+++ b/pub/libvex_basictypes.h
@@ -59,6 +59,9 @@
typedef unsigned long long int ULong;
typedef signed long long int Long;
+/* Always 128 bits: an array of four UInt words (array type, so it decays to UInt* in calls). */
+typedef UInt U128[4];
+
typedef float Float; /* IEEE754 single-precision (32-bit) value */
typedef double Double; /* IEEE754 double-precision (64-bit) value */
diff --git a/pub/libvex_guest_x86.h b/pub/libvex_guest_x86.h
index e3ddfed..c80fafd 100644
--- a/pub/libvex_guest_x86.h
+++ b/pub/libvex_guest_x86.h
@@ -130,6 +130,16 @@
UChar guest_FPTAG[8];
UInt guest_FPROUND;
UInt guest_FC3210;
+ /* SSE */
+ UInt guest_SSEROUND;
+ U128 guest_XMM0;
+ U128 guest_XMM1;
+ U128 guest_XMM2;
+ U128 guest_XMM3;
+ U128 guest_XMM4;
+ U128 guest_XMM5;
+ U128 guest_XMM6;
+ U128 guest_XMM7;
/* Segment registers. */
UShort guest_CS;
UShort guest_DS;
@@ -140,7 +150,7 @@
/* Emulation warnings */
UInt guest_EMWARN;
/* Padding to make it have an 8-aligned size */
- /* UInt padding; */
+ UInt padding;
}
VexGuestX86State;
diff --git a/pub/libvex_ir.h b/pub/libvex_ir.h
index c37adde..73be625 100644
--- a/pub/libvex_ir.h
+++ b/pub/libvex_ir.h
@@ -46,10 +46,16 @@
/* ------------------ Types ------------------ */
typedef
- enum { Ity_INVALID=0x10FFF,
- Ity_I1=0x11000,
- Ity_I8, Ity_I16, Ity_I32, Ity_I64,
- Ity_F32, Ity_F64
+ enum {
+ Ity_INVALID=0x10FFF,
+ Ity_I1=0x11000,
+ Ity_I8,
+ Ity_I16,
+ Ity_I32,
+ Ity_I64,
+ Ity_F32, /* IEEE 754 float */
+ Ity_F64, /* IEEE 754 double */
+ Ity_V128 /* 128-bit SIMD */
}
IRType;
@@ -60,11 +66,15 @@
/* ------------------ Constants ------------------ */
typedef
- enum { Ico_U1=0x12000,
- Ico_U8, Ico_U16, Ico_U32, Ico_U64,
- Ico_F64, /* 64-bit IEEE754 floating */
- Ico_F64i /* 64-bit unsigned int to be interpreted literally
- as a IEEE754 double value. */
+ enum {
+ Ico_U1=0x12000,
+ Ico_U8,
+ Ico_U16,
+ Ico_U32,
+ Ico_U64,
+ Ico_F64, /* 64-bit IEEE754 floating */
+ Ico_F64i /* 64-bit unsigned int to be interpreted literally
+ as a IEEE754 double value. */
}
IRConstTag;
@@ -303,7 +313,50 @@
/* Reinterpretation. Take an F64 and produce an I64 with
the same bit pattern, or vice versa. */
- Iop_ReinterpF64asI64, Iop_ReinterpI64asF64
+ Iop_ReinterpF64asI64, Iop_ReinterpI64asF64,
+
+ /* ------------------ 128-bit SIMD. ------------------ */
+
+ /* 128-bit ops */
+ Iop_And128, Iop_Or128, Iop_Xor128, Iop_Andn128,
+
+ /* --- 32x4 vector FP --- */
+
+ /* binary */
+ Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4,
+ Iop_Max32Fx4, Iop_Min32Fx4,
+
+ /* unary */
+ Iop_Recip32Fx4, Iop_Sqrt32Fx4, Iop_RSqrt32Fx4,
+ Iop_ItoF32x4, /* first arg is IRRoundingMode (Ity_I32) */
+ Iop_FtoI32x4, /* first arg is IRRoundingMode (Ity_I32) */
+
+ /* --- 32x4 lowest-lane-only scalar FP --- */
+
+ /* In binary cases, upper 3/4 is copied from first operand. In
+ unary cases, upper 3/4 is copied from the operand. */
+
+ /* binary */
+ Iop_Add32F0x4, Iop_Sub32F0x4, Iop_Mul32F0x4, Iop_Div32F0x4,
+ Iop_Max32F0x4, Iop_Min32F0x4,
+
+ /* unary */
+ Iop_Recip32F0x4, Iop_Sqrt32F0x4, Iop_RSqrt32F0x4,
+ Iop_ItoF320x4, /* first arg is IRRoundingMode (Ity_I32) */
+ Iop_FtoI320x4, /* first arg is IRRoundingMode (Ity_I32) */
+
+ /* --- pack / unpack --- */
+
+ /* 64 <-> 128 bit pack/unpack */
+ Iop_128to64, // :: V128 -> I64, low half
+ Iop_128HIto64, // :: V128 -> I64, high half
+ Iop_64HLto128, // :: (I64,I64) -> V128
+
+ /* 128 -> 32 bit unpack */
+ Iop_128W3to32, // :: V128 -> I32, bits 127-96
+ Iop_128W2to32, // :: V128 -> I32, bits 95-64
+ Iop_128W1to32, // :: V128 -> I32, bits 63-32
+ Iop_128W0to32 // :: V128 -> I32, bits 31-0
}
IROp;