Move the handling of PSHUFW from the SSE code to the MMX code so that
it will work on older Athlons which only have MMXEXT support.
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@2319 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c
index 2f83e76..a0641dd 100644
--- a/cachegrind/cg_main.c
+++ b/cachegrind/cg_main.c
@@ -604,6 +604,13 @@
is_FPU_R = True;
break;
+ case MMX2a1_MemRd:
+ sk_assert(u_in->size == 8);
+ sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
+ t_read = u_in->val3;
+ is_FPU_R = True;
+ break;
+
case SSE2a_MemRd:
case SSE2a1_MemRd:
sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
@@ -857,6 +864,18 @@
: MIN_LINE_SIZE);
VG_(copy_UInstr)(cb, u_in);
break;
+ break;
+
+ case MMX2a1_MemRd:
+ sk_assert(u_in->size == 8);
+ t_read = u_in->val3;
+ t_read_addr = newTemp(cb);
+ uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
+ data_size = ( u_in->size <= MIN_LINE_SIZE
+ ? u_in->size
+ : MIN_LINE_SIZE);
+ VG_(copy_UInstr)(cb, u_in);
+ break;
case SSE2a_MemRd:
case SSE2a1_MemRd:
diff --git a/coregrind/vg_from_ucode.c b/coregrind/vg_from_ucode.c
index 859d098..211f58e 100644
--- a/coregrind/vg_from_ucode.c
+++ b/coregrind/vg_from_ucode.c
@@ -1497,6 +1497,28 @@
nameIReg(4,ireg) );
}
+static void emit_MMX2a1 ( FlagSet uses_sflags,
+ FlagSet sets_sflags,
+ UChar first_byte,
+ UChar second_byte,
+ UChar third_byte,
+ Int ireg )
+{
+ VG_(new_emit)(True, uses_sflags, sets_sflags);
+
+ boundscheck();
+
+ VG_(emitB) ( 0x0F );
+ VG_(emitB) ( first_byte );
+ second_byte &= 0x38; /* mask out mod and rm fields */
+ emit_amode_regmem_reg ( ireg, second_byte >> 3 );
+ VG_(emitB) ( third_byte );
+ if (dis)
+ VG_(printf)("\n\t\tmmx2a1-0x%x:0x%x:0x%x-(%s)\n",
+ (UInt)first_byte, (UInt)second_byte, (UInt)third_byte,
+ nameIReg(4,ireg) );
+}
+
static void emit_SSE2a ( FlagSet uses_sflags,
FlagSet sets_sflags,
UChar first_byte,
@@ -3273,6 +3295,17 @@
}
+static void synth_MMX2a1 ( Bool uses_flags, Bool sets_flags,
+ UChar first_byte,
+ UChar second_byte,
+ UChar third_byte,
+ Int ireg )
+{
+ emit_MMX2a1 ( uses_flags, sets_flags,
+ first_byte, second_byte, third_byte, ireg );
+}
+
+
static void synth_MMX2_reg_to_mmxreg ( Bool uses_flags, Bool sets_flags,
UChar first_byte,
UChar second_byte,
@@ -4076,6 +4109,23 @@
u->val2 );
break;
+ case MMX2a1_MemRd:
+ vg_assert(u->size == 8);
+ vg_assert(u->tag1 == Lit16);
+ vg_assert(u->tag2 == Lit16);
+ vg_assert(u->tag3 == RealReg);
+ vg_assert(!anyFlagUse(u));
+ if (!(*sselive)) {
+ emit_get_sse_state();
+ *sselive = True;
+ }
+ synth_MMX2a1 ( u->flags_r, u->flags_w,
+ (u->val1 >> 8) & 0xFF,
+ u->val1 & 0xFF,
+ u->val2 & 0xFF,
+ u->val3 );
+ break;
+
case MMX2_ERegRd:
vg_assert(u->tag1 == Lit16);
vg_assert(u->tag2 == RealReg);
diff --git a/coregrind/vg_to_ucode.c b/coregrind/vg_to_ucode.c
index a63a4a5..41ea9c2 100644
--- a/coregrind/vg_to_ucode.c
+++ b/coregrind/vg_to_ucode.c
@@ -3289,6 +3289,57 @@
}
+/* Simple MMX operations with a trailing imm8 operand, either
+ op imm8, (src)mmxreg, (dst)mmxreg
+ or
+ op imm8, (src)address, (dst)mmxreg
+ opc is the byte following the 0x0F prefix.
+*/
+static
+Addr dis_MMXop_regmem_to_reg_Imm8 ( UCodeBlock* cb,
+ UChar sorb,
+ Addr eip,
+ UChar opc,
+ Char* name,
+ Bool show_granularity )
+{
+ Char dis_buf[50];
+ UChar modrm = getUChar(eip);
+ UChar imm8;
+ Bool isReg = epartIsReg(modrm);
+
+ if (isReg) {
+ eip++;
+ imm8 = getUChar(eip);
+ eip++;
+ uInstr2(cb, MMX3, 0,
+ Lit16,
+ (((UShort)(opc)) << 8) | ((UShort)modrm),
+ Lit16,
+ ((UShort)imm8));
+ } else {
+ UInt pair = disAMode ( cb, sorb, eip, dis_buf );
+ Int tmpa = LOW24(pair);
+ eip += HI8(pair);
+ imm8 = getUChar(eip);
+ eip++;
+ uInstr3(cb, MMX2a1_MemRd, 8,
+ Lit16,
+ (((UShort)(opc)) << 8) | ((UShort)modrm),
+ Lit16,
+ ((UShort)imm8),
+ TempReg, tmpa);
+ }
+
+ DIP("%s%s %s, %s, $%d\n",
+ name, show_granularity ? nameMMXGran(opc & 3) : (Char*)"",
+ ( isReg ? nameMMXReg(eregOfRM(modrm)) : dis_buf ),
+ nameMMXReg(gregOfRM(modrm)), (Int)imm8 );
+
+ return eip;
+}
+
+
/* Simple SSE operations, either
op (src)xmmreg, (dst)xmmreg
@@ -4217,15 +4268,6 @@
goto decode_success;
}
- /* PSHUFW */
- if (sz == 4
- && insn[0] == 0x0F && insn[1] == 0x70) {
- eip = dis_SSE2_reg_or_mem_Imm8 ( cb, sorb, eip+2, 8,
- "pshufw",
- insn[0], insn[1] );
- goto decode_success;
- }
-
/* SHUFPD */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC6) {
eip = dis_SSE3_reg_or_mem_Imm8 ( cb, sorb, eip+2, 16, "shufpd",
@@ -7143,6 +7185,12 @@
eip = dis_MMXop_regmem_to_reg ( cb, sorb, eip, opc, "psadbw", False );
break;
+ case 0x70:
+ /* PSHUFW imm8, (src)mmxreg-or-mem, (dst)mmxreg */
+ vg_assert(sz == 4);
+ eip = dis_MMXop_regmem_to_reg_Imm8 ( cb, sorb, eip, opc, "pshufw", False );
+ break;
+
case 0xD7:
/* PMOVMSKB (src)mmxreg, (dst)ireg */
vg_assert(sz == 4);
diff --git a/coregrind/vg_translate.c b/coregrind/vg_translate.c
index faaf877..738c5f7 100644
--- a/coregrind/vg_translate.c
+++ b/coregrind/vg_translate.c
@@ -555,14 +555,15 @@
(u->argc > 1 ? TR2 : N2) &&
(u->argc > 2 || u->has_ret_val ? TR3 : N3) &&
u->regparms_n <= u->argc && XCCALL;
- /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
+ /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
case MMX1:
- case MMX2: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
- case MMX3: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && N3 && XOTHER;
- case MMX2_MemRd: return LIT0 && SZ48 && CC0 && Ls1 && TR2 && N3 && XOTHER;
- case MMX2_MemWr: return LIT0 && SZ48 && CC0 && Ls1 && TR2 && N3 && XOTHER;
- case MMX2_ERegRd: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;
- case MMX2_ERegWr: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;
+ case MMX2: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
+ case MMX3: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && N3 && XOTHER;
+ case MMX2_MemRd: return LIT0 && SZ48 && CC0 && Ls1 && TR2 && N3 && XOTHER;
+ case MMX2_MemWr: return LIT0 && SZ48 && CC0 && Ls1 && TR2 && N3 && XOTHER;
+ case MMX2a1_MemRd: return LIT0 && SZ8 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
+ case MMX2_ERegRd: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;
+ case MMX2_ERegWr: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;
/* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
case SSE2a_MemWr: return LIT0 && SZsse2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
@@ -897,6 +898,7 @@
case MMX3: return "MMX3" ;
case MMX2_MemRd: return "MMX2_MRd" ;
case MMX2_MemWr: return "MMX2_MWr" ;
+ case MMX2a1_MemRd: return "MMX2a1_MRd" ;
case MMX2_ERegRd: return "MMX2_eRRd" ;
case MMX2_ERegWr: return "MMX2_eRWr" ;
case SSE2a_MemWr: return "SSE2a_MWr";
@@ -1067,6 +1069,12 @@
VG_(pp_UOperand)(u, 2, 4, True);
break;
+ case MMX2a1_MemRd:
+ VG_(printf)("0x%x:0x%x:0x%x",
+ (u->val1 >> 8) & 0xFF, u->val1 & 0xFF, u->val2 & 0xFF );
+ VG_(pp_UOperand)(u, 3, 4, True);
+ break;
+
case SSE2a_MemWr:
case SSE2a_MemRd:
case SSE2g_RegWr:
@@ -1296,6 +1304,7 @@
case SSE3ag_MemRd_RegWr: RD(1); WR(2); break;
+ case MMX2a1_MemRd: RD(3); break;
case MMX2_ERegRd: RD(2); break;
case MMX2_ERegWr: WR(2); break;
@@ -1451,7 +1460,7 @@
case JIFZ:
case FPU: case FPU_R: case FPU_W:
case MMX1: case MMX2: case MMX3:
- case MMX2_MemRd: case MMX2_MemWr:
+ case MMX2_MemRd: case MMX2_MemWr: case MMX2a1_MemRd:
case MMX2_ERegRd: case MMX2_ERegWr:
case SSE2a_MemWr: case SSE2a_MemRd: case SSE2a1_MemRd:
case SSE2g_RegWr: case SSE2g1_RegWr: case SSE2e1_RegRd:
diff --git a/helgrind/hg_main.c b/helgrind/hg_main.c
index 9b54604..a3516cf 100644
--- a/helgrind/hg_main.c
+++ b/helgrind/hg_main.c
@@ -2145,6 +2145,23 @@
break;
}
+ case MMX2a1_MemRd: {
+ sk_assert(8 == u_in->size);
+
+ t_size = newTemp(cb);
+ uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_size);
+ uLiteral(cb, (UInt)u_in->size);
+
+ /* XXX all registers should be flushed to baseblock
+ here */
+ uInstr2(cb, CCALL, 0, TempReg, u_in->val3, TempReg, t_size);
+ uCCall(cb, (Addr) & eraser_mem_help_read_N, 2, 2, False);
+
+ VG_(copy_UInstr)(cb, u_in);
+ t_size = INVALID_TEMPREG;
+ break;
+ }
+
case SSE2a_MemRd:
case SSE2a1_MemRd:
case SSE3a_MemRd:
diff --git a/include/vg_skin.h.base b/include/vg_skin.h.base
index 7070479..d316229 100644
--- a/include/vg_skin.h.base
+++ b/include/vg_skin.h.base
@@ -665,6 +665,15 @@
MMX2_MemRd,
MMX2_MemWr,
+ /* 3 bytes, reads mem. Insns of the form
+ bbbbbbbb:mod mmxreg r/m:bbbbbbbb
+ Held in val1[15:0] and val2[7:0], and mod and rm are to be
+ replaced at codegen time by a reference to the Temp/RealReg
+ holding the address. Arg3 holds this Temp/Real Reg.
+ Transfer is always at size 8.
+ */
+ MMX2a1_MemRd,
+
/* 2 bytes, reads/writes an integer ("E") register. Insns of the form
bbbbbbbb:11 mmxreg ireg.
Held in val1[15:0], and ireg is to be replaced
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
index fc62ba9..7a2e05b 100644
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ -1075,6 +1075,25 @@
break;
}
+ case MMX2a1_MemRd: {
+ Int t_size = INVALID_TEMPREG;
+
+ sk_assert(u_in->size == 8);
+
+ sk_assert(u_in->tag3 == TempReg);
+ uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val3));
+ uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val3));
+
+ t_size = newTemp(cb);
+ uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_size);
+ uLiteral(cb, u_in->size);
+ uInstr2(cb, CCALL, 0, TempReg, u_in->val3, TempReg, t_size);
+ uCCall(cb, (Addr) & MC_(fpu_read_check), 2, 2, False);
+
+ VG_(copy_UInstr)(cb, u_in);
+ break;
+ }
+
/* SSE ins referencing scalar integer registers */
case SSE2g_RegWr:
case SSE2g1_RegWr: