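Rationalise ucode generation for 4-byte moves into the MMX unit (movd).

For the memory-operand form, vg_to_ucode.c now emits a single 4-byte
MMX2_MemRd instead of a 4-byte LOAD followed by MMX2_RegRd.  To allow
this, vg_translate.c accepts MMX2_MemRd with size 4 or 8, and the
addrcheck, cachegrind and memcheck instrumenters accept size 4 or 8 for
MMX2_MemRd/MMX2_MemWr (addrcheck now passes the real access size to its
check helper instead of a hard-coded 8).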


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@1506 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/addrcheck/ac_main.c b/addrcheck/ac_main.c
index 8de3290..511c7c5 100644
--- a/addrcheck/ac_main.c
+++ b/addrcheck/ac_main.c
@@ -1096,11 +1096,11 @@
 
          case MMX2_MemRd:
          case MMX2_MemWr:
-            sk_assert(u_in->size == 8);
+            sk_assert(u_in->size == 4 || u_in->size == 8);
             t_addr = u_in->val2;
             t_size = newTemp(cb);
 	    uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_size);
-	    uLiteral(cb, 8);
+	    uLiteral(cb, u_in->size);
             uInstr2(cb, CCALL, 0, TempReg, t_addr, TempReg, t_size);
             uCCall(cb, (Addr) & ac_fpu_ACCESS_check, 2, 2, False );
             VG_(copy_UInstr)(cb, u_in);
diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c
index 0b5cb70..63c3e62 100644
--- a/cachegrind/cg_main.c
+++ b/cachegrind/cg_main.c
@@ -539,7 +539,7 @@
             break;
 
          case MMX2_MemRd:
-            sk_assert(u_in->size == 8);
+            sk_assert(u_in->size == 4 || u_in->size == 8);
             /* fall through */
          case FPU_R:
             sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
@@ -548,7 +548,7 @@
             break;
 
          case MMX2_MemWr:
-            sk_assert(u_in->size == 8);
+            sk_assert(u_in->size == 4 || u_in->size == 8);
             /* fall through */
          case FPU_W:
             sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
@@ -752,7 +752,7 @@
             break;
 
          case MMX2_MemRd:
-            sk_assert(u_in->size == 8);
+            sk_assert(u_in->size == 4 || u_in->size == 8);
             /* fall through */
          case FPU_R:
             t_read      = u_in->val2;
@@ -770,7 +770,7 @@
           * As for the MOV, if it's a mod instruction it's redundant, but it's
           * not expensive and mod instructions are rare anyway. */
          case MMX2_MemWr:
-            sk_assert(u_in->size == 8);
+            sk_assert(u_in->size == 4 || u_in->size == 8);
             /* fall through */
          case STORE:
          case FPU_W:
diff --git a/coregrind/vg_to_ucode.c b/coregrind/vg_to_ucode.c
index d320d4b..cbfd51d 100644
--- a/coregrind/vg_to_ucode.c
+++ b/coregrind/vg_to_ucode.c
@@ -4849,8 +4849,7 @@
             pair = disAMode ( cb, sorb, eip, dis?dis_buf:NULL );
             tmpa = LOW24(pair);
             eip += HI8(pair);
-            uInstr2(cb, LOAD, 4, TempReg, tmpa, TempReg, tmpa);
-            uInstr2(cb, MMX2_RegRd, 4, 
+            uInstr2(cb, MMX2_MemRd, 4, 
                         Lit16, 
                         (((UShort)(opc)) << 8) | ((UShort)modrm),
                         TempReg, tmpa);
diff --git a/coregrind/vg_translate.c b/coregrind/vg_translate.c
index 74a412f..c99e53e 100644
--- a/coregrind/vg_translate.c
+++ b/coregrind/vg_translate.c
@@ -410,6 +410,7 @@
 #  define SZ1  (u->size == 1)
 #  define SZ0  (u->size == 0)
 #  define SZ42 (u->size == 4 || u->size == 2)
+#  define SZ48 (u->size == 4 || u->size == 8)
 #  define SZi  (u->size == 4 || u->size == 2 || u->size == 1)
 #  define SZf  (  u->size ==  4 || u->size ==  8 || u->size ==   2     \
                || u->size == 10 || u->size == 28 || u->size == 108)
@@ -548,7 +549,7 @@
    case MMX1:
    case MMX2:       return LIT0 && SZ0  && CC0 &&  Ls1 &&  N2 &&  N3 && XOTHER;
    case MMX3:       return LIT0 && SZ0  && CC0 &&  Ls1 && Ls1 &&  N3 && XOTHER;
-   case MMX2_MemRd: return LIT0 && SZ8  && CC0 &&  Ls1 && TR2 &&  N3 && XOTHER;
+   case MMX2_MemRd: return LIT0 && SZ48 && CC0 &&  Ls1 && TR2 &&  N3 && XOTHER;
    case MMX2_MemWr: return LIT0 && SZ8  && CC0 &&  Ls1 && TR2 &&  N3 && XOTHER;
    case MMX2_RegRd: return LIT0 && SZ4  && CC0 &&  Ls1 && TR2 &&  N3 && XOTHER;
    default: 
@@ -570,6 +571,7 @@
 #  undef SZ1
 #  undef SZ0
 #  undef SZ42
+#  undef SZ48
 #  undef SZi
 #  undef SZf
 #  undef SZ4m
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
index 01d452d..1833e72 100644
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ -1060,6 +1060,9 @@
          case FPU_R: case FPU_W: {
             Int t_size = INVALID_TEMPREG;
 
+            if (u_in->opcode == MMX2_MemRd || u_in->opcode == MMX2_MemWr)
+               sk_assert(u_in->size == 4 || u_in->size == 8);
+
             sk_assert(u_in->tag2 == TempReg);
             uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
             uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val2));