- Continue to clean up the SSE insn emitters, following the recent cleanups
  regarding G vs E register fields in the encoding.
- Add SSE3e_RegWr and use it to implement the MOVD store-to-integer-register case, which previously panicked.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@1699 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/addrcheck/ac_main.c b/addrcheck/ac_main.c
index cec7186..c65b98c 100644
--- a/addrcheck/ac_main.c
+++ b/addrcheck/ac_main.c
@@ -1020,6 +1020,7 @@
 	    VG_(skin_panic)("AddrCheck: unhandled SSE uinstr");
 	    break;
 
+         case SSE3e_RegWr:
          case SSE3g1_RegWr:
          case SSE5:
          case SSE3g_RegWr:
diff --git a/coregrind/vg_from_ucode.c b/coregrind/vg_from_ucode.c
index 9bf4332..8abf767 100644
--- a/coregrind/vg_from_ucode.c
+++ b/coregrind/vg_from_ucode.c
@@ -1432,7 +1432,7 @@
    third_byte &= 0x38; /* mask out mod and rm fields */
    emit_amode_regmem_reg ( ireg, third_byte >> 3 );
    if (dis)
-      VG_(printf)("\n\t\tsse-0x%x:0x%x:0x%x-(%s)\n", 
+      VG_(printf)("\n\t\tsse2a-0x%x:0x%x:0x%x-(%s)\n", 
                   (UInt)first_byte, (UInt)second_byte, (UInt)third_byte,
                   nameIReg(4,ireg) );
 }
@@ -1452,19 +1452,19 @@
    fourth_byte &= 0x38; /* mask out mod and rm fields */
    emit_amode_regmem_reg ( ireg, fourth_byte >> 3 );
    if (dis)
-      VG_(printf)("\n\t\tsse-0x%x:0x%x:0x%x:0x%x-(%s)\n", 
+      VG_(printf)("\n\t\tsse3a-0x%x:0x%x:0x%x:0x%x-(%s)\n", 
                   (UInt)first_byte, (UInt)second_byte, 
                   (UInt)third_byte, (UInt)fourth_byte,
                   nameIReg(4,ireg) );
 }
 
-static void emit_SSE3g_RegRd ( FlagSet uses_sflags, 
-                               FlagSet sets_sflags,
-                               UChar first_byte, 
-                               UChar second_byte, 
-	                       UChar third_byte,
-			       UChar fourth_byte,
-                               Int ireg )
+static void emit_SSE3e ( FlagSet uses_sflags, 
+                         FlagSet sets_sflags,
+                         UChar first_byte, 
+                         UChar second_byte, 
+                         UChar third_byte,
+			 UChar fourth_byte,
+                         Int ireg )
 {
    VG_(new_emit)(True, uses_sflags, sets_sflags);
    VG_(emitB) ( first_byte );
@@ -1476,21 +1476,21 @@
    VG_(emitB) ( fourth_byte );
    if (dis)
       VG_(printf)(
-         "\n\t\tireg-to-ssereg--0x%x:0x%x:0x%x:0x%x-(%s)\n",
+         "\n\t\tsse3e--0x%x:0x%x:0x%x:0x%x-(%s)\n",
          (UInt)first_byte, (UInt)second_byte, 
          (UInt)third_byte, (UInt)fourth_byte,
          nameIReg(4,ireg) 
       );
 }
 
-static void emit_SSE3g1_RegRd ( FlagSet uses_sflags, 
-                                FlagSet sets_sflags,
-                                UChar first_byte, 
-                                UChar second_byte, 
- 			        UChar third_byte,
-                                UChar fourth_byte,
-			        UChar fifth_byte,
-                                Int ireg )
+static void emit_SSE3e1 ( FlagSet uses_sflags, 
+                          FlagSet sets_sflags,
+                          UChar first_byte, 
+                          UChar second_byte, 
+ 			  UChar third_byte,
+                          UChar fourth_byte,
+			  UChar fifth_byte,
+                          Int ireg )
 {
    VG_(new_emit)(True, uses_sflags, sets_sflags);
    VG_(emitB) ( first_byte );
@@ -1503,21 +1503,21 @@
    VG_(emitB) ( fifth_byte );
    if (dis)
       VG_(printf)(
-         "\n\t\tireg-to-ssereg--0x%x:0x%x:0x%x:0x%x:0x%x-(%s)\n", 
+         "\n\t\tsse3e1--0x%x:0x%x:0x%x:0x%x:0x%x-(%s)\n", 
          (UInt)first_byte, (UInt)second_byte, 
          (UInt)third_byte, (UInt)fourth_byte, (UInt)fifth_byte,
          nameIReg(4,ireg) 
       );
 }
 
-static void emit_SSE3g1_RegWr ( FlagSet uses_sflags, 
-                                FlagSet sets_sflags,
-                                UChar first_byte, 
-                                UChar second_byte, 
- 			        UChar third_byte,
-                                UChar fourth_byte,
-			        UChar fifth_byte,
-                                Int ireg )
+static void emit_SSE3g1 ( FlagSet uses_sflags, 
+                          FlagSet sets_sflags,
+                          UChar first_byte, 
+                          UChar second_byte, 
+                          UChar third_byte,
+                          UChar fourth_byte,
+			  UChar fifth_byte,
+                          Int ireg )
 {
    VG_(new_emit)(True, uses_sflags, sets_sflags);
    VG_(emitB) ( first_byte );
@@ -1530,20 +1530,20 @@
    VG_(emitB) ( fifth_byte );
    if (dis)
       VG_(printf)(
-         "\n\t\tssereg-to-ireg--0x%x:0x%x:0x%x:0x%x:0x%x-(%s)\n", 
+         "\n\t\tsse3g1_reg_wr--0x%x:0x%x:0x%x:0x%x:0x%x-(%s)\n", 
          (UInt)first_byte, (UInt)second_byte, 
          (UInt)third_byte, (UInt)fourth_byte, (UInt)fifth_byte,
          nameIReg(4,ireg) 
       );
 }
 
-static void emit_SSE3g_RegWr ( FlagSet uses_sflags, 
-                               FlagSet sets_sflags,
-                               UChar first_byte, 
-                               UChar second_byte, 
- 			       UChar third_byte,
-                               UChar fourth_byte,
-                               Int ireg )
+static void emit_SSE3g ( FlagSet uses_sflags, 
+                         FlagSet sets_sflags,
+                         UChar first_byte, 
+                         UChar second_byte, 
+ 			 UChar third_byte,
+                         UChar fourth_byte,
+                         Int ireg )
 {
    VG_(new_emit)(True, uses_sflags, sets_sflags);
    VG_(emitB) ( first_byte );
@@ -1555,7 +1555,7 @@
    VG_(emitB) ( fourth_byte );
    if (dis)
       VG_(printf)(
-         "\n\t\tssereg-to-ireg--0x%x:0x%x:0x%x:0x%x-(%s)\n", 
+         "\n\t\tsse3g--0x%x:0x%x:0x%x:0x%x-(%s)\n", 
          (UInt)first_byte, (UInt)second_byte, 
          (UInt)third_byte, (UInt)fourth_byte,
          nameIReg(4,ireg) 
@@ -1575,7 +1575,7 @@
    VG_(emitB) ( third_byte );
    VG_(emitB) ( fourth_byte );
    if (dis)
-      VG_(printf)("\n\t\tsse-0x%x:0x%x:0x%x:0x%x\n", 
+      VG_(printf)("\n\t\tsse4-0x%x:0x%x:0x%x:0x%x\n", 
                   (UInt)first_byte, (UInt)second_byte, 
                   (UInt)third_byte, (UInt)fourth_byte );
 }
@@ -1595,7 +1595,7 @@
    VG_(emitB) ( fourth_byte );
    VG_(emitB) ( fifth_byte );
    if (dis)
-      VG_(printf)("\n\t\tsse-0x%x:0x%x:0x%x:0x%x:0x%x\n", 
+      VG_(printf)("\n\t\tsse5-0x%x:0x%x:0x%x:0x%x:0x%x\n", 
                   (UInt)first_byte, (UInt)second_byte, 
                   (UInt)third_byte, (UInt)fourth_byte,
                   (UInt)fifth_byte );
@@ -1612,7 +1612,7 @@
    VG_(emitB) ( second_byte );
    VG_(emitB) ( third_byte );
    if (dis)
-      VG_(printf)("\n\t\tsse-0x%x:0x%x:0x%x\n", 
+      VG_(printf)("\n\t\tsse3-0x%x:0x%x:0x%x\n", 
                   (UInt)first_byte, (UInt)second_byte, 
                   (UInt)third_byte );
 }
@@ -1633,7 +1633,7 @@
       dest_reg. */
    emit_amode_regmem_reg ( addr_reg, dest_reg );
    if (dis)
-      VG_(printf)("\n\t\tsse-0x%x:0x%x:0x%x(addr=%s, dest=%s)\n", 
+      VG_(printf)("\n\t\tsse3ag_mem_rd_reg_wr-0x%x:0x%x:0x%x(addr=%s, dest=%s)\n", 
                   (UInt)first_byte, (UInt)second_byte, 
             	  (UInt)third_byte, nameIReg(4, addr_reg), 
                                     nameIReg(4, dest_reg));
@@ -3849,6 +3849,7 @@
                       u->val3 );
          break;
 
+      case SSE3e_RegWr:
       case SSE3e_RegRd:
       case SSE3g_RegWr:
          vg_assert(u->size == 4);
@@ -3860,20 +3861,20 @@
             emit_get_sse_state();
             *sselive = True;
          }
-	 if (u->opcode==SSE3e_RegRd) {
-            emit_SSE3g_RegRd ( u->flags_r, u->flags_w,
-                               (u->val1 >> 8) & 0xFF,
-                               u->val1 & 0xFF,
-                               (u->val2 >> 8) & 0xFF,
-                               u->val2 & 0xFF,
-                               u->val3 );
+	 if (u->opcode==SSE3e_RegRd || u->opcode==SSE3e_RegWr) {
+            emit_SSE3e ( u->flags_r, u->flags_w,
+                         (u->val1 >> 8) & 0xFF,
+                         u->val1 & 0xFF,
+                         (u->val2 >> 8) & 0xFF,
+                         u->val2 & 0xFF,
+                         u->val3 );
 	 } else {
-            emit_SSE3g_RegWr ( u->flags_r, u->flags_w,
-                               (u->val1 >> 8) & 0xFF,
-                               u->val1 & 0xFF,
-                               (u->val2 >> 8) & 0xFF,
-                               u->val2 & 0xFF,
-                               u->val3 );
+            emit_SSE3g ( u->flags_r, u->flags_w,
+                         (u->val1 >> 8) & 0xFF,
+                         u->val1 & 0xFF,
+                         (u->val2 >> 8) & 0xFF,
+                         u->val2 & 0xFF,
+                         u->val3 );
 	 }
          break;
 
@@ -3887,13 +3888,13 @@
             emit_get_sse_state();
             *sselive = True;
          }
-         emit_SSE3g1_RegWr ( u->flags_r, u->flags_w,
-                             (u->val1 >> 8) & 0xFF,
-                             u->val1 & 0xFF,
-                             (u->val2 >> 8) & 0xFF,
-                             u->val2 & 0xFF,
-                             u->lit32 & 0xFF,
-                             u->val3 );
+         emit_SSE3g1 ( u->flags_r, u->flags_w,
+                       (u->val1 >> 8) & 0xFF,
+                       u->val1 & 0xFF,
+                       (u->val2 >> 8) & 0xFF,
+                       u->val2 & 0xFF,
+                       u->lit32 & 0xFF,
+                       u->val3 );
          break;
 
       case SSE3e1_RegRd:
@@ -3906,13 +3907,13 @@
             emit_get_sse_state();
             *sselive = True;
          }
-         emit_SSE3g1_RegRd ( u->flags_r, u->flags_w,
-                             (u->val1 >> 8) & 0xFF,
-                             u->val1 & 0xFF,
-                             (u->val2 >> 8) & 0xFF,
-                             u->val2 & 0xFF,
-                             u->lit32 & 0xFF,
-                             u->val3 );
+         emit_SSE3e1 ( u->flags_r, u->flags_w,
+                       (u->val1 >> 8) & 0xFF,
+                       u->val1 & 0xFF,
+                       (u->val2 >> 8) & 0xFF,
+                       u->val2 & 0xFF,
+                       u->lit32 & 0xFF,
+                       u->val3 );
          break;
 
       case SSE5:
diff --git a/coregrind/vg_to_ucode.c b/coregrind/vg_to_ucode.c
index e622bf7..067d347 100644
--- a/coregrind/vg_to_ucode.c
+++ b/coregrind/vg_to_ucode.c
@@ -4209,7 +4209,16 @@
       Bool is_store = insn[1]==0x7E;
       modrm = insn[2];
       if (epartIsReg(modrm) && is_store) {
-	 VG_(core_panic)("MOVD reg - store (to ireg)");
+         t1 = newTemp(cb);
+         uInstr3(cb, SSE3e_RegWr, 4,
+                     Lit16, (((UShort)0x66) << 8) | (UShort)insn[0],
+                     Lit16, (((UShort)insn[1]) << 8) | (UShort)modrm,
+                     TempReg, t1 );
+	 uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, eregOfRM(modrm));
+	 if (dis)
+	    VG_(printf)("movd %s, %s\n", 
+		        nameXMMReg(gregOfRM(modrm)),
+		        nameIReg(4,eregOfRM(modrm)));
       } else
       if (epartIsReg(modrm) && !is_store) {
          t1 = newTemp(cb);
diff --git a/coregrind/vg_translate.c b/coregrind/vg_translate.c
index 5a5dfd4..d0b0458 100644
--- a/coregrind/vg_translate.c
+++ b/coregrind/vg_translate.c
@@ -564,6 +564,7 @@
    case SSE3a_MemWr:  return LIT0 && SZsse && CC0  && Ls1 && Ls2 && TR3 && XOTHER;
    case SSE3a_MemRd:  return LIT0 && SZsse && CCf  && Ls1 && Ls2 && TR3 && XOTHER;
    case SSE3e_RegRd:  return LIT0 && SZ4   && CC0  && Ls1 && Ls2 && TR3 && XOTHER;
+   case SSE3e_RegWr:  return LIT0 && SZ4   && CC0  && Ls1 && Ls2 && TR3 && XOTHER;
    case SSE3g_RegWr:  return LIT0 && SZ4   && CC0  && Ls1 && Ls2 && TR3 && XOTHER;
    case SSE3g1_RegWr: return LIT8 && SZ4   && CC0  && Ls1 && Ls2 && TR3 && XOTHER;
    case SSE3e1_RegRd: return LIT8 && SZ2   && CC0  && Ls1 && Ls2 && TR3 && XOTHER;
@@ -887,6 +888,7 @@
       case SSE2a_MemWr: return "SSE2a_MWr";
       case SSE2a_MemRd: return "SSE2a_MRd";
       case SSE3e_RegRd: return "SSE3e_RRd";
+      case SSE3e_RegWr: return "SSE3e_RWr";
       case SSE3g_RegWr: return "SSE3g_RWr";
       case SSE3g1_RegWr: return "SSE3g1_RWr";
       case SSE3e1_RegRd: return "SSE3e1_RRd";
@@ -1061,6 +1063,7 @@
          VG_(pp_UOperand)(u, 3, 4, True);
          break;
 
+      case SSE3e_RegWr:
       case SSE3e_RegRd:
       case SSE3g_RegWr:
          VG_(printf)("0x%x:0x%x:0x%x:0x%x",
@@ -1257,6 +1260,7 @@
       case SSE3e1_RegRd:
       case SSE2a_MemRd: RD(3); break;
 
+      case SSE3e_RegWr:
       case SSE3g1_RegWr:
       case SSE3g_RegWr: WR(3); break;
 
@@ -1419,7 +1423,7 @@
       case MMX2_ERegRd: case MMX2_ERegWr:
       case SSE2a_MemWr: case SSE2a_MemRd:
       case SSE3a_MemWr: case SSE3a_MemRd:
-      case SSE3e_RegRd: case SSE3g_RegWr: 
+      case SSE3e_RegRd: case SSE3g_RegWr: case SSE3e_RegWr:
       case SSE3g1_RegWr: case SSE3e1_RegRd:
       case SSE4: case SSE3: case SSE5: case SSE3ag_MemRd_RegWr:
       case WIDEN:
diff --git a/include/vg_skin.h b/include/vg_skin.h
index 1ac7767..ec61a70 100644
--- a/include/vg_skin.h
+++ b/include/vg_skin.h
@@ -654,6 +654,7 @@
          Transfer is always at size 4.  Arg3 holds this Temp/Real Reg.
       */
       SSE3e_RegRd,
+      SSE3e_RegWr, /* variant that writes the Ereg instead of reading it */
 
       /* 5 bytes, reads an integer register.  Insns of the form
          bbbbbbbb:bbbbbbbb:bbbbbbbb: 11 bbb ireg :bbbbbbbb. Held in