Fix code generation errors for the SSE3g_ family.  This fixes various
segfaults running SSEd code.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@1687 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/vg_from_ucode.c b/coregrind/vg_from_ucode.c
index 4be45a7..2c60eb7 100644
--- a/coregrind/vg_from_ucode.c
+++ b/coregrind/vg_from_ucode.c
@@ -1458,14 +1458,13 @@
                   nameIReg(4,ireg) );
 }
 
-static void emit_SSE3g ( FlagSet uses_sflags, 
-                         FlagSet sets_sflags,
-                         UChar first_byte, 
-                         UChar second_byte, 
-			 UChar third_byte,
-			 UChar fourth_byte,
-                         Int ireg,
-                         Bool reads_ireg )
+static void emit_SSE3g_RegRd ( FlagSet uses_sflags, 
+                               FlagSet sets_sflags,
+                               UChar first_byte, 
+                               UChar second_byte, 
+	                       UChar third_byte,
+			       UChar fourth_byte,
+                               Int ireg )
 {
    VG_(new_emit)(True, uses_sflags, sets_sflags);
    VG_(emitB) ( first_byte );
@@ -1477,15 +1476,40 @@
    VG_(emitB) ( fourth_byte );
    if (dis)
       VG_(printf)(
-         reads_ireg
-            ? "\n\t\tireg-to-ssereg--0x%x:0x%x:0x%x:0x%x-(%s)\n" 
-            : "\n\t\tssereg-to-ireg--0x%x:0x%x:0x%x:0x%x-(%s)\n", 
+         "\n\t\tireg-to-ssereg--0x%x:0x%x:0x%x:0x%x-(%s)\n",
          (UInt)first_byte, (UInt)second_byte, 
          (UInt)third_byte, (UInt)fourth_byte,
          nameIReg(4,ireg) 
       );
 }
 
+static void emit_SSE3g1_RegRd ( FlagSet uses_sflags, 
+                                FlagSet sets_sflags,
+                                UChar first_byte, 
+                                UChar second_byte, 
+ 			        UChar third_byte,
+                                UChar fourth_byte,
+			        UChar fifth_byte,
+                                Int ireg )
+{
+   VG_(new_emit)(True, uses_sflags, sets_sflags);
+   VG_(emitB) ( first_byte );
+   VG_(emitB) ( second_byte );
+   VG_(emitB) ( third_byte );
+   fourth_byte &= 0x38; /* mask out mod and rm fields */
+   fourth_byte |= 0xC0; /* set top two bits: mod = 11b */
+   fourth_byte |= (ireg & 7); /* patch in our ireg */
+   VG_(emitB) ( fourth_byte );
+   VG_(emitB) ( fifth_byte );
+   if (dis)
+      VG_(printf)(
+         "\n\t\tireg-to-ssereg--0x%x:0x%x:0x%x:0x%x:0x%x-(%s)\n", 
+         (UInt)first_byte, (UInt)second_byte, 
+         (UInt)third_byte, (UInt)fourth_byte, (UInt)fifth_byte,
+         nameIReg(4,ireg) 
+      );
+}
+
 static void emit_SSE3g1_RegWr ( FlagSet uses_sflags, 
                                 FlagSet sets_sflags,
                                 UChar first_byte, 
@@ -1513,29 +1537,27 @@
       );
 }
 
-static void emit_SSE3g1_RegRd ( FlagSet uses_sflags, 
-                                FlagSet sets_sflags,
-                                UChar first_byte, 
-                                UChar second_byte, 
- 			        UChar third_byte,
-                                UChar fourth_byte,
-			        UChar fifth_byte,
-                                Int ireg )
+static void emit_SSE3g_RegWr ( FlagSet uses_sflags, 
+                               FlagSet sets_sflags,
+                               UChar first_byte, 
+                               UChar second_byte, 
+ 			       UChar third_byte,
+                               UChar fourth_byte,
+                               Int ireg )
 {
    VG_(new_emit)(True, uses_sflags, sets_sflags);
    VG_(emitB) ( first_byte );
    VG_(emitB) ( second_byte );
    VG_(emitB) ( third_byte );
-   fourth_byte &= 0xF8; /* mask out reg field */
+   fourth_byte &= 0xC7; /* mask out reg field */
    fourth_byte |= 0xC0; /* set top two bits: mod = 11b */
-   fourth_byte |= (ireg & 7); /* patch in our ireg */
+   fourth_byte |= ((ireg & 7) << 3); /* patch in our ireg */
    VG_(emitB) ( fourth_byte );
-   VG_(emitB) ( fifth_byte );
    if (dis)
       VG_(printf)(
-         "\n\t\tireg-to-ssereg--0x%x:0x%x:0x%x:0x%x:0x%x-(%s)\n", 
+         "\n\t\tssereg-to-ireg--0x%x:0x%x:0x%x:0x%x-(%s)\n", 
          (UInt)first_byte, (UInt)second_byte, 
-         (UInt)third_byte, (UInt)fourth_byte, (UInt)fifth_byte,
+         (UInt)third_byte, (UInt)fourth_byte,
          nameIReg(4,ireg) 
       );
 }
@@ -3795,13 +3817,21 @@
             emit_get_sse_state();
             *sselive = True;
          }
-         emit_SSE3g ( u->flags_r, u->flags_w,
-                      (u->val1 >> 8) & 0xFF,
-                      u->val1 & 0xFF,
-                      (u->val2 >> 8) & 0xFF,
-                      u->val2 & 0xFF,
-                      u->val3,
-                      u->opcode==SSE3g_RegRd ? True : False );
+	 if (u->opcode==SSE3g_RegRd) {
+            emit_SSE3g_RegRd ( u->flags_r, u->flags_w,
+                               (u->val1 >> 8) & 0xFF,
+                               u->val1 & 0xFF,
+                               (u->val2 >> 8) & 0xFF,
+                               u->val2 & 0xFF,
+                               u->val3 );
+	 } else {
+            emit_SSE3g_RegWr ( u->flags_r, u->flags_w,
+                               (u->val1 >> 8) & 0xFF,
+                               u->val1 & 0xFF,
+                               (u->val2 >> 8) & 0xFF,
+                               u->val2 & 0xFF,
+                               u->val3 );
+	 }
          break;
 
       case SSE3g1_RegWr:
@@ -3847,7 +3877,7 @@
          vg_assert(u->tag1 == Lit16);
          vg_assert(u->tag2 == Lit16);
          vg_assert(u->tag3 == NoValue);
-         vg_assert(!anyFlagUse(u));
+         vg_assert(u->flags_r == FlagsEmpty); 
          if (!(*sselive)) {
             emit_get_sse_state();
             *sselive = True;