Fill in a few more SSE/SSE2 insns, with the current aim of being able to
run Qt-3.1 as built with "icc -xW" (P4 code generation).  Hopefully by
now I've worked through most of the SSE/SSE2 conceptual nasties, and it's
mostly a question of filling in the gaps.

I think I might have created a bug of some kind with SSE3g_RegWr.  My
current test app segfaults unless I run with --optimise=no, which makes
me think I've written something erroneous in the UInstr predicates
controlling optimisation.  I don't know what, though.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@1676 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/vg_from_ucode.c b/coregrind/vg_from_ucode.c
index c609a89..0b90f11 100644
--- a/coregrind/vg_from_ucode.c
+++ b/coregrind/vg_from_ucode.c
@@ -1464,7 +1464,8 @@
                          UChar second_byte, 
 			 UChar third_byte,
 			 UChar fourth_byte,
-                         Int ireg )
+                         Int ireg,
+                         Bool reads_ireg )
 {
    VG_(new_emit)(True, uses_sflags, sets_sflags);
    VG_(emitB) ( first_byte );
@@ -1475,10 +1476,14 @@
    fourth_byte |= (ireg & 7); /* patch in our ireg */
    VG_(emitB) ( fourth_byte );
    if (dis)
-      VG_(printf)("\n\t\tsse-reg-to-xmmreg--0x%x:0x%x:0x%x:0x%x-(%s)\n", 
-                  (UInt)first_byte, (UInt)second_byte, 
-                  (UInt)third_byte, (UInt)fourth_byte,
-                  nameIReg(4,ireg) );
+      VG_(printf)(
+         reads_ireg
+            ? "\n\t\tireg-to-ssereg--0x%x:0x%x:0x%x:0x%x-(%s)\n" 
+            : "\n\t\tssereg-to-ireg--0x%x:0x%x:0x%x:0x%x-(%s)\n", 
+         (UInt)first_byte, (UInt)second_byte, 
+         (UInt)third_byte, (UInt)fourth_byte,
+         nameIReg(4,ireg) 
+      );
 }
 
 static void emit_SSE4 ( FlagSet uses_sflags, 
@@ -3713,7 +3718,6 @@
          vg_assert(u->tag1 == Lit16);
          vg_assert(u->tag2 == Lit16);
          vg_assert(u->tag3 == RealReg);
-         vg_assert(!anyFlagUse(u));
          if (!(*sselive)) {
             emit_get_sse_state();
             *sselive = True;
@@ -3727,6 +3731,7 @@
          break;
 
       case SSE3g_RegRd:
+      case SSE3g_RegWr:
          vg_assert(u->size == 4);
          vg_assert(u->tag1 == Lit16);
          vg_assert(u->tag2 == Lit16);
@@ -3741,7 +3746,8 @@
                       u->val1 & 0xFF,
                       (u->val2 >> 8) & 0xFF,
                       u->val2 & 0xFF,
-                      u->val3 );
+                      u->val3,
+                      u->opcode==SSE3g_RegRd ? True : False );
          break;
 
       case SSE4: