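More MMX stuff, including a bunch of prefetch instructions.

- MOVD (0x0F 0x6E) from an integer register or memory to an MMX register,
  via the new MMX2_RegRd uinstr.
- PSRL/PSLL of an MMX register by imm8 (0x0F 0x73), via the new MMX3 uinstr.
- PREFETCHNTA/T0/T1/T2 (0x0F 0x18) are decoded and skipped.
- The register form of MOVQ (0x0F 0x6F) now advances eip past the modrm byte.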


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@1488 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/vg_from_ucode.c b/coregrind/vg_from_ucode.c
index 6ceb720..1a1fc12 100644
--- a/coregrind/vg_from_ucode.c
+++ b/coregrind/vg_from_ucode.c
@@ -1399,6 +1399,41 @@
                   nameIReg(4,ireg) );
 }
 
+static void emit_MMX2_reg_to_mmxreg ( FlagSet uses_sflags, /* forwarded to VG_(new_emit) */
+                                      FlagSet sets_sflags, /* forwarded to VG_(new_emit) */
+			              UChar first_byte, /* emitted unchanged right after the 0x0F byte */
+                                      UChar second_byte, /* only bits 3..5 survive into the final byte */
+                                      Int ireg ) /* low three bits become the rm field of the final byte */
+{  /* Emit three bytes: 0x0F, first_byte, and a byte rebuilt from second_byte and ireg. */
+   VG_(new_emit)(True, uses_sflags, sets_sflags);
+   VG_(emitB) ( 0x0F );
+   VG_(emitB) ( first_byte );
+   second_byte &= 0x38; /* mask out mod and rm fields, keeping bits 3..5 */
+   second_byte |= 0xC0; /* set top two bits: mod = 11b */
+   second_byte |= (ireg & 7); /* patch in our ireg */
+   VG_(emitB) ( second_byte );
+   if (dis) /* when dis is set, print the bytes emitted; second_byte is printed after patching */
+      VG_(printf)("\n\t\tmmx2reg-to-mmxreg--0x%x:0x%x-(%s)\n", 
+                  (UInt)first_byte, (UInt)second_byte,
+                  nameIReg(4,ireg) );
+}
+
+static void emit_MMX3_no_mem ( FlagSet uses_sflags, /* forwarded to VG_(new_emit) */
+                               FlagSet sets_sflags, /* forwarded to VG_(new_emit) */
+			       UChar first_byte, /* emitted right after the 0x0F byte */
+                               UChar second_byte, /* emitted unchanged after first_byte */
+                               UChar third_byte ) /* emitted unchanged as the last byte */
+{  /* Emit four bytes verbatim: 0x0F, first_byte, second_byte, third_byte. */
+   VG_(new_emit)(True, uses_sflags, sets_sflags);
+   VG_(emitB) ( 0x0F );
+   VG_(emitB) ( first_byte );
+   VG_(emitB) ( second_byte );
+   VG_(emitB) ( third_byte );
+   if (dis) /* when dis is set, print the three caller-supplied bytes */
+      VG_(printf)("\n\t\tmmx3-0x%x:0x%x:0x%x\n", 
+                  (UInt)first_byte, (UInt)second_byte, (UInt)third_byte );
+}
+
 static void emit_MMX2_no_mem ( FlagSet uses_sflags, 
                                FlagSet sets_sflags,
 			       UChar first_byte, 
@@ -2671,6 +2706,15 @@
 }
 
 
+static void synth_MMX2_reg_to_mmxreg ( Bool uses_flags, Bool sets_flags,
+                                       UChar first_byte,
+                                       UChar second_byte, 
+                                       Int ireg )
+{  /* Thin wrapper: forwards every argument, in order, to emit_MMX2_reg_to_mmxreg. */
+   emit_MMX2_reg_to_mmxreg ( uses_flags, sets_flags, /* NOTE(review): declared Bool here, FlagSet in the callee -- confirm the types are interchangeable */
+                             first_byte, second_byte, ireg );
+}
+
 static void synth_MMX2_no_mem ( Bool uses_flags, Bool sets_flags,
 			        UChar first_byte,
                                 UChar second_byte )
@@ -2679,6 +2723,16 @@
 }
 
 
+static void synth_MMX3_no_mem ( Bool uses_flags, Bool sets_flags,
+			        UChar first_byte,
+                                UChar second_byte,
+                                UChar third_byte )
+{  /* Thin wrapper: forwards every argument, in order, to emit_MMX3_no_mem. */
+   emit_MMX3_no_mem ( uses_flags, sets_flags, /* NOTE(review): declared Bool here, FlagSet in the callee -- confirm the types are interchangeable */
+                      first_byte, second_byte, third_byte );
+}
+
+
 static void synth_MMX1_no_mem ( Bool uses_flags, Bool sets_flags,
 			        UChar first_byte )
 {
@@ -3425,6 +3479,7 @@
       case MMX2_MemRd:
          vg_assert(u->tag1 == Lit16);
          vg_assert(u->tag2 == RealReg);
+         vg_assert(u->tag3 == NoValue);
          vg_assert(!anyFlagUse(u));
          if (!(*fplive)) {
             emit_get_fpu_state();
@@ -3436,9 +3491,25 @@
                              u->val2 );
          break;
 
+      case MMX2_RegRd:
+         vg_assert(u->tag1 == Lit16);
+         vg_assert(u->tag2 == RealReg);
+         vg_assert(u->tag3 == NoValue);
+         vg_assert(!anyFlagUse(u));
+         if (!(*fplive)) {
+            emit_get_fpu_state();
+            *fplive = True;
+         }
+         synth_MMX2_reg_to_mmxreg ( u->flags_r, u->flags_w,
+                                    (u->val1 >> 8) & 0xFF,
+                                    u->val1 & 0xFF,
+                                    u->val2 );
+         break;
+
       case MMX1:
          vg_assert(u->tag1 == Lit16);
          vg_assert(u->tag2 == NoValue);
+         vg_assert(u->tag3 == NoValue);
 	 if (!(*fplive)) {
 	    emit_get_fpu_state();
 	    *fplive = True;
@@ -3450,6 +3521,7 @@
       case MMX2:
          vg_assert(u->tag1 == Lit16);
          vg_assert(u->tag2 == NoValue);
+         vg_assert(u->tag3 == NoValue);
 	 if (!(*fplive)) {
 	    emit_get_fpu_state();
 	    *fplive = True;
@@ -3459,6 +3531,20 @@
                              u->val1 & 0xFF );
          break;
 
+      case MMX3:
+         vg_assert(u->tag1 == Lit16);
+         vg_assert(u->tag2 == Lit16);
+         vg_assert(u->tag3 == NoValue);
+	 if (!(*fplive)) {
+	    emit_get_fpu_state();
+	    *fplive = True;
+	 }
+         synth_MMX3_no_mem ( u->flags_r, u->flags_w,
+			     (u->val1 >> 8) & 0xFF,
+                             u->val1 & 0xFF,
+                             u->val2 & 0xFF );
+         break;
+
       default: 
          if (VG_(needs).extended_UCode) {
 	    if (*fplive) {
diff --git a/coregrind/vg_to_ucode.c b/coregrind/vg_to_ucode.c
index 779ac51..ea29f8f 100644
--- a/coregrind/vg_to_ucode.c
+++ b/coregrind/vg_to_ucode.c
@@ -4748,6 +4748,57 @@
 
       /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */
 
+      case 0x18: /* PREFETCHT0/PREFETCHT1/PREFETCHT2/PREFETCHNTA */
+         vg_assert(sz == 4);
+         modrm = getUChar(eip);
+         if (epartIsReg(modrm)) {
+            goto unimp2;
+         }
+         if (gregOfRM(modrm) > 3) {
+            goto unimp2;
+         }
+         eip += lengthAMode(eip);
+         if (dis) {
+            UChar* hintstr;
+            switch (gregOfRM(modrm)) {
+               case 0: hintstr = "nta"; break;
+               case 1: hintstr = "t0"; break;
+               case 2: hintstr = "t1"; break;
+               case 3: hintstr = "t2"; break;
+               default: goto unimp2;
+            }
+            VG_(printf)("prefetch%s ...\n", hintstr);
+         }
+         break;
+
+      case 0x73: /* PSLL/PSRA/PSRL mmxreg by imm8 */
+         {
+         UChar byte1, byte2, byte3, subopc, mmxreg;
+         vg_assert(sz == 4);
+         byte1 = opc;
+         byte2 = getUChar(eip); eip++;
+         byte3 = getUChar(eip); eip++;
+         mmxreg = byte2 & 7;
+         subopc = (byte2 >> 3) & 7;
+         if (subopc == 2 || subopc == 6) {  
+            /* 2 == 010 == SRL, 6 == 110 == SLL */
+            /* ok */
+         } else {
+            eip -= 2;
+            goto unimp2;
+         }
+         uInstr2(cb, MMX3, 0, 
+                     Lit16, (((UShort)byte1) << 8) | ((UShort)byte2),
+                     Lit16, ((UShort)byte3) );
+         if (dis)
+            VG_(printf)("ps%s%s $%d, %s\n",
+                        (subopc == 2 ? "rl" : subopc == 6 ? "ll" : "??"),
+                        nameMMXGran(opc & 3),
+                        (Int)byte3,
+                        nameMMXReg(mmxreg) );
+         }
+         break;
+
       case 0x77: /* EMMS */
          vg_assert(sz == 4);
          uInstr1(cb, MMX1, 0, Lit16, ((UShort)(opc)) );
@@ -4755,10 +4806,43 @@
             VG_(printf)("emms\n");
          break;
 
+      case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
+         vg_assert(sz == 4);
+         modrm = getUChar(eip);
+         if (epartIsReg(modrm)) {
+            eip++;
+            t1 = newTemp(cb);
+            uInstr2(cb, GET, 4, ArchReg, eregOfRM(modrm), TempReg, t1);
+            uInstr2(cb, MMX2_RegRd, 4, 
+                        Lit16, 
+                        (((UShort)(opc)) << 8) | ((UShort)modrm),
+                        TempReg, t1 );
+            if (dis)
+               VG_(printf)("movd %s, %s\n", 
+                           nameIReg(4,eregOfRM(modrm)),
+                           nameMMXReg(gregOfRM(modrm)));
+         } else {
+            Int tmpa;
+            pair = disAMode ( cb, sorb, eip, dis?dis_buf:NULL );
+            tmpa = LOW24(pair);
+            eip += HI8(pair);
+            uInstr2(cb, LOAD, 4, TempReg, tmpa, TempReg, tmpa);
+            uInstr2(cb, MMX2_RegRd, 4, 
+                        Lit16, 
+                        (((UShort)(opc)) << 8) | ((UShort)modrm),
+                        TempReg, tmpa);
+            if (dis)
+               VG_(printf)("movd %s, %s\n", 
+                           dis_buf,
+                           nameMMXReg(gregOfRM(modrm)));
+         }
+         break;
+
       case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
          vg_assert(sz == 4);
          modrm = getUChar(eip);
          if (epartIsReg(modrm)) {
+            eip++;
             uInstr1(cb, MMX2, 0, 
                         Lit16, 
                         (((UShort)(opc)) << 8) | ((UShort)modrm) );
diff --git a/coregrind/vg_translate.c b/coregrind/vg_translate.c
index 545bd32..74a412f 100644
--- a/coregrind/vg_translate.c
+++ b/coregrind/vg_translate.c
@@ -547,8 +547,10 @@
    /* Fields checked:     lit32   size  flags_r/w tag1   tag2   tag3    (rest) */
    case MMX1:
    case MMX2:       return LIT0 && SZ0  && CC0 &&  Ls1 &&  N2 &&  N3 && XOTHER;
+   case MMX3:       return LIT0 && SZ0  && CC0 &&  Ls1 && Ls1 &&  N3 && XOTHER;
    case MMX2_MemRd: return LIT0 && SZ8  && CC0 &&  Ls1 && TR2 &&  N3 && XOTHER;
    case MMX2_MemWr: return LIT0 && SZ8  && CC0 &&  Ls1 && TR2 &&  N3 && XOTHER;
+   case MMX2_RegRd: return LIT0 && SZ4  && CC0 &&  Ls1 && TR2 &&  N3 && XOTHER;
    default: 
       if (VG_(needs).extended_UCode)
          return SK_(sane_XUInstr)(beforeRA, beforeLiveness, u);
@@ -851,8 +853,10 @@
       case FPU:     return "FPU"  ;
       case MMX1:       return "MMX1" ;
       case MMX2:       return "MMX2" ;
+      case MMX3:       return "MMX3" ;
       case MMX2_MemRd: return "MMX2_MRd" ;
       case MMX2_MemWr: return "MMX2_MWr" ;
+      case MMX2_RegRd: return "MMX2_RRd" ;
       default:
          if (VG_(needs).extended_UCode)
             return SK_(name_XUOpcode)(opc);
@@ -981,9 +985,20 @@
                      (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
          break;
 
+      case MMX3:
+         VG_(printf)("\t0x%x:0x%x:0x%x",
+                     (u->val1 >> 8) & 0xFF, u->val1 & 0xFF, u->val2 & 0xFF );
+         break;
+
+      case MMX2_RegRd:
+         VG_(printf)("\t0x%x:0x%x, ",
+                     (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
+         VG_(pp_UOperand)(u, 2, 4, False);
+         break;
+ 
       case MMX2_MemWr:
       case MMX2_MemRd:
          VG_(printf)("\t0x%x:0x%x",
                      (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
          VG_(pp_UOperand)(u, 2, 4, True);
          break;
@@ -1139,7 +1154,9 @@
       case LEA1: RD(1); WR(2); break;
       case LEA2: RD(1); RD(2); WR(3); break;
 
-      case MMX1: case MMX2:
+      case MMX2_RegRd: RD(2); break;
+
+      case MMX1: case MMX2: case MMX3:
       case NOP:   case FPU:   case INCEIP: case CALLM_S: case CALLM_E:
       case CLEAR: case CALLM: case LOCK: break;
 
@@ -1287,8 +1304,9 @@
       case CC2VAL:
       case JIFZ:
       case FPU: case FPU_R: case FPU_W:
-      case MMX1: case MMX2:
+      case MMX1: case MMX2: case MMX3:
       case MMX2_MemRd: case MMX2_MemWr:
+      case MMX2_RegRd:
       case WIDEN:
       /* GETSEG and USESEG are to do with ArchRegS, not ArchReg */
       case GETSEG: case PUTSEG: