mips32: Fixed the problem with the FCSR register.

Until now, Valgrind always read two FPU registers to calculate the FCSR
value. From now on, Valgrind reads the exact number of FPU registers the
instruction uses (one or two).


git-svn-id: svn://svn.valgrind.org/vex/trunk@2794 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_mips_defs.h b/priv/guest_mips_defs.h
index b7fe1fb..f30abc9 100644
--- a/priv/guest_mips_defs.h
+++ b/priv/guest_mips_defs.h
@@ -94,6 +94,7 @@
 
 
 #if defined(__mips__) && ((defined(__mips_isa_rev) && __mips_isa_rev >= 2))
+extern UInt mips32_dirtyhelper_rdhwr ( UInt rt, UInt rd );
 extern ULong mips64_dirtyhelper_rdhwr ( ULong rt, ULong rd );
 #endif
 
diff --git a/priv/guest_mips_helpers.c b/priv/guest_mips_helpers.c
index 675fd47..6a8a563 100644
--- a/priv/guest_mips_helpers.c
+++ b/priv/guest_mips_helpers.c
@@ -1076,6 +1076,21 @@
    case rd: asm volatile ("dmfc0 %0, $" #rd ", "#sel"\n\t" :"=r" (x) ); break;
 
 #if defined(__mips__) && ((defined(__mips_isa_rev) && __mips_isa_rev >= 2))
+UInt mips32_dirtyhelper_rdhwr ( UInt rt, UInt rd )
+{
+   UInt x = 0;
+   switch (rd) {
+      case 1:  /* x = SYNCI_StepSize() */
+         __asm__ __volatile__("rdhwr %0, $1\n\t" : "=r" (x) );
+         break;
+
+      default:
+         vassert(0);
+         break;
+   }
+   return x;
+}
+
 ULong mips64_dirtyhelper_rdhwr ( ULong rt, ULong rd )
 {
    ULong x = 0;
@@ -1094,6 +1109,18 @@
 
 #define ASM_VOLATILE_ROUND32(fs, inst)                              \
    __asm__ volatile("cfc1    $t0, $31"  "\n\t"                      \
+                    "ctc1    %2,  $31"  "\n\t"                      \
+                    "mtc1    %1,  $f0"  "\n\t"                      \
+                    ""#inst" $f0, $f0"  "\n\t"                      \
+                    "cfc1    %0,  $31"  "\n\t"                      \
+                    "ctc1    $t0, $31"  "\n\t"                      \
+                    : "=r" (ret)                                    \
+                    : "r" (loVal), "r" (fcsr)                       \
+                    : "t0", "$f0", "$f1"                            \
+                   );
+
+#define ASM_VOLATILE_ROUND32_DOUBLE(fs, inst)                       \
+   __asm__ volatile("cfc1    $t0, $31"  "\n\t"                      \
                     "ctc1    %3,  $31"  "\n\t"                      \
                     "mtc1    %1,  $f0"  "\n\t"                      \
                     "mtc1    %2,  $f1"  "\n\t"                      \
@@ -1101,7 +1128,7 @@
                     "cfc1    %0,  $31"  "\n\t"                      \
                     "ctc1    $t0, $31"  "\n\t"                      \
                     : "=r" (ret)                                    \
-                    : "r" (addr[fs]), "r" (addr[fs+1]), "r" (fcsr)  \
+                    : "r" (loVal), "r" (hiVal), "r" (fcsr)          \
                     : "t0", "$f0", "$f1"                            \
                    );
 
@@ -1126,34 +1153,38 @@
 #if defined(VGA_mips32)
    VexGuestMIPS32State* guest_state = (VexGuestMIPS32State*)gs;
    UInt *addr = (UInt *)&guest_state->guest_f0;
-#define ASM_VOLATILE_ROUND(fs, inst) ASM_VOLATILE_ROUND32(fs, inst)
+   UInt loVal = addr[fs];
+   UInt hiVal = addr[fs+1];
+#define ASM_VOLATILE_ROUND(fs, inst)        ASM_VOLATILE_ROUND32(fs, inst)
+#define ASM_VOLATILE_ROUND_DOUBLE(fs, inst) ASM_VOLATILE_ROUND32_DOUBLE(fs, inst)
 #else
    VexGuestMIPS64State* guest_state = (VexGuestMIPS64State*)gs;
    ULong *addr = (ULong *)&guest_state->guest_f0;
-#define ASM_VOLATILE_ROUND(fs, inst) ASM_VOLATILE_ROUND64(fs, inst)
+#define ASM_VOLATILE_ROUND(fs, inst)        ASM_VOLATILE_ROUND64(fs, inst)
+#define ASM_VOLATILE_ROUND_DOUBLE(fs, inst) ASM_VOLATILE_ROUND64(fs, inst)
 #endif
    UInt fcsr = guest_state->guest_FCSR;
    switch (inst) {
       case ROUNDWD:
-         ASM_VOLATILE_ROUND(fs, round.w.d)
+         ASM_VOLATILE_ROUND_DOUBLE(fs, round.w.d)
          break;
       case FLOORWS:
          ASM_VOLATILE_ROUND(fs, floor.w.s)
          break;
       case FLOORWD:
-         ASM_VOLATILE_ROUND(fs, floor.w.d)
+         ASM_VOLATILE_ROUND_DOUBLE(fs, floor.w.d)
          break;
       case TRUNCWS:
          ASM_VOLATILE_ROUND(fs, trunc.w.s)
          break;
       case TRUNCWD:
-         ASM_VOLATILE_ROUND(fs, trunc.w.d)
+         ASM_VOLATILE_ROUND_DOUBLE(fs, trunc.w.d)
          break;
       case CEILWS:
          ASM_VOLATILE_ROUND(fs, ceil.w.s)
          break;
       case CEILWD:
-         ASM_VOLATILE_ROUND(fs, ceil.w.d)
+         ASM_VOLATILE_ROUND_DOUBLE(fs, ceil.w.d)
          break;
       case CVTDS:
          ASM_VOLATILE_ROUND(fs, cvt.d.s)
@@ -1165,13 +1196,13 @@
          ASM_VOLATILE_ROUND(fs, cvt.s.w)
          break;
       case CVTSD:
-         ASM_VOLATILE_ROUND(fs, cvt.s.d)
+         ASM_VOLATILE_ROUND_DOUBLE(fs, cvt.s.d)
          break;
       case CVTWS:
          ASM_VOLATILE_ROUND(fs, cvt.w.s)
          break;
       case CVTWD:
-         ASM_VOLATILE_ROUND(fs, cvt.w.d)
+         ASM_VOLATILE_ROUND_DOUBLE(fs, cvt.w.d)
          break;
       case ROUNDWS:
          ASM_VOLATILE_ROUND(fs, round.w.s)
@@ -1182,37 +1213,37 @@
          ASM_VOLATILE_ROUND(fs, ceil.l.s)
          break;
       case CEILLD:
-         ASM_VOLATILE_ROUND(fs, ceil.l.d)
+         ASM_VOLATILE_ROUND_DOUBLE(fs, ceil.l.d)
          break;
       case CVTDL:
-         ASM_VOLATILE_ROUND(fs, cvt.d.l)
+         ASM_VOLATILE_ROUND_DOUBLE(fs, cvt.d.l)
          break;
       case CVTLS:
          ASM_VOLATILE_ROUND(fs, cvt.l.s)
          break;
       case CVTLD:
-         ASM_VOLATILE_ROUND(fs, cvt.l.d)
+         ASM_VOLATILE_ROUND_DOUBLE(fs, cvt.l.d)
          break;
       case CVTSL:
-         ASM_VOLATILE_ROUND(fs, cvt.s.l)
+         ASM_VOLATILE_ROUND_DOUBLE(fs, cvt.s.l)
          break;
       case FLOORLS:
          ASM_VOLATILE_ROUND(fs, floor.l.s)
          break;
       case FLOORLD:
-         ASM_VOLATILE_ROUND(fs, floor.l.d)
+         ASM_VOLATILE_ROUND_DOUBLE(fs, floor.l.d)
          break;
       case ROUNDLS:
          ASM_VOLATILE_ROUND(fs, round.l.s)
          break;
       case ROUNDLD:
-         ASM_VOLATILE_ROUND(fs, round.l.d)
+         ASM_VOLATILE_ROUND_DOUBLE(fs, round.l.d)
          break;
       case TRUNCLS:
          ASM_VOLATILE_ROUND(fs, trunc.l.s)
          break;
       case TRUNCLD:
-         ASM_VOLATILE_ROUND(fs, trunc.l.d)
+         ASM_VOLATILE_ROUND_DOUBLE(fs, trunc.l.d)
          break;
 #endif
       default:
diff --git a/priv/guest_mips_toIR.c b/priv/guest_mips_toIR.c
index 6f7eca9..367036d 100644
--- a/priv/guest_mips_toIR.c
+++ b/priv/guest_mips_toIR.c
@@ -1088,7 +1088,10 @@
       stmt(IRStmt_Put(offsetof(VexGuestMIPS32State, guest_FCSR), e));
 }
 
-static void calculateFCSR(UInt fs, UInt inst)
+/* fs   - fpu source register number.
+   inst - fpu instruction that needs to be executed.
+   sz32 - size of source register. */
+static void calculateFCSR(UInt fs, UInt inst, Bool sz32)
 {
    IRDirty *d;
    IRTemp fcsr = newTemp(Ity_I32);
@@ -1102,7 +1105,10 @@
                                        mkU32(inst)));
 
    /* Declare we're reading guest state. */
-   d->nFxState = mode64 ? 1 : 2;
+   if (!mode64 && !sz32)
+      d->nFxState = 2;
+   else
+      d->nFxState = 1;
    vex_bzero(&d->fxState, sizeof(d->fxState));
 
    d->fxState[0].fx     = Ifx_Read;  /* read */
@@ -1112,7 +1118,7 @@
    else
       d->fxState[0].size   = sizeof(UInt);
 
-   if (!mode64) {
+   if (!mode64 && !sz32) {
       d->fxState[1].fx     = Ifx_Read;  /* read */
       d->fxState[1].offset = floatGuestRegOffset(fs+1);
       d->fxState[1].size   = sizeof(UInt);
@@ -11693,7 +11699,7 @@
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("round.l.s f%d, f%d", fd, fs);
-                     calculateFCSR(fs, ROUNDLS);
+                     calculateFCSR(fs, ROUNDLS, True);
                      t0 = newTemp(Ity_I64);
 
                      assign(t0, binop(Iop_F32toI64S, mkU32(0x0),
@@ -11703,7 +11709,7 @@
                   break;
                   case 0x11:  /* D */
                      DIP("round.l.d f%d, f%d", fd, fs);
-                     calculateFCSR(fs, ROUNDLD);
+                     calculateFCSR(fs, ROUNDLD, False);
                      putFReg(fd, binop(Iop_RoundF64toInt, mkU32(0x0),
                                        getFReg(fs)));
                      break;
@@ -11717,7 +11723,7 @@
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("trunc.l.s f%d, f%d", fd, fs);
-                     calculateFCSR(fs, TRUNCLS);
+                     calculateFCSR(fs, TRUNCLS, True);
                      t0 = newTemp(Ity_I64);
                      assign(t0, binop(Iop_F32toI64S, mkU32(0x3),
                                       getLoFromF64(Ity_F64, getFReg(fs))));
@@ -11726,7 +11732,7 @@
                      break;
                   case 0x11:  /* D */
                      DIP("trunc.l.d f%d, f%d", fd, fs);
-                     calculateFCSR(fs, TRUNCLD);
+                     calculateFCSR(fs, TRUNCLD, False);
                      putFReg(fd, binop(Iop_RoundF64toInt, mkU32(0x3),
                                        getFReg(fs)));
                      break;
@@ -12194,7 +12200,7 @@
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("cvt.d.s f%d, f%d", fd, fs);
-                     calculateFCSR(fs, CVTDS);
+                     calculateFCSR(fs, CVTDS, True);
                      if (mode64) {
                         t0 = newTemp(Ity_I64);
                         t1 = newTemp(Ity_I32);
@@ -12214,7 +12220,7 @@
 
                   case 0x14:
                      DIP("cvt.d.w %d, %d", fd, fs);
-                     calculateFCSR(fs, CVTDW);
+                     calculateFCSR(fs, CVTDW, True);
                      if (mode64) {
                         t0 = newTemp(Ity_I64);
                         t1 = newTemp(Ity_I32);
@@ -12236,7 +12242,7 @@
                   case 0x15: {  /* L */
                      if (mode64) {
                         DIP("cvt.d.l %d, %d", fd, fs);
-                        calculateFCSR(fs, CVTDL);
+                        calculateFCSR(fs, CVTDL, False);
                         t0 = newTemp(Ity_I64);
                         assign(t0, unop(Iop_ReinterpF64asI64, getFReg(fs)));
 
@@ -12255,7 +12261,7 @@
                switch (fmt) {
                   case 0x14:  /* W */
                      DIP("cvt.s.w %d, %d", fd, fs);
-                     calculateFCSR(fs, CVTSW);
+                     calculateFCSR(fs, CVTSW, True);
                      if (mode64) {
                         t0 = newTemp(Ity_I64);
                         t1 = newTemp(Ity_I32);
@@ -12277,7 +12283,7 @@
 
                   case 0x11:  /* D */
                      DIP("cvt.s.d %d, %d", fd, fs);
-                     calculateFCSR(fs, CVTSD);
+                     calculateFCSR(fs, CVTSD, False);
                      if (mode64) {
                         t0 = newTemp(Ity_F32);
                         assign(t0, binop(Iop_F64toF32, get_IR_roundingmode(),
@@ -12290,7 +12296,7 @@
 
                   case 0x15:  /* L */
                      DIP("cvt.s.l %d, %d", fd, fs);
-                     calculateFCSR(fs, CVTSL);
+                     calculateFCSR(fs, CVTSL, False);
                      t0 = newTemp(Ity_I64);
                      assign(t0, unop(Iop_ReinterpF64asI64, getFReg(fs)));
 
@@ -12307,7 +12313,7 @@
                switch (fmt) {
                case 0x10:  /* S */
                   DIP("cvt.w.s %d, %d", fd, fs);
-                  calculateFCSR(fs, CVTWS);
+                  calculateFCSR(fs, CVTWS, True);
                   if (mode64) {
                      putFReg(fd, mkWidenFromF32(tyF, binop(Iop_RoundF32toInt,
                              get_IR_roundingmode(), getLoFromF64(tyF,
@@ -12319,7 +12325,7 @@
 
                case 0x11:
                   DIP("cvt.w.d %d, %d", fd, fs);
-                  calculateFCSR(fs, CVTWD);
+                  calculateFCSR(fs, CVTWD, False);
                   if (mode64) {
                      t0 = newTemp(Ity_I32);
                      t1 = newTemp(Ity_F32);
@@ -12347,7 +12353,7 @@
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("cvt.l.s %d, %d", fd, fs);
-                     calculateFCSR(fs, CVTLS);
+                     calculateFCSR(fs, CVTLS, True);
                      t0 = newTemp(Ity_I64);
 
                      assign(t0, binop(Iop_F32toI64S, get_IR_roundingmode(),
@@ -12358,7 +12364,7 @@
 
                   case 0x11: {  /* D */
                      DIP("cvt.l.d %d, %d", fd, fs);
-                     calculateFCSR(fs, CVTLD);
+                     calculateFCSR(fs, CVTLD, False);
                      putFReg(fd, binop(Iop_RoundF64toInt,
                              get_IR_roundingmode(), getFReg(fs)));
                      break;
@@ -12373,7 +12379,7 @@
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("floor.l.s %d, %d", fd, fs);
-                     calculateFCSR(fs, FLOORLS);
+                     calculateFCSR(fs, FLOORLS, True);
                      t0 = newTemp(Ity_I64);
 
                      assign(t0, binop(Iop_F32toI64S, mkU32(0x1),
@@ -12384,7 +12390,7 @@
 
                   case 0x11:  /* D */
                      DIP("floor.l.d %d, %d", fd, fs);
-                     calculateFCSR(fs, FLOORLD);
+                     calculateFCSR(fs, FLOORLD, False);
                      putFReg(fd, binop(Iop_RoundF64toInt, mkU32(0x1),
                                        getFReg(fs)));
                      break;
@@ -12397,7 +12403,7 @@
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("round.w.s f%d, f%d", fd, fs);
-                     calculateFCSR(fs, ROUNDWS);
+                     calculateFCSR(fs, ROUNDWS, True);
                      if (mode64) {
                         t0 = newTemp(Ity_I64);
                         t1 = newTemp(Ity_I32);
@@ -12421,7 +12427,7 @@
 
                   case 0x11:  /* D */
                      DIP("round.w.d f%d, f%d", fd, fs);
-                     calculateFCSR(fs, ROUNDWD);
+                     calculateFCSR(fs, ROUNDWD, False);
                      if (mode64) {
                         t0 = newTemp(Ity_I32);
                         assign(t0, binop(Iop_F64toI32S, mkU32(0x0),
@@ -12447,7 +12453,7 @@
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("floor.w.s f%d, f%d", fd, fs);
-                     calculateFCSR(fs, FLOORWS);
+                     calculateFCSR(fs, FLOORWS, True);
                      if (mode64) {
                         t0 = newTemp(Ity_I64);
                         t1 = newTemp(Ity_I32);
@@ -12471,7 +12477,7 @@
 
                   case 0x11:  /* D */
                      DIP("floor.w.d f%d, f%d", fd, fs);
-                     calculateFCSR(fs, FLOORWD);
+                     calculateFCSR(fs, FLOORWD, False);
                      if (mode64) {
                         t0 = newTemp(Ity_I32);
                         assign(t0, binop(Iop_F64toI32S, mkU32(0x1),
@@ -12498,7 +12504,7 @@
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("trunc.w.s %d, %d", fd, fs);
-                     calculateFCSR(fs, TRUNCWS);
+                     calculateFCSR(fs, TRUNCWS, True);
                      if (mode64) {
                         t0 = newTemp(Ity_I64);
                         t1 = newTemp(Ity_I32);
@@ -12521,7 +12527,7 @@
                      break;
                   case 0x11:  /* D */
                      DIP("trunc.w.d %d, %d", fd, fs);
-                     calculateFCSR(fs, TRUNCWD);
+                     calculateFCSR(fs, TRUNCWD, False);
                      if (mode64) {
                         t0 = newTemp(Ity_I32);
 
@@ -12549,7 +12555,7 @@
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("ceil.w.s %d, %d", fd, fs);
-                     calculateFCSR(fs, CEILWS);
+                     calculateFCSR(fs, CEILWS, True);
                      if (mode64) {
                         t0 = newTemp(Ity_I64);
                         t1 = newTemp(Ity_I32);
@@ -12573,7 +12579,7 @@
 
                   case 0x11:  /* D */
                      DIP("ceil.w.d %d, %d", fd, fs);
-                     calculateFCSR(fs, CEILWD);
+                     calculateFCSR(fs, CEILWD, False);
                      if (!mode64) {
                         t0 = newTemp(Ity_I32);
                         assign(t0, binop(Iop_F64toI32S, mkU32(0x2),
@@ -12597,7 +12603,7 @@
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("ceil.l.s %d, %d", fd, fs);
-                     calculateFCSR(fs, CEILLS);
+                     calculateFCSR(fs, CEILLS, True);
                      t0 = newTemp(Ity_I64);
 
                      assign(t0, binop(Iop_F32toI64S, mkU32(0x2),
@@ -12608,7 +12614,7 @@
 
                   case 0x11:  /* D */
                      DIP("ceil.l.d %d, %d", fd, fs);
-                     calculateFCSR(fs, CEILLD);
+                     calculateFCSR(fs, CEILLD, False);
                      putFReg(fd, binop(Iop_RoundF64toInt, mkU32(0x2),
                                        getFReg(fs)));
                      break;
@@ -14358,16 +14364,27 @@
                putIReg(rt, getULR());
 #if defined(__mips__) && ((defined(__mips_isa_rev) && __mips_isa_rev >= 2))
             } else if (rd == 1) {
-               IRTemp   val  = newTemp(Ity_I64);
-               IRExpr** args = mkIRExprVec_3 (IRExpr_BBPTR(),
-                                              mkU64(rt), mkU64(rd));
-               IRDirty *d = unsafeIRDirty_1_N(val,
-                                              0,
-                                              "mips64_dirtyhelper_rdhwr",
-                                              &mips64_dirtyhelper_rdhwr,
-                                              args);
-               stmt(IRStmt_Dirty(d));
-               putIReg(rt, mkexpr(val));
+               if (mode64) {
+                  IRTemp   val  = newTemp(Ity_I64);
+                  IRExpr** args = mkIRExprVec_2 (mkU64(rt), mkU64(rd));
+                  IRDirty *d = unsafeIRDirty_1_N(val,
+                                                 0,
+                                                 "mips64_dirtyhelper_rdhwr",
+                                                 &mips64_dirtyhelper_rdhwr,
+                                                 args);
+                  stmt(IRStmt_Dirty(d));
+                  putIReg(rt, mkexpr(val));
+               } else {
+                  IRTemp   val  = newTemp(Ity_I32);
+                  IRExpr** args = mkIRExprVec_2 (mkU32(rt), mkU32(rd));
+                  IRDirty *d = unsafeIRDirty_1_N(val,
+                                                 0,
+                                                 "mips32_dirtyhelper_rdhwr",
+                                                 &mips32_dirtyhelper_rdhwr,
+                                                 args);
+                  stmt(IRStmt_Dirty(d));
+                  putIReg(rt, mkexpr(val));
+               }
 #endif
             } else
                goto decode_failure;