guest x86: fix x87 FP rounding modes enough so that
none/tests/x86/insn_fpu works correctly.



git-svn-id: svn://svn.valgrind.org/vex/trunk@598 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest-x86/ghelpers.c b/priv/guest-x86/ghelpers.c
index 91ab8d8..855d753 100644
--- a/priv/guest-x86/ghelpers.c
+++ b/priv/guest-x86/ghelpers.c
@@ -1388,16 +1388,15 @@
 
 
-/* native_fpucw[15:0] contains a x87 native format FPU control word.
+/* fpucw[15:0] contains an x87 native format FPU control word.
-   Extract from it the required FPRTZ value and any resulting
-   emulation warning, and return (warn << 32) | fprtz value. */
+   Extract from it the required FPROUND value and any resulting
+   emulation warning, and return (warn << 32) | fpround value. 
+*/
 /* CLEAN HELPER */
 ULong x86h_check_fldcw ( UInt fpucw )
 {
-   /* Decide on a rounding mode.  fpucw[11:10] must be either
-      00b(round to nearest) or 11b(round to zero).  No others
-      supported.  Others are mapped to round-to-nearest. */
+   /* Decide on a rounding mode.  fpucw[11:10] holds it. */
+   /* NOTE, encoded exactly as per enum IRRoundingMode. */
    UInt rmode = (fpucw >> 10) & 3;
-   UInt fprtz = rmode==0 ? 0 : 1;
 
    /* Detect any required emulation warnings. */
    VexEmWarn ew = EmWarn_NONE;
@@ -1407,25 +1406,21 @@
       ew = EmWarn_X86_x87exns;
    }
    else 
-   if (rmode != 0 && rmode != 3) {
-      /* unsupported rounding mode */
-      ew = EmWarn_X86_x87rounding;
-   }
-   else
    if (((fpucw >> 8) & 3) != 3) {
       /* unsupported precision */
       ew = EmWarn_X86_x87precision;
    }
 
-   return (((ULong)ew) << 32) | ((ULong)fprtz);
+   return (((ULong)ew) << 32) | ((ULong)rmode);
 }
 
 /* CLEAN HELPER */
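-/* Given fprtz as 1 or 0, create a suitable x87 native format
-   FPU control word. */
+/* Given fpround[1:0], encoded as per enum IRRoundingMode, create a
+   suitable x87 native format FPU control word. */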
-UInt x86h_create_fpucw ( UInt fprtz )
+UInt x86h_create_fpucw ( UInt fpround )
 {
-   return (fprtz & 1) ? 0x0F7F : 0x037F;
+   fpround &= 3;
+   return 0x037F | (fpround << 10);
 }
 
 
@@ -1443,7 +1438,7 @@
    UInt       fpucw   = x87->env[FP_ENV_CTRL];
    UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;
    VexEmWarn  ew;
-   UInt       fprtz;
+   UInt       fpround;
    ULong      pair;
 
    /* Copy registers and tags */
@@ -1466,13 +1461,13 @@
    /* status word */
    vex_state->guest_FC3210 = c3210;
 
-   /* handle the control word, setting FPRTZ and detecting any 
+   /* handle the control word, setting FPROUND and detecting any
       emulation warnings. */
-   pair  = x86h_check_fldcw ( (UInt)fpucw );
-   fprtz = (UInt)pair;
-   ew    = (VexEmWarn)(pair >> 32);
+   pair    = x86h_check_fldcw ( (UInt)fpucw );
+   fpround = (UInt)pair;
+   ew      = (VexEmWarn)(pair >> 32);
    
-   vex_state->guest_FPRTZ = fprtz & 1;
+   vex_state->guest_FPROUND = fpround & 3;
 
    /* emulation warnings --> caller */
    return ew;
@@ -1497,7 +1492,7 @@
    x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
    x87->env[FP_ENV_STAT] = ((ftop & 7) << 11) | (c3210 & 0x4700);
    x87->env[FP_ENV_CTRL] 
-      = (UShort)x86h_create_fpucw( vex_state->guest_FPRTZ );
+      = (UShort)x86h_create_fpucw( vex_state->guest_FPROUND );
 
    tagw = 0;
    for (r = 0; r < 8; r++) {
@@ -1583,8 +1578,8 @@
       vex_state->guest_FPTAG[i] = 0; /* empty */
       vex_state->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
    }
-   vex_state->guest_FPRTZ  = 0; /* round to nearest */
-   vex_state->guest_FC3210 = 0;
+   vex_state->guest_FPROUND = (UInt)Irrm_NEAREST;
+   vex_state->guest_FC3210  = 0;
 
    vex_state->guest_CS = 0;
    vex_state->guest_DS = 0;
@@ -2502,7 +2497,7 @@
                  /*  4 */ ALWAYSDEFD(guest_EIP),
                  /*  5 */ ALWAYSDEFD(guest_FTOP),
                  /*  6 */ ALWAYSDEFD(guest_FPTAG),
-                 /*  7 */ ALWAYSDEFD(guest_FPRTZ),
+                 /*  7 */ ALWAYSDEFD(guest_FPROUND),
                  /*  8 */ ALWAYSDEFD(guest_FC3210),
                  /*  9 */ ALWAYSDEFD(guest_CS),
                  /* 10 */ ALWAYSDEFD(guest_DS),
diff --git a/priv/guest-x86/toIR.c b/priv/guest-x86/toIR.c
index 40b13fe..6dd1aed 100644
--- a/priv/guest-x86/toIR.c
+++ b/priv/guest-x86/toIR.c
@@ -120,7 +120,7 @@
 #define OFFB_IDFLAG   offsetof(VexGuestX86State,guest_IDFLAG)
 #define OFFB_FTOP     offsetof(VexGuestX86State,guest_FTOP)
 #define OFFB_FC3210   offsetof(VexGuestX86State,guest_FC3210)
-#define OFFB_FPRTZ    offsetof(VexGuestX86State,guest_FPRTZ)
+#define OFFB_FPROUND  offsetof(VexGuestX86State,guest_FPROUND)
 
 #define OFFB_EMWARN   offsetof(VexGuestX86State,guest_EMWARN)
 
@@ -3398,31 +3398,26 @@
 }
 
 /* --------- Get/put the FPU rounding mode. --------- */
-static IRExpr* /* :: Ity_I32 */ get_fprtz ( void )
+static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
 {
-   return IRExpr_Get( OFFB_FPRTZ, Ity_I32 );
+   return IRExpr_Get( OFFB_FPROUND, Ity_I32 );
 }
 
-static void put_fprtz ( IRExpr* /* :: Ity_I32 */ e )
+static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
 {
-   stmt( IRStmt_Put( OFFB_FPRTZ, e ) );
+   stmt( IRStmt_Put( OFFB_FPROUND, e ) );
 }
 
 
 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
 /* Produces a value in 0 .. 3, which is encoded as per the type
-   IRRoundingMode.  Since 11b means round-to-zero and 00b means
-   round-to-nearest, this means the value can be synthesised from
-   bit 0 of guest_FPRTZ as (w<<31) >>s 31, where w = guest_FPRTZ.
+   IRRoundingMode.  Since the guest_FPROUND value is also encoded as
+   per IRRoundingMode, we merely need to get it and mask it for
+   safety.
 */
 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
 {
-   return 
-      binop( Iop_And32,
-             binop( Iop_Sar32,
-                    binop(Iop_Shl32, get_fprtz(), mkU8(31)),
-                    mkU8(31) ),
-             mkU32(3) );
+   return binop( Iop_And32, get_fpround(), mkU32(3) );
 }
 
 
@@ -3820,12 +3815,12 @@
 
             case 5: {/* FLDCW */
                /* The only thing we observe in the control word is the
-                  rounding mode, and even that only two values of.
-                  Therefore, pass the 16-bit value (x87 native-format
-                  control word) to a clean helper, getting back a
-                  64-bit value, the lower half of which is the FPRTZ
-                  value to store, and the upper half of which is the
-                  emulation-warning token which may be generated.
+                  rounding mode.  Therefore, pass the 16-bit value
+                  (x87 native-format control word) to a clean helper,
+                  getting back a 64-bit value, the lower half of which
+                  is the FPROUND value to store, and the upper half of
+                  which is the emulation-warning token which may be
+                  generated.
                */
                /* ULong x86h_check_fldcw ( UInt ); */
                IRTemp t64 = newTemp(Ity_I64);
@@ -3842,7 +3837,7 @@
                             )
                      );
 
-               put_fprtz( unop(Iop_64to32, mkexpr(t64)) );
+               put_fpround( unop(Iop_64to32, mkexpr(t64)) );
                assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
                put_emwarn( mkexpr(ew) );
                /* Finally, if an emulation warning was reported,
@@ -3860,9 +3855,9 @@
 
             case 7: /* FNSTCW */
               /* Fake up a native x87 FPU control word.  The only
-                 thing it depends on is FPRTZ[0], so call a clean
+                 thing it depends on is FPROUND[1:0], so call a clean
                  helper to cook it up. */
-               /* UInt x86h_create_fpucw ( UInt fptrz ) */
+               /* UInt x86h_create_fpucw ( UInt fpround ) */
                DIP("fnstcw %s", dis_buf);
                storeLE(
                   mkexpr(addr), 
@@ -3870,7 +3865,7 @@
                         mkIRExprCCall(
                            Ity_I32, 0/*regp*/,
                            "x86h_create_fpucw", &x86h_create_fpucw, 
-                           mkIRExprVec_1( get_fprtz() ) 
+                           mkIRExprVec_1( get_fpround() ) 
                         ) 
                   ) 
                );
@@ -4469,7 +4464,7 @@
                d->fxState[2].size   = 8 * sizeof(UChar);
 
                d->fxState[3].fx     = Ifx_Write;
-               d->fxState[3].offset = offsetof(VexGuestX86State,guest_FPRTZ);
+               d->fxState[3].offset = offsetof(VexGuestX86State,guest_FPROUND);
                d->fxState[3].size   = sizeof(UInt);
 
                d->fxState[4].fx     = Ifx_Write;
@@ -4526,7 +4521,7 @@
                d->fxState[2].size   = 8 * sizeof(UChar);
 
                d->fxState[3].fx     = Ifx_Read;
-               d->fxState[3].offset = offsetof(VexGuestX86State,guest_FPRTZ);
+               d->fxState[3].offset = offsetof(VexGuestX86State,guest_FPROUND);
                d->fxState[3].size   = sizeof(UInt);
 
                d->fxState[4].fx     = Ifx_Read;
diff --git a/priv/main/vex_main.c b/priv/main/vex_main.c
index 8bf93c9..b6e70b6 100644
--- a/priv/main/vex_main.c
+++ b/priv/main/vex_main.c
@@ -451,14 +451,10 @@
         return "none";
      case EmWarn_X86_x87exns:
         return "Unmasking x87 FP exceptions";
-     case EmWarn_X86_x87rounding:
-        return "Selection of unsupported x87 FP rounding mode (+inf/-inf)";
      case EmWarn_X86_x87precision:
         return "Selection of non-80-bit x87 FP precision";
      case EmWarn_X86_sseExns:
         return "Unmasking SSE FP exceptionss";
-     case EmWarn_X86_sseRounding:
-        return "Selection of unsupported SSE FP rounding mode";
      default: 
         vpanic("LibVEX_EmWarn_string: unknown warning");
    }
diff --git a/pub/libvex_emwarn.h b/pub/libvex_emwarn.h
index 1fd7e37..9b1c6ac 100644
--- a/pub/libvex_emwarn.h
+++ b/pub/libvex_emwarn.h
@@ -57,16 +57,15 @@
    enum {
       /* no warning indicated */
       EmWarn_NONE=0,
+
       /* unmasking x87 FP exceptions is not supported */
       EmWarn_X86_x87exns,
-      /* use of unsupported x87 FP rounding mode */
-      EmWarn_X86_x87rounding,
+
       /* change of x87 FP precision away from 64-bit (mantissa) */
       EmWarn_X86_x87precision,
+
       /* unmasking SSE FP exceptions is not supported */
       EmWarn_X86_sseExns,
-      /* use of unsupported SSE FP rounding mode */
-      EmWarn_X86_sseRounding,
       
       EmWarn_NUMBER
    }
diff --git a/pub/libvex_guest_x86.h b/pub/libvex_guest_x86.h
index ac7dfa2..e3ddfed 100644
--- a/pub/libvex_guest_x86.h
+++ b/pub/libvex_guest_x86.h
@@ -91,9 +91,11 @@
    * fst from st(0) to st(i) does not take an overflow fault even if the
      destination is already full.
 
-   FPRTZ[0] is the FPU's notional rounding mode -- 0 "to nearest" (the
-   default), 1 "to zero".  FPRTZ[31:1] is unused.  Round to
-   +infinity/-infinity is not supported.
+   FPROUND[1:0] is the FPU's notional rounding mode, encoded as per
+   the IRRoundingMode type (see libvex_ir.h).  This just happens to be
+   the Intel encoding.  Note carefully, the rounding mode is only
+   observed on float-to-int conversions, and not for float-to-float
+   operations.
 
    FC3210 contains the C3, C2, C1 and C0 bits in the same place they
    are in the FPU's status word.  (bits 14, 10, 9, 8 respectively).
@@ -126,7 +128,7 @@
       UInt  guest_FTOP;
       ULong guest_FPREG[8];
       UChar guest_FPTAG[8];
-      UInt  guest_FPRTZ;
+      UInt  guest_FPROUND;
       UInt  guest_FC3210;
       /* Segment registers. */
       UShort guest_CS;
diff --git a/test/fpucw.c b/test/fpucw.c
new file mode 100644
index 0000000..0130897
--- /dev/null
+++ b/test/fpucw.c
@@ -0,0 +1,43 @@
+
+#include <stdio.h>
+
+void fldcw_default ( void )  /* Load 0x037F: [11:10]=00b, [9:8]=11b. */
+{  /* pushw/addl bracket the fldcw so %esp is restored afterwards. */
+  asm(" pushw $0x037F ; fldcw (%esp) ; addl $2, %esp");
+}
+
+void fldcw_exns ( void )  /* Load 0x037E: 0x037F with bit 0 cleared. */
+{  /* NOTE(review): presumably provokes the x87-exceptions warning; confirm. */
+  asm(" pushw $0x037E ; fldcw (%esp) ; addl $2, %esp");
+}
+
+void fldcw_precision ( void )  /* Load 0x007F: bits [9:8] = 00b. */
+{  /* NOTE(review): x86h_check_fldcw warns when [9:8] != 11b -- confirm. */
+  asm(" pushw $0x007F ; fldcw (%esp) ; addl $2, %esp");
+}
+
+void fldcw_rounding ( void )  /* Load 0x077F: bits [11:10] = 01b. */
+{  /* Differs from 0x037F only in the [11:10] field (00b there). */
+  asm(" pushw $0x077F ; fldcw (%esp) ; addl $2, %esp");
+}
+
+int main ( void )  /* Runs the four FLDCW cases in order; returns 0. */
+{  /* Each case's name is printed before the load, a blank line after. */
+   printf("default\n");
+   fldcw_default();
+   printf("\n");
+
+   printf("exns\n");
+   fldcw_exns();
+   printf("\n");
+
+   printf("precision\n");
+   fldcw_precision();
+   printf("\n");
+
+   printf("rounding\n");
+   fldcw_rounding();
+   printf("\n");
+
+   return 0;
+}