Create a new mechanism: "emulation warnings", which is a way for Vex
to report to whatever is using it that it cannot emulate precisely.
Net result is that a bb can exit with the guest state pointer set to
VEX_TRC_JMP_EMWARN.  In this case, the (mandatory) guest state pseudo-reg
called "guest_EMWARN" holds a value of type VexEmWarn, indicating the
kind of problem encountered.

Use this to warn about approximations in the x87 FPU simulation:
unmasked exceptions not supported, round to +inf/-inf not supported,
precisions other than 80-bit not supported.



git-svn-id: svn://svn.valgrind.org/vex/trunk@595 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/Makefile b/Makefile
index 20fb766..cfeae6f 100644
--- a/Makefile
+++ b/Makefile
@@ -3,6 +3,7 @@
 		pub/libvex_ir.h				\
 		pub/libvex.h				\
 		pub/libvex_trc_values.h			\
+		pub/libvex_emwarn.h			\
 		pub/libvex_guest_x86.h			\
 		pub/libvex_guest_amd64.h		\
 		pub/libvex_guest_arm.h
diff --git a/priv/guest-x86/gdefs.h b/priv/guest-x86/gdefs.h
index 2cca503..84f47eb 100644
--- a/priv/guest-x86/gdefs.h
+++ b/priv/guest-x86/gdefs.h
@@ -95,6 +95,10 @@
                 UInt arg, UInt rot_amt, UInt eflags_in, UInt sz 
              );
 
+extern ULong x86h_check_fldcw ( UInt fpucw );  /* (warn << 32) | fprtz */
+
+extern UInt x86h_create_fpucw ( UInt fprtz );  /* uses fprtz[0] only */
+
 /* --- Clean helpers for MMX --- */
 
 extern ULong x86g_calculate_add32x2 ( ULong, ULong );
diff --git a/priv/guest-x86/ghelpers.c b/priv/guest-x86/ghelpers.c
index e09b919..91ab8d8 100644
--- a/priv/guest-x86/ghelpers.c
+++ b/priv/guest-x86/ghelpers.c
@@ -34,6 +34,7 @@
 */
 
 #include "libvex_basictypes.h"
+#include "libvex_emwarn.h"
 #include "libvex_guest_x86.h"
 #include "libvex_ir.h"
 #include "libvex.h"
@@ -1386,9 +1387,51 @@
 #define FP_REG(ii)    (10*(7-(ii)))
 
 
+/* fpucw[15:0] holds an x87 native-format FPU control word.  Returns
+   (warn << 32) | fprtz: the FPRTZ value to use (0 = to nearest, 1 = to
+   zero) and any VexEmWarn (EmWarn_NONE if the word is fully supported). */
+/* CLEAN HELPER */
+ULong x86h_check_fldcw ( UInt fpucw )
+{
+   /* Decide on a rounding mode.  fpucw[11:10] must be either
+      00b(round to nearest) or 11b(round to zero).  No others
+      supported.  Others are mapped to round-to-nearest. */
+   UInt rmode = (fpucw >> 10) & 3;
+   UInt fprtz = rmode==3 ? 1 : 0;
+
+   /* At most one warning is returned: exns, else rounding, else precision. */
+   VexEmWarn ew = EmWarn_NONE;
+
+   if ((fpucw & 0x3F) != 0x3F) {
+      /* unmasked exceptions! */
+      ew = EmWarn_X86_x87exns;
+   }
+   else
+   if (rmode != 0 && rmode != 3) {
+      /* unsupported rounding mode */
+      ew = EmWarn_X86_x87rounding;
+   }
+   else
+   if (((fpucw >> 8) & 3) != 3) {
+      /* unsupported precision */
+      ew = EmWarn_X86_x87precision;
+   }
+
+   return (((ULong)ew) << 32) | ((ULong)fprtz);
+}
+
+/* CLEAN HELPER */
+/* Synthesise an x87 native-format control word from fprtz[0]: start from
+   0x037F and, if the bit is set, also set bits 11:10 (0x0C00). */
+UInt x86h_create_fpucw ( UInt fprtz )
+{
+   return 0x037F | ((fprtz & 1) ? 0x0C00 : 0);
+}
+
+
 /* VISIBLE TO LIBVEX CLIENT */
-void LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state,
-                               /*OUT*/VexGuestX86State* vex_state )
+VexEmWarn LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state,
+                                    /*OUT*/VexGuestX86State* vex_state )
 {
    Int        r;
    UInt       tag;
@@ -1399,6 +1442,9 @@
    UInt       tagw    = x87->env[FP_ENV_TAG];
    UInt       fpucw   = x87->env[FP_ENV_CTRL];
    UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;
+   VexEmWarn  ew;
+   UInt       fprtz;
+   ULong      pair;
 
    /* Copy registers and tags */
    for (r = 0; r < 8; r++) {
@@ -1417,11 +1463,19 @@
    /* stack pointer */
    vex_state->guest_FTOP = ftop;
 
-   /* control word */
-   vex_state->guest_FPUCW = fpucw;
-
    /* status word */
    vex_state->guest_FC3210 = c3210;
+
+   /* handle the control word, setting FPRTZ and detecting any 
+      emulation warnings. */
+   pair  = x86h_check_fldcw ( (UInt)fpucw );
+   fprtz = (UInt)pair;
+   ew    = (VexEmWarn)(pair >> 32);
+   
+   vex_state->guest_FPRTZ = fprtz & 1;
+
+   /* emulation warnings --> caller */
+   return ew;
 }
 
 
@@ -1441,8 +1495,9 @@
       x87->env[i] = 0;
 
    x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
-   x87->env[FP_ENV_CTRL] = (UShort)( vex_state->guest_FPUCW );
    x87->env[FP_ENV_STAT] = ((ftop & 7) << 11) | (c3210 & 0x4700);
+   x87->env[FP_ENV_CTRL] 
+      = (UShort)x86h_create_fpucw( vex_state->guest_FPRTZ );
 
    tagw = 0;
    for (r = 0; r < 8; r++) {
@@ -1528,9 +1583,7 @@
       vex_state->guest_FPTAG[i] = 0; /* empty */
       vex_state->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
    }
-   /* The default setting: all fp exceptions masked, rounding to
-      nearest, precision to 64 bits */
-   vex_state->guest_FPUCW = 0x03F7; 
+   vex_state->guest_FPRTZ  = 0; /* round to nearest */
    vex_state->guest_FC3210 = 0;
 
    vex_state->guest_CS = 0;
@@ -1539,6 +1592,8 @@
    vex_state->guest_FS = 0;
    vex_state->guest_GS = 0;
    vex_state->guest_SS = 0;
+
+   vex_state->guest_EMWARN = EmWarn_NONE;
 }
 
 
@@ -2434,7 +2489,7 @@
 
           /* Describe any sections to be regarded by Memcheck as
              'always-defined'. */
-          .n_alwaysDefd = 15,
+          .n_alwaysDefd = 16,
           /* flags thunk: OP and NDEP are always defd, whereas DEP1
              and DEP2 have to be tracked.  See detailed comment in
              gdefs.h on meaning of thunk fields. */
@@ -2447,14 +2502,15 @@
                  /*  4 */ ALWAYSDEFD(guest_EIP),
                  /*  5 */ ALWAYSDEFD(guest_FTOP),
                  /*  6 */ ALWAYSDEFD(guest_FPTAG),
-                 /*  7 */ ALWAYSDEFD(guest_FPUCW),
+                 /*  7 */ ALWAYSDEFD(guest_FPRTZ),
                  /*  8 */ ALWAYSDEFD(guest_FC3210),
                  /*  9 */ ALWAYSDEFD(guest_CS),
                  /* 10 */ ALWAYSDEFD(guest_DS),
                  /* 11 */ ALWAYSDEFD(guest_ES),
                  /* 12 */ ALWAYSDEFD(guest_FS),
                  /* 13 */ ALWAYSDEFD(guest_GS),
-	         /* 14 */ ALWAYSDEFD(guest_SS) 
+	         /* 14 */ ALWAYSDEFD(guest_SS),
+	         /* 15 */ ALWAYSDEFD(guest_EMWARN)
                }
         };
 
diff --git a/priv/guest-x86/toIR.c b/priv/guest-x86/toIR.c
index 3c33b84..685dd8a 100644
--- a/priv/guest-x86/toIR.c
+++ b/priv/guest-x86/toIR.c
@@ -120,7 +120,9 @@
 #define OFFB_IDFLAG   offsetof(VexGuestX86State,guest_IDFLAG)
 #define OFFB_FTOP     offsetof(VexGuestX86State,guest_FTOP)
 #define OFFB_FC3210   offsetof(VexGuestX86State,guest_FC3210)
-#define OFFB_FPUCW    offsetof(VexGuestX86State,guest_FPUCW)
+#define OFFB_FPRTZ    offsetof(VexGuestX86State,guest_FPRTZ)
+
+#define OFFB_EMWARN   offsetof(VexGuestX86State,guest_EMWARN)
 
 
 /*------------------------------------------------------------*/
@@ -1433,11 +1435,13 @@
    condPos = positiveIse_X86Condcode ( cond, &invert );
    if (invert) {
       stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
+                         Ijk_Boring,
                          IRConst_U32(d32_false) ) );
       irbb->next     = mkU32(d32_true);
       irbb->jumpkind = Ijk_Boring;
    } else {
       stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
+                         Ijk_Boring,
                          IRConst_U32(d32_true) ) );
       irbb->next     = mkU32(d32_false);
       irbb->jumpkind = Ijk_Boring;
@@ -3230,6 +3234,7 @@
    //uInstr2 (cb, JIFZ,  4, TempReg, tc,    Literal, 0);
    //uLiteral(cb, eip_next);
    stmt( IRStmt_Exit( binop(Iop_CmpEQ32,mkexpr(tc),mkU32(0)),
+                      Ijk_Boring,
                       IRConst_U32(eip_next) ) );
 
    //uInstr1 (cb, DEC,   4, TempReg, tc);
@@ -3243,6 +3248,7 @@
       jmp_lit(Ijk_Boring,eip);
    } else {
       stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond),
+                         Ijk_Boring,
                          IRConst_U32(eip) ) );
       jmp_lit(Ijk_Boring,eip_next);
    }
@@ -3349,6 +3355,13 @@
 
 /* --- Helper functions for dealing with the register stack. --- */
 
+/* --- Put e into the guest_EMWARN pseudo-register (OFFB_EMWARN). --- */
+
+static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
+{
+   stmt( IRStmt_Put( OFFB_EMWARN, e ) );
+}
+
 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
 
 static IRExpr* mkQNaN64 ( void )
@@ -3360,7 +3373,7 @@
    return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
 }
 
-/* --------- Get/set the top-of-stack pointer. --------- */
+/* --------- Get/put the top-of-stack pointer. --------- */
 
 static IRExpr* get_ftop ( void )
 {
@@ -3372,7 +3385,7 @@
    stmt( IRStmt_Put( OFFB_FTOP, e ) );
 }
 
-/* --------- Get/set the C3210 bits of the control word. --------- */
+/* --------- Get/put the C3210 bits. --------- */
 
 static IRExpr* get_C3210 ( void )
 {
@@ -3384,30 +3397,32 @@
    stmt( IRStmt_Put( OFFB_FC3210, e ) );
 }
 
-/* --------- Get/set the FPU control word. --------- */
-/* Note, IA32 has this as a 16-bit value, so fstcw/fldcw need to cast
-   to/from 16 bits.  Here we represent it in 32 bits. */
-static IRExpr* /* :: Ity_I32 */ get_fpucw ( void )
+/* --------- Get/put the FPU rounding mode. --------- */
+static IRExpr* /* :: Ity_I32 */ get_fprtz ( void )
 {
-   return IRExpr_Get( OFFB_FPUCW, Ity_I32 );
+   return IRExpr_Get( OFFB_FPRTZ, Ity_I32 );
 }
 
-static void put_fpucw ( IRExpr* /* :: Ity_I32 */ e )
+static void put_fprtz ( IRExpr* /* :: Ity_I32 */ e )
 {
-   stmt( IRStmt_Put( OFFB_FPUCW, e ) );
+   stmt( IRStmt_Put( OFFB_FPRTZ, e ) );
 }
 
 
-/* --------- Get the FPU rounding mode from the CW. --------- */
+/* --------- Synthesise a 2-bit FPU rounding mode. --------- */
 /* Produces a value in 0 .. 3, which is encoded as per the type
-   IRRoundingMode.  On IA32 the relevant value is precisely bits 11
-   and 10 of the control word.
+   IRRoundingMode.  Since 11b means round-to-zero and 00b means
+   round-to-nearest, this means the value can be synthesised from
+   bit 0 of guest_FPRTZ as (w<<31) >>s 31, where w = guest_FPRTZ.
 */
 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
 {
-   return binop( Iop_And32, 
-                 binop(Iop_Shr32, get_fpucw(), mkU8(10)),
-                 mkU32(3) );
+   return 
+      binop( Iop_And32,
+             binop( Iop_Sar32,
+                    binop(Iop_Shl32, get_fprtz(), mkU8(31)),
+                    mkU8(31) ),
+             mkU32(3) );
 }
 
 
@@ -3801,14 +3816,62 @@
                fp_pop();
                break;
 
-            case 5: /* FLDCW */
+            case 5: {/* FLDCW */
+               /* The only thing we observe in the control word is the
+                  rounding mode, and even that only two values of.
+                  Therefore, pass the 16-bit value (x87 native-format
+                  control word) to a clean helper, getting back a
+                  64-bit value, the lower half of which is the FPRTZ
+                  value to store, and the upper half of which is the
+                  emulation-warning token which may be generated.
+               */
+               /* ULong x86h_check_fldcw ( UInt ); */
+               IRTemp t64 = newTemp(Ity_I64);
+               IRTemp ew = newTemp(Ity_I32);
                DIP("fldcw %s", dis_buf);
-               put_fpucw( unop(Iop_16Uto32, loadLE(Ity_I16, mkexpr(addr))) );
+               assign( t64, mkIRExprCCall(
+                               Ity_I64, 0/*regparms*/, 
+                               "x86h_check_fldcw",
+                               &x86h_check_fldcw, 
+                               mkIRExprVec_1( 
+                                  unop( Iop_16Uto32, 
+                                        loadLE(Ity_I16, mkexpr(addr)))
+                               )
+                            )
+                     );
+
+               put_fprtz( unop(Iop_64to32, mkexpr(t64)) );
+               assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
+               put_emwarn( mkexpr(ew) );
+               /* Finally, if an emulation warning was reported,
+                  side-exit to the next insn, reporting the warning,
+                  so that Valgrind's dispatcher sees the warning. */
+               stmt( 
+                  IRStmt_Exit(
+                     binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
+                     Ijk_EmWarn,
+                     IRConst_U32( ((Addr32)guest_eip_bbstart)+delta)
+                  )
+               );
                break;
+            }
 
             case 7: /* FNSTCW */
+              /* Fake up a native x87 FPU control word.  The only
+                 thing it depends on is FPRTZ[0], so call a clean
+                 helper to cook it up. */
+               /* UInt x86h_create_fpucw ( UInt fprtz ) */
                DIP("fnstcw %s", dis_buf);
-               storeLE(mkexpr(addr), unop(Iop_32to16, get_fpucw()));
+               storeLE(
+                  mkexpr(addr), 
+                  unop( Iop_32to16, 
+                        mkIRExprCCall(
+                           Ity_I32, 0/*regp*/,
+                           "x86h_create_fpucw", &x86h_create_fpucw, 
+                           mkIRExprVec_1( get_fprtz() ) 
+                        ) 
+                  ) 
+               );
                break;
 
             default:
@@ -4367,21 +4430,24 @@
                break;
 
             case 4: { /* FRSTOR m108 */
-               /* Uses dirty helper: x86g_do_FRSTOR ( VexGuestX86State*, UInt ) */
-               IRDirty* d = unsafeIRDirty_0_N ( 
-                               0/*regparms*/, 
-                               "x86g_dirtyhelper_FRSTOR", 
-                               &x86g_dirtyhelper_FRSTOR,
-                               mkIRExprVec_1( mkexpr(addr) )
-                            );
+               /* Uses dirty helper: VexEmWarn x86g_dirtyhelper_FRSTOR
+                     ( VexGuestX86State*, Addr32 ) */
+               IRTemp   ew = newTemp(Ity_I32);
+               IRDirty* d  = unsafeIRDirty_0_N ( 
+                                0/*regparms*/, 
+                                "x86g_dirtyhelper_FRSTOR", 
+                                &x86g_dirtyhelper_FRSTOR,
+                                mkIRExprVec_1( mkexpr(addr) )
+                             );
                d->needsBBP = True;
+               d->tmp      = ew;
                /* declare we're reading memory */
                d->mFx   = Ifx_Read;
                d->mAddr = mkexpr(addr);
                d->mSize = 108;
 
                /* declare we're writing guest state */
-	       d->nFxState = 5;
+               d->nFxState = 5;
 
                d->fxState[0].fx     = Ifx_Write;
                d->fxState[0].offset = offsetof(VexGuestX86State,guest_FTOP);
@@ -4396,7 +4462,7 @@
                d->fxState[2].size   = 8 * sizeof(UChar);
 
                d->fxState[3].fx     = Ifx_Write;
-               d->fxState[3].offset = offsetof(VexGuestX86State,guest_FPUCW);
+               d->fxState[3].offset = offsetof(VexGuestX86State,guest_FPRTZ);
                d->fxState[3].size   = sizeof(UInt);
 
                d->fxState[4].fx     = Ifx_Write;
@@ -4405,12 +4471,26 @@
 
                stmt( IRStmt_Dirty(d) );
 
+               /* ew contains any emulation warning we may need to
+                  issue.  If needed, side-exit to the next insn,
+                  reporting the warning, so that Valgrind's dispatcher
+                  sees the warning. */
+               put_emwarn( mkexpr(ew) );
+               stmt( 
+                  IRStmt_Exit(
+                     binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
+                     Ijk_EmWarn,
+                     IRConst_U32( ((Addr32)guest_eip_bbstart)+delta)
+                  )
+               );
+
                DIP("frstor %s", dis_buf);
                break;
             }
 
             case 6: { /* FNSAVE m108 */
-               /* Uses dirty helper: x86g_do_FSAVE ( VexGuestX86State*, UInt ) */
+               /* Uses dirty helper: void x86g_dirtyhelper_FSAVE
+                     ( VexGuestX86State*, UInt ) */
                IRDirty* d = unsafeIRDirty_0_N ( 
                                0/*regparms*/, 
                                "x86g_dirtyhelper_FSAVE", 
@@ -4424,7 +4504,7 @@
                d->mSize = 108;
 
                /* declare we're reading guest state */
-	       d->nFxState = 5;
+               d->nFxState = 5;
 
                d->fxState[0].fx     = Ifx_Read;
                d->fxState[0].offset = offsetof(VexGuestX86State,guest_FTOP);
@@ -4439,7 +4519,7 @@
                d->fxState[2].size   = 8 * sizeof(UChar);
 
                d->fxState[3].fx     = Ifx_Read;
-               d->fxState[3].offset = offsetof(VexGuestX86State,guest_FPUCW);
+               d->fxState[3].offset = offsetof(VexGuestX86State,guest_FPRTZ);
                d->fxState[3].size   = sizeof(UInt);
 
                d->fxState[4].fx     = Ifx_Read;
@@ -4955,7 +5035,7 @@
          } else {
             IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
             delta += len;
-	    storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
+            storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
             DIP("mov(nt)q %s, %s\n", 
                 nameMMXReg(gregOfRM(modrm)), dis_buf);
          }
@@ -8256,6 +8336,7 @@
                binop(mkSizedOp(ty,Iop_CmpEQ8),
                      getIReg(sz,R_ECX),
                      mkU(ty,0)),
+            Ijk_Boring,
             IRConst_U32(d32)) 
           );
 
diff --git a/priv/host-x86/hdefs.c b/priv/host-x86/hdefs.c
index 70e9f8a..97dd66c 100644
--- a/priv/host-x86/hdefs.c
+++ b/priv/host-x86/hdefs.c
@@ -563,8 +563,6 @@
    i->Xin.Goto.cond = cond;
    i->Xin.Goto.dst  = dst;
    i->Xin.Goto.jk   = jk;
-   /* non-Boring conditional jumps are not allowed. */
-   vassert(jk == Ijk_Boring || cond == Xcc_ALWAYS);
    return i;
 }
 X86Instr* X86Instr_CMov32  ( X86CondCode cond, X86RM* src, HReg dst ) {
@@ -749,23 +747,21 @@
          vex_printf("0x%x", i->Xin.Call.target);
          break;
       case Xin_Goto:
-         if (i->Xin.Goto.jk == Ijk_ClientReq 
-             || i->Xin.Goto.jk == Ijk_Syscall
-             || i->Xin.Goto.jk == Ijk_Yield) {
+         if (i->Xin.Goto.cond != Xcc_ALWAYS) {
+            vex_printf("if (%%eflags.%s) { ", 
+                       showX86CondCode(i->Xin.Goto.cond));
+	 }
+         if (i->Xin.Goto.jk != Ijk_Boring) {
             vex_printf("movl $");
             ppIRJumpKind(i->Xin.Goto.jk);
-            vex_printf(", %%ebp ; ");
+            vex_printf(",%%ebp ; ");
          }
-         if (i->Xin.Goto.cond == Xcc_ALWAYS) {
-            vex_printf("movl ");
-            ppX86RI(i->Xin.Goto.dst);
-            vex_printf(",%%eax ; ret");
-         } else {
-            vex_printf("if (%%eflags.%s) { movl ", 
-                       showX86CondCode(i->Xin.Goto.cond));
-            ppX86RI(i->Xin.Goto.dst);
-            vex_printf(",%%eax ; ret }");
-         }
+         vex_printf("movl ");
+         ppX86RI(i->Xin.Goto.dst);
+         vex_printf(",%%eax ; ret");
+         if (i->Xin.Goto.cond != Xcc_ALWAYS) {
+            vex_printf(" }");
+	 }
          return;
       case Xin_CMov32:
          vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
@@ -953,9 +949,7 @@
       case Xin_Goto:
          addRegUsage_X86RI(u, i->Xin.Goto.dst);
          addHRegUse(u, HRmWrite, hregX86_EAX());
-         if (i->Xin.Goto.jk == Ijk_ClientReq 
-             || i->Xin.Goto.jk == Ijk_Syscall
-             || i->Xin.Goto.jk == Ijk_Yield)
+         if (i->Xin.Goto.jk != Ijk_Boring)
             addHRegUse(u, HRmWrite, hregX86_EBP());
          return;
       case Xin_CMov32:
@@ -1719,65 +1713,72 @@
       goto done;
 
    case Xin_Goto:
-      /* If a non-boring unconditional jump, set %ebp (the guest state
-         pointer) appropriately. */
-      if (i->Xin.Goto.cond == Xcc_ALWAYS
-          && (i->Xin.Goto.jk == Ijk_ClientReq 
-              || i->Xin.Goto.jk == Ijk_Syscall
-              || i->Xin.Goto.jk == Ijk_Yield)) {
-         /* movl $magic_number, %ebp */
-         *p++ = 0xBD;
-         switch (i->Xin.Goto.jk) {
-            case Ijk_ClientReq: 
-               p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break;
-            case Ijk_Syscall: 
-               p = emit32(p, VEX_TRC_JMP_SYSCALL); break;
-            case Ijk_Yield: 
-               p = emit32(p, VEX_TRC_JMP_YIELD); break;
-            default: 
-               ppIRJumpKind(i->Xin.Goto.jk);
-               vpanic("emit_X86Instr.Xin_Goto: unknown jump kind");
-         }
+      /* Use ptmp for backpatching conditional jumps. */
+      ptmp = NULL;
+
+      /* First off, if this is conditional, create a conditional
+	 jump over the rest of it. */
+      if (i->Xin.Goto.cond != Xcc_ALWAYS) {
+         /* jmp fwds if !condition */
+         *p++ = 0x70 + (i->Xin.Goto.cond ^ 1);
+         ptmp = p; /* fill in this bit later */
+         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
       }
-      /* unconditional jump to immediate */
-      if (i->Xin.Goto.cond == Xcc_ALWAYS
-          && i->Xin.Goto.dst->tag == Xri_Imm) {
+
+      /* If a non-boring, set %ebp (the guest state pointer)
+         appropriately. */
+      /* movl $magic_number, %ebp */
+      switch (i->Xin.Goto.jk) {
+         case Ijk_ClientReq: 
+            *p++ = 0xBD;
+            p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break;
+         case Ijk_Syscall: 
+            *p++ = 0xBD;
+            p = emit32(p, VEX_TRC_JMP_SYSCALL); break;
+         case Ijk_Yield: 
+            *p++ = 0xBD;
+            p = emit32(p, VEX_TRC_JMP_YIELD); break;
+         case Ijk_EmWarn:
+            *p++ = 0xBD;
+            p = emit32(p, VEX_TRC_JMP_EMWARN); break;
+         case Ijk_Ret:
+	 case Ijk_Call:
+         case Ijk_Boring:
+            break;
+         default: 
+            ppIRJumpKind(i->Xin.Goto.jk);
+            vpanic("emit_X86Instr.Xin_Goto: unknown jump kind");
+      }
+
+      /* Get the destination address into %eax */
+      if (i->Xin.Goto.dst->tag == Xri_Imm) {
          /* movl $immediate, %eax ; ret */
          *p++ = 0xB8;
          p = emit32(p, i->Xin.Goto.dst->Xri.Imm.imm32);
-         *p++ = 0xC3;
-         goto done;
-      }
-      /* unconditional jump to reg */
-      if (i->Xin.Goto.cond == Xcc_ALWAYS
-          && i->Xin.Goto.dst->tag == Xri_Reg) {
+      } else {
+         vassert(i->Xin.Goto.dst->tag == Xri_Reg);
          /* movl %reg, %eax ; ret */
          if (i->Xin.Goto.dst->Xri.Reg.reg != hregX86_EAX()) {
             *p++ = 0x89;
             p = doAMode_R(p, i->Xin.Goto.dst->Xri.Reg.reg, hregX86_EAX());
          }
-         *p++ = 0xC3;
-         goto done;
       }
-      /* conditional jump to immediate */
-      if (i->Xin.Goto.cond != Xcc_ALWAYS
-          && i->Xin.Goto.dst->tag == Xri_Imm) {
-         vassert(i->Xin.Goto.jk == Ijk_Boring);
-         /* jmp fwds if !condition */
-         *p++ = 0x70 + (i->Xin.Goto.cond ^ 1);
-         *p++ = 6; /* # of bytes in the next bit */
-         /* movl $immediate, %eax ; ret */
-         *p++ = 0xB8;
-         p = emit32(p, i->Xin.Goto.dst->Xri.Imm.imm32);
-         *p++ = 0xC3;
-         goto done;
+
+      /* ret */
+      *p++ = 0xC3;
+
+      /* Fix up the conditional jump, if there was one. */
+      if (i->Xin.Goto.cond != Xcc_ALWAYS) {
+         Int delta = p - ptmp;
+	 vassert(delta > 0 && delta < 20);
+         *ptmp = (UChar)(delta-1);
       }
-      break;
+      goto done;
 
    case Xin_CMov32:
       vassert(i->Xin.CMov32.cond != Xcc_ALWAYS);
 #if 0
-      /* This generates cmov, which is illegal on P5. */
+      /* This generates cmov, which is illegal on P54/P55. */
       *p++ = 0x0F;
       *p++ = 0x40 + i->Xin.CMov32.cond;
       if (i->Xin.CMov32.src->tag == Xrm_Reg) {
diff --git a/priv/host-x86/isel.c b/priv/host-x86/isel.c
index 84a8598..a912875 100644
--- a/priv/host-x86/isel.c
+++ b/priv/host-x86/isel.c
@@ -2555,7 +2555,7 @@
          vpanic("isel_x86: Ist_Exit: dst is not a 32-bit value");
       dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
       cc  = iselCondCode(env,stmt->Ist.Exit.guard);
-      addInstr(env, X86Instr_Goto(Ijk_Boring, cc, dst));
+      addInstr(env, X86Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
       return;
    }
 
diff --git a/priv/ir/irdefs.c b/priv/ir/irdefs.c
index d915bb4..8105796 100644
--- a/priv/ir/irdefs.c
+++ b/priv/ir/irdefs.c
@@ -346,6 +346,20 @@
    vex_printf(")");
 }
 
+void ppIRJumpKind ( IRJumpKind kind )
+{
+   switch (kind) { /* print the name of 'kind'; unknown values vpanic */
+      case Ijk_Boring:    vex_printf("Boring");    return;
+      case Ijk_Call:      vex_printf("Call");      return;
+      case Ijk_Ret:       vex_printf("Return");    return;
+      case Ijk_ClientReq: vex_printf("ClientReq"); return;
+      case Ijk_Syscall:   vex_printf("Syscall");   return;
+      case Ijk_Yield:     vex_printf("Yield");     return;
+      case Ijk_EmWarn:    vex_printf("EmWarn");    return;
+      default:            vpanic("ppIRJumpKind");
+   }
+}
+
 void ppIRStmt ( IRStmt* s )
 {
    switch (s->tag) {
@@ -378,7 +392,9 @@
       case Ist_Exit:
          vex_printf( "if (" );
          ppIRExpr(s->Ist.Exit.guard);
-         vex_printf( ") goto ");
+         vex_printf( ") goto {");
+         ppIRJumpKind(s->Ist.Exit.jk);
+         vex_printf("} ");
          ppIRConst(s->Ist.Exit.dst);
          break;
       default: 
@@ -386,19 +402,6 @@
    }
 }
 
-void ppIRJumpKind ( IRJumpKind kind )
-{
-   switch (kind) {
-      case Ijk_Boring:    vex_printf("Boring"); break;
-      case Ijk_Call:      vex_printf("Call"); break;
-      case Ijk_Ret:       vex_printf("Return"); break;
-      case Ijk_ClientReq: vex_printf("ClientReq"); break;
-      case Ijk_Syscall:   vex_printf("Syscall"); break;
-      case Ijk_Yield:     vex_printf("Yield"); break;
-      default:            vpanic("ppIRJumpKind");
-   }
-}
-
 void ppIRTypeEnv ( IRTypeEnv* env ) {
    UInt i;
    for (i = 0; i < env->types_used; i++) {
@@ -712,10 +715,11 @@
    s->Ist.Dirty.details = d;
    return s;
 }
-IRStmt* IRStmt_Exit ( IRExpr* guard, IRConst* dst ) {
+IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst ) {
    IRStmt* s         = LibVEX_Alloc(sizeof(IRStmt));
    s->tag            = Ist_Exit;
    s->Ist.Exit.guard = guard;
+   s->Ist.Exit.jk    = jk;
    s->Ist.Exit.dst   = dst;
    return s;
 }
@@ -885,7 +889,8 @@
       case Ist_Dirty: 
          return IRStmt_Dirty(dopyIRDirty(s->Ist.Dirty.details));
       case Ist_Exit: 
-         return IRStmt_Exit(dopyIRExpr(s->Ist.Exit.guard), 
+         return IRStmt_Exit(dopyIRExpr(s->Ist.Exit.guard),
+                            s->Ist.Exit.jk,
                             dopyIRConst(s->Ist.Exit.dst));
       default: 
          vpanic("dopyIRStmt");
diff --git a/priv/ir/iropt.c b/priv/ir/iropt.c
index ef7c652..4e76d65 100644
--- a/priv/ir/iropt.c
+++ b/priv/ir/iropt.c
@@ -328,7 +328,8 @@
          break;
       case Ist_Exit:
          e1 = flatten_Expr(bb, st->Ist.Exit.guard);
-         addStmtToIRBB(bb, IRStmt_Exit(e1, st->Ist.Exit.dst));
+         addStmtToIRBB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
+                                           st->Ist.Exit.dst));
          break;
       default:
          vex_printf("\n");
@@ -859,7 +860,7 @@
                   vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
             }
          }
-         return IRStmt_Exit(fcond,st->Ist.Exit.dst);
+         return IRStmt_Exit(fcond, st->Ist.Exit.jk, st->Ist.Exit.dst);
       }
 
    default:
@@ -1737,6 +1738,7 @@
       case Ist_Exit:
          return IRStmt_Exit(
                    tbSubst_Expr(env, st->Ist.Exit.guard),
+                   st->Ist.Exit.jk,
                    st->Ist.Exit.dst
                 );
       case Ist_Dirty:
diff --git a/priv/main/vex_main.c b/priv/main/vex_main.c
index 062574c..8bf93c9 100644
--- a/priv/main/vex_main.c
+++ b/priv/main/vex_main.c
@@ -34,6 +34,7 @@
 */
 
 #include "libvex.h"
+#include "libvex_emwarn.h"
 #include "libvex_guest_x86.h"
 #include "libvex_guest_arm.h"
 
@@ -441,6 +442,27 @@
 }
 
 
+/* --------- Emulation warnings. --------- */
+/* Map ew to a fixed descriptive string; unlisted values hit vpanic. */
+HChar* LibVEX_EmWarn_string ( VexEmWarn ew )
+{
+   switch (ew) {
+     case EmWarn_NONE: 
+        return "none";
+     case EmWarn_X86_x87exns:
+        return "Unmasking x87 FP exceptions";
+     case EmWarn_X86_x87rounding:
+        return "Selection of unsupported x87 FP rounding mode (+inf/-inf)";
+     case EmWarn_X86_x87precision:
+        return "Selection of non-80-bit x87 FP precision";
+     case EmWarn_X86_sseExns:
+        return "Unmasking SSE FP exceptions";
+     case EmWarn_X86_sseRounding:
+        return "Selection of unsupported SSE FP rounding mode";
+     default: 
+        vpanic("LibVEX_EmWarn_string: unknown warning");
+   }
+}
 
 /*---------------------------------------------------------------*/
 /*--- end                                     main/vex_main.c ---*/
diff --git a/pub/libvex_emwarn.h b/pub/libvex_emwarn.h
new file mode 100644
index 0000000..1fd7e37
--- /dev/null
+++ b/pub/libvex_emwarn.h
@@ -0,0 +1,84 @@
+
+/*---------------------------------------------------------------*/
+/*---                                                         ---*/
+/*--- This file (libvex_emwarn.h) is                          ---*/
+/*--- Copyright (c) 2004 OpenWorks LLP.  All rights reserved. ---*/
+/*---                                                         ---*/
+/*---------------------------------------------------------------*/
+
+/*
+   This file is part of LibVEX, a library for dynamic binary
+   instrumentation and translation.
+
+   Copyright (C) 2004 OpenWorks, LLP.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; Version 2 dated June 1991 of the
+   license.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or liability
+   for damages.  See the GNU General Public License for more details.
+
+   Neither the names of the U.S. Department of Energy nor the
+   University of California nor the names of its contributors may be
+   used to endorse or promote products derived from this software
+   without prior written permission.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+   USA.
+*/
+
+#ifndef __LIBVEX_EMWARN_H
+#define __LIBVEX_EMWARN_H
+
+
+/* VEX can sometimes generate code which returns to the dispatcher
+   with the guest state pointer set to VEX_TRC_JMP_EMWARN.  This means
+   that VEX is trying to warn Valgrind that it is doing imprecise
+   emulation in some sense.  The guest's pseudo-register
+   "guest_EMWARN" will hold a value of type VexEmWarn, which describes
+   the nature of the warning.  Currently the limitations that are
+   warned about apply primarily to floating point support.
+
+   All guest states should have a 32-bit (UInt) guest_EMWARN pseudo-
+   register, that emulation warnings can be written in to.
+
+   Note that guest_EMWARN only carries a valid value at the jump
+   marked as VEX_TRC_JMP_EMWARN.  You can't assume it will continue to
+   carry a valid value for any amount of time after the jump.
+*/
+
+typedef
+   enum {
+      /* no warning indicated */
+      EmWarn_NONE=0,
+      /* unmasking x87 FP exceptions is not supported */
+      EmWarn_X86_x87exns,
+      /* use of unsupported x87 FP rounding mode */
+      EmWarn_X86_x87rounding,
+      /* change of x87 FP precision away from 64-bit (mantissa) */
+      EmWarn_X86_x87precision,
+      /* unmasking SSE FP exceptions is not supported */
+      EmWarn_X86_sseExns,
+      /* use of unsupported SSE FP rounding mode */
+      EmWarn_X86_sseRounding,
+      /* one past the last warning; equals the number of values above */
+      EmWarn_NUMBER
+   }
+   VexEmWarn;
+
+
+/* Produces a short string describing the warning. */
+extern HChar* LibVEX_EmWarn_string ( VexEmWarn );
+
+
+#endif /* ndef __LIBVEX_EMWARN_H */
+
+/*---------------------------------------------------------------*/
+/*---                                         libvex_emwarn.h ---*/
+/*---------------------------------------------------------------*/
diff --git a/pub/libvex_guest_amd64.h b/pub/libvex_guest_amd64.h
index 68cd0c9..079fb43 100644
--- a/pub/libvex_guest_amd64.h
+++ b/pub/libvex_guest_amd64.h
@@ -74,6 +74,8 @@
          segment registers
       */
 
+      /* Emulation warnings */
+      UInt   guest_EMWARN;
       /* Padding to make it have an 8-aligned size */
       /* UInt   padding; */
    }
diff --git a/pub/libvex_guest_arm.h b/pub/libvex_guest_arm.h
index 4ab7463..9969605 100644
--- a/pub/libvex_guest_arm.h
+++ b/pub/libvex_guest_arm.h
@@ -78,6 +78,9 @@
       UInt  guest_CC_DEP1;
       UInt  guest_CC_DEP2;
 
+      /* Emulation warnings */
+      UInt   guest_EMWARN;
+
       /* Padding to make it have an 8-aligned size */
       /* UInt   padding; */
    }
diff --git a/pub/libvex_guest_x86.h b/pub/libvex_guest_x86.h
index bbdb899..ac7dfa2 100644
--- a/pub/libvex_guest_x86.h
+++ b/pub/libvex_guest_x86.h
@@ -37,6 +37,8 @@
 #define __LIBVEX_PUB_GUEST_X86_H
 
 #include "libvex_basictypes.h"
+#include "libvex_emwarn.h"
+
 
 /*---------------------------------------------------------------*/
 /*--- Vex's representation of the x86 CPU state.              ---*/
@@ -89,14 +91,15 @@
    * fst from st(0) to st(i) does not take an overflow fault even if the
      destination is already full.
 
-   FPUCW[15:0] is the FPU's control word.  FPUCW[31:16] is unused.
+   FPRTZ[0] is the FPU's notional rounding mode -- 0 "to nearest" (the
+   default), 1 "to zero".  FPRTZ[31:1] is unused.  Round to
+   +infinity/-infinity is not supported.
 
    FC3210 contains the C3, C2, C1 and C0 bits in the same place they
    are in the FPU's status word.  (bits 14, 10, 9, 8 respectively).
    All other bits should be zero.  The relevant mask to select just
    those bits is 0x4700.  To select C3, C2 and C0 only, the mask is
-   0x4500.
-*/
+   0x4500.  */
 
 typedef
    struct {
@@ -123,7 +126,7 @@
       UInt  guest_FTOP;
       ULong guest_FPREG[8];
       UChar guest_FPTAG[8];
-      UInt  guest_FPUCW;
+      UInt  guest_FPRTZ;
       UInt  guest_FC3210;
       /* Segment registers. */
       UShort guest_CS;
@@ -132,8 +135,10 @@
       UShort guest_FS;
       UShort guest_GS;
       UShort guest_SS;
+      /* Emulation warnings */
+      UInt   guest_EMWARN;
       /* Padding to make it have an 8-aligned size */
-      UInt   padding;
+      /* UInt   padding; */
    }
    VexGuestX86State;
 
@@ -154,10 +159,12 @@
 
 /* Convert a saved x87 FPU image (as created by fsave) and write it
    into the supplied VexGuestX86State structure.  The non-FP parts of
-   said structure are left unchanged.  
+   said structure are left unchanged.  May return an emulation warning
+   value.
 */
 extern 
-void LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state, 
+VexEmWarn
+     LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state, 
                                /*OUT*/VexGuestX86State* vex_state );
 
 /* Extract from the supplied VexGuestX86State structure, an x87 FPU
diff --git a/pub/libvex_ir.h b/pub/libvex_ir.h
index aa847a3..c37adde 100644
--- a/pub/libvex_ir.h
+++ b/pub/libvex_ir.h
@@ -496,6 +496,26 @@
 }
 
 
+/* ------------------ Jump kinds ------------------ */
+
+/* This describes hints which can be passed to the dispatcher at guest
+   control-flow transfer points.
+*/
+typedef
+   enum { 
+      Ijk_Boring=0x14000, /* not interesting; just goto next */
+      Ijk_Call,           /* guest is doing a call */
+      Ijk_Ret,            /* guest is doing a return */
+      Ijk_ClientReq,      /* do guest client req before continuing */
+      Ijk_Syscall,        /* do guest syscall before continuing */
+      Ijk_Yield,          /* client is yielding to thread scheduler */
+      Ijk_EmWarn          /* report emulation warning before continuing */
+   }
+   IRJumpKind;
+
+extern void ppIRJumpKind ( IRJumpKind );
+
+
 /* ------------------ Dirty helper calls ------------------ */
 
 /* A dirty call is a flexible mechanism for calling a helper function
@@ -638,8 +658,9 @@
             IRDirty* details;
          } Dirty;
          struct {
-            IRExpr*  guard;
-            IRConst* dst;
+            IRExpr*    guard;
+            IRJumpKind jk;
+            IRConst*   dst;
          } Exit;
       } Ist;
    }
@@ -651,7 +672,7 @@
 extern IRStmt* IRStmt_Tmp   ( IRTemp tmp, IRExpr* data );
 extern IRStmt* IRStmt_STle  ( IRExpr* addr, IRExpr* data );
 extern IRStmt* IRStmt_Dirty ( IRDirty* details );
-extern IRStmt* IRStmt_Exit  ( IRExpr* guard, IRConst* dst );
+extern IRStmt* IRStmt_Exit  ( IRExpr* guard, IRJumpKind jk, IRConst* dst );
 
 extern IRStmt* dopyIRStmt ( IRStmt* );
 
@@ -660,25 +681,6 @@
 
 /* ------------------ Basic Blocks ------------------ */
 
-/* This describes the unconditional jumps which implicitly happen at
-   the end of each basic block.  Conditional jumps -- which can only
-   be done with the IRStmt_Exit statement -- are implicitly of the
-   Ijk_Boring kind. */
-
-typedef
-   enum { 
-      Ijk_Boring=0x14000, /* not interesting; just goto next */
-      Ijk_Call,           /* guest is doing a call */
-      Ijk_Ret,            /* guest is doing a return */
-      Ijk_ClientReq,      /* do guest client req before continuing */
-      Ijk_Syscall,        /* do guest syscall before continuing */
-      Ijk_Yield           /* client is yielding to thread scheduler */
-   }
-   IRJumpKind;
-
-extern void ppIRJumpKind ( IRJumpKind );
-
-
 /* A bunch of statements, expressions, etc, are incomplete without an
    environment indicating the type of each IRTemp.  So this provides
    one.  IR temporaries are really just unsigned ints and so this
@@ -723,7 +725,7 @@
 
 extern void ppIRBB ( IRBB* );
 
-extern void  addStmtToIRBB ( IRBB*, IRStmt* );
+extern void addStmtToIRBB ( IRBB*, IRStmt* );
 
 
 /*---------------------------------------------------------------*/
diff --git a/pub/libvex_trc_values.h b/pub/libvex_trc_values.h
index 93088f9..9172b85 100644
--- a/pub/libvex_trc_values.h
+++ b/pub/libvex_trc_values.h
@@ -44,6 +44,8 @@
    This file may get included in assembly code, so do not put
    C-specific constructs in it.
 */
+#define VEX_TRC_JMP_EMWARN     17  /* deliver emulation warning before
+                                      continuing */
 #define VEX_TRC_JMP_SYSCALL    19  /* do a system call before continuing */
 #define VEX_TRC_JMP_CLIENTREQ  23  /* do a client req before continuing */
 #define VEX_TRC_JMP_YIELD      27  /* yield to thread sched