Support the SSE4 instruction 'roundss' in 32-bit mode (immediate-specified
rounding modes only). Lack of this was causing problems for people running
32-bit apps on Mac OS X 10.6 on newer hardware. Fixes #241377.
git-svn-id: svn://svn.valgrind.org/vex/trunk@1987 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_x86_toIR.c b/priv/guest_x86_toIR.c
index 1dcdf62..2bf234a 100644
--- a/priv/guest_x86_toIR.c
+++ b/priv/guest_x86_toIR.c
@@ -12522,6 +12522,67 @@
/* --- end of the SSSE3 decoder. --- */
/* ---------------------------------------------------- */
+ /* ---------------------------------------------------- */
+ /* --- start of the SSE4 decoder --- */
+ /* ---------------------------------------------------- */
+
+ /* 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
+    66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
+    Partial implementation: only imm8 values 0..3, which give the
+    rounding mode directly, are accepted; any other bit set causes a
+    decode failure.  The ROUNDSD (0x0B) opcode test is currently
+    commented out below, so only ROUNDSS is actually decoded here. */
+ if (sz == 2
+     && insn[0] == 0x0F && insn[1] == 0x3A
+     && (/*insn[2] == 0x0B || */insn[2] == 0x0A)) {
+    /* isD is always False while the 0x0B test above stays disabled. */
+    Bool   isD = insn[2] == 0x0B;
+    IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
+    IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
+    Int    imm = 0;
+
+    modrm = insn[3];
+
+    if (epartIsReg(modrm)) {
+       assign( src,
+               isD ? getXMMRegLane64F( eregOfRM(modrm), 0 )
+                   : getXMMRegLane32F( eregOfRM(modrm), 0 ) );
+       imm = insn[3+1];      /* imm8 is the byte after the modrm byte */
+       if (imm & ~3) goto decode_failure;
+       delta += 3+1+1;       /* 3 opcode bytes + modrm + imm8 */
+       DIP( "rounds%c $%d,%s,%s\n",
+            isD ? 'd' : 's',
+            imm, nameXMMReg( eregOfRM(modrm) ),
+                 nameXMMReg( gregOfRM(modrm) ) );
+    } else {
+       addr = disAMode( &alen, sorb, delta+3, dis_buf );
+       assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
+       imm = insn[3+alen];   /* imm8 is the byte after the amode bytes */
+       if (imm & ~3) goto decode_failure;
+       delta += 3+alen+1;    /* 3 opcode bytes + amode + imm8 */
+       DIP( "rounds%c $%d,%s,%s\n", isD ? 'd' : 's',
+            imm, dis_buf, nameXMMReg( gregOfRM(modrm) ) );
+    }
+
+    /* imm has been checked to lie in 0..3, so (imm & 3) is an
+       Intel-encoded rounding mode.  Because that encoding is the same
+       as the encoding for IRRoundingMode, we can use that value
+       directly in the IR as a rounding mode. */
+    assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
+                      mkU32(imm & 3), mkexpr(src)) );
+
+    if (isD)
+       putXMMRegLane64F( gregOfRM(modrm), 0, mkexpr(res) );
+    else
+       putXMMRegLane32F( gregOfRM(modrm), 0, mkexpr(res) );
+
+    goto decode_success;
+ }
+
+ /* ---------------------------------------------------- */
+ /* --- end of the SSE4 decoder --- */
+ /* ---------------------------------------------------- */
+
after_sse_decoders:
/* ---------------------------------------------------- */
diff --git a/priv/host_x86_isel.c b/priv/host_x86_isel.c
index 02d83d8..e1242d6 100644
--- a/priv/host_x86_isel.c
+++ b/priv/host_x86_isel.c
@@ -2776,6 +2776,25 @@
return dst;
}
+ if (e->tag == Iex_Binop
+     && e->Iex.Binop.op == Iop_RoundF32toInt) {
+    /* Compute the value to be rounded (arg2), then obtain a new
+       register from newVRegF to receive the result. */
+    HReg src_reg = iselFltExpr(env, e->Iex.Binop.arg2);
+    HReg res_reg = newVRegF(env);
+
+    /* Put the host FPU into the rounding mode given by arg1. */
+    set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+    /* grndint %src_reg, %res_reg */
+    addInstr(env, X86Instr_FpUnary(Xfp_ROUND, src_reg, res_reg));
+
+    /* Return the host FPU to its default rounding mode. */
+    set_FPU_rounding_default( env );
+
+    return res_reg;
+ }
+
ppIRExpr(e);
vpanic("iselFltExpr_wrk");
}