Make floating-point comparisons work, and fill in a bunch of other x86
FP -> IR cases.
git-svn-id: svn://svn.valgrind.org/vex/trunk@245 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest-x86/toIR.c b/priv/guest-x86/toIR.c
index 19c285c..19f2920 100644
--- a/priv/guest-x86/toIR.c
+++ b/priv/guest-x86/toIR.c
@@ -3099,6 +3099,17 @@
stmt( IRStmt_Put( OFFB_FTOP, e ) );
}
+/* --------- Get/set the C320 bits of the status word. --------- */
+
+static IRExpr* get_C320 ( void ) /* Build an IR expression reading the guest state at OFFB_FC320. */
+{
+ return IRExpr_Get( OFFB_FC320, Ity_I32 ); /* read as a 32-bit integer (Ity_I32) */
+}
+
+static void put_C320 ( IRExpr* e ) /* Emit a statement writing e to the guest state at OFFB_FC320. */
+{
+ stmt( IRStmt_Put( OFFB_FC320, e ) ); /* e is expected to be 32 bits wide, matching the Ity_I32 read in get_C320 */
+}
/* --------- Get/set the FPU control word. --------- */
/* Note, IA32 has this as a 16-bit value, so fstcw/fldcw need to cast
@@ -3293,13 +3304,32 @@
Check dst and src tags when reading but not on write.
*/
static
-void fp_do_op_ST_ST ( UChar* op_txt, IROp op, UInt st_src, UInt st_dst )
+void fp_do_op_ST_ST ( UChar* op_txt, IROp op, UInt st_src, UInt st_dst,
+ Bool pop_after ) /* pop_after: pop the FP stack once the result is stored */
{
- DIP("f%s st(%d), st(%d)\n", op_txt, st_src, st_dst );
+ DIP("f%s%s st(%d), st(%d)\n", op_txt, pop_after?"p":"", st_src, st_dst );
put_ST_UNCHECKED(
st_dst,
binop(op, get_ST(st_dst), get_ST(st_src) )
);
+ if (pop_after) /* the "p" forms of the instruction, as printed by DIP above */
+ fp_pop();
+}
+
+/* ST(dst) = ST(src) `op` ST(dst): fp_do_op_ST_ST with the operands swapped.
+ Check dst and src tags when reading but not on write.
+*/
+static
+void fp_do_oprev_ST_ST ( UChar* op_txt, IROp op, UInt st_src, UInt st_dst,
+ Bool pop_after ) /* pop_after: pop the FP stack once the result is stored */
+{
+ DIP("f%s%s st(%d), st(%d)\n", op_txt, pop_after?"p":"", st_src, st_dst );
+ put_ST_UNCHECKED(
+ st_dst,
+ binop(op, get_ST(st_src), get_ST(st_dst) ) /* src is the left operand here */
+ );
+ if (pop_after)
+ fp_pop();
}
@@ -3332,6 +3362,10 @@
fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
break;
+ case 6: /* FDIV single-real */
+ fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
+ break;
+
default:
vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
vex_printf("first_opcode == 0xD8\n");
@@ -3342,11 +3376,11 @@
switch (modrm) {
case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
- fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0 );
+ fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
break;
case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
- fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0 );
+ fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
break;
default:
@@ -3497,7 +3531,23 @@
goto decode_fail;
}
} else {
- goto decode_fail;
+
+ delta++;
+ switch (modrm) {
+
+ case 0xE9: /* FUCOMPP %st(0),%st(1) */
+ DIP("fucompp %%st(0),%%st(1)\n");
+ put_C320( binop(Iop_Shl32,
+ binop(Iop_CmpF64, get_ST(0), get_ST(1)),
+ mkU8(8)) );
+ fp_pop();
+ fp_pop();
+ break;
+
+ default:
+ goto decode_fail;
+ }
+
}
}
@@ -3567,7 +3617,18 @@
}
} else {
- goto decode_fail;
+
+ delta++;
+ switch (modrm) {
+
+ case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
+ fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
+ break;
+
+ default:
+ goto decode_fail;
+ }
+
}
}
@@ -3609,6 +3670,7 @@
} else {
delta++;
switch (modrm) {
+
case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
r_dst = (UInt)modrm - 0xD8;
DIP("fstp %%st(0),%%st(%d)\n", r_dst);
@@ -3618,6 +3680,24 @@
put_ST_UNCHECKED(r_dst, get_ST(0));
fp_pop();
break;
+
+ case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
+ r_dst = (UInt)modrm - 0xE0;
+ DIP("fucom %%st(0),%%st(%d)\n", r_dst);
+ put_C320( binop(Iop_Shl32,
+ binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
+ mkU8(8)) );
+ break;
+
+ case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
+ r_dst = (UInt)modrm - 0xE8;
+ DIP("fucomp %%st(0),%%st(%d)\n", r_dst);
+ put_C320( binop(Iop_Shl32,
+ binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
+ mkU8(8)) );
+ fp_pop();
+ break;
+
default:
goto decode_fail;
}
@@ -3627,7 +3707,32 @@
/* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
else
if (first_opcode == 0xDE) {
- goto decode_fail;
+
+ if (modrm < 0xC0) {
+ goto decode_fail;
+
+ } else {
+
+ delta++;
+ switch (modrm) {
+
+ case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
+ fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
+ break;
+
+ case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
+ fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
+ break;
+
+ case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
+ fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
+ break;
+
+ default:
+ goto decode_fail;
+ }
+
+ }
}
/* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
@@ -3650,14 +3755,12 @@
fp_pop();
break;
-#if 0
case 5: /* FILD m64 */
DIP("fildll %s\n", dis_buf);
fp_push();
put_ST(0, unop(Iop_I64toF64,
loadLE(Ity_I64, mkexpr(addr))));
break;
-#endif
default:
vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
@@ -3666,7 +3769,29 @@
}
} else {
- goto decode_fail;
+
+ delta++;
+ switch (modrm) {
+
+ case 0xE0: /* FNSTSW %ax */
+ DIP("fnstsw %%ax\n");
+ /* Invent a plausible-looking FPU status word value and
+ dump it in %AX:
+ ((ftop & 7) << 11) | (c320 & 0x4500)
+ */
+ putIReg(2, R_EAX,
+ unop(Iop_32to16,
+ binop(Iop_Or32,
+ binop(Iop_Shl32,
+ binop(Iop_And32, get_ftop(), mkU32(7)),
+ mkU8(11)),
+ binop(Iop_And32, get_C320(), mkU32(0x4500))
+ )));
+ break;
+
+ default:
+ goto decode_fail;
+ }
}
}
@@ -4357,34 +4482,29 @@
}
-//-- static
-//-- void codegen_SAHF ( UCodeBlock* cb )
-//-- {
-//-- Int t = newTemp(cb);
-//-- Int t2 = newTemp(cb);
-//-- uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t);
-//--
-//-- /* Mask out parts of t not corresponding to %AH. This stops the
-//-- instrumenter complaining if they are undefined. Otherwise, the
-//-- instrumenter would check all 32 bits of t at the PUSH, which
-//-- could be the cause of incorrect warnings. Discovered by Daniel
-//-- Veillard <veillard@redhat.com>.
-//-- */
-//-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2);
-//-- uLiteral(cb, 0x0000FF00);
-//-- uInstr2(cb, AND, 4, TempReg, t2, TempReg, t);
-//-- /* We deliberately don't set the condition codes here, since this
-//-- AND is purely internal to Valgrind and nothing to do with the
-//-- client's state. */
-//--
-//-- uInstr0(cb, CALLM_S, 0);
-//-- uInstr1(cb, PUSH, 4, TempReg, t);
-//-- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_SAHF));
-//-- uFlagsRWU(cb, FlagsEmpty, FlagsSZACP, FlagsEmpty);
-//-- uInstr1(cb, CLEAR, 0, Lit16, 4);
-//-- uInstr0(cb, CALLM_E, 0);
-//-- }
-//--
+static
+void codegen_SAHF ( void )
+{
+ /* SAHF. Set the flags (stored via CC_OP_COPY, value in CC_SRC) to:
+ (calculate_flags_all() & CC_MASK_O) -- retain the old O flag
+ | (%AH & (CC_MASK_S|CC_MASK_Z|CC_MASK_A|CC_MASK_P|CC_MASK_C))
+ */
+ UInt mask_SZACP = CC_MASK_S|CC_MASK_Z|CC_MASK_A|CC_MASK_P|CC_MASK_C;
+ IRTemp oldflags = newTemp(Ity_I32);
+ assign( oldflags, mk_calculate_eflags_all() ); /* capture the old flags before the CC_* Puts below */
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DST, mkU32(0) ));
+ stmt( IRStmt_Put( OFFB_CC_SRC,
+ binop(Iop_Or32,
+ binop(Iop_And32, mkexpr(oldflags), mkU32(CC_MASK_O)),
+ binop(Iop_And32,
+ binop(Iop_Shr32, getIReg(4, R_EAX), mkU8(8)), /* bring %AH down to bits 7..0 */
+ mkU32(mask_SZACP))
+ )
+ ));
+}
+
+
//-- static
//-- void codegen_LAHF ( UCodeBlock* cb )
//-- {
@@ -6877,23 +6997,23 @@
DIP(sz == 2 ? "cwdq\n" : "cdqq\n");
break;
-//-- /* ------------------------ FPU ops -------------------- */
-//--
-//-- case 0x9E: /* SAHF */
-//-- codegen_SAHF ( cb );
-//-- DIP("sahf\n");
-//-- break;
-//--
+ /* ------------------------ FPU ops -------------------- */
+
+ case 0x9E: /* SAHF */
+ codegen_SAHF();
+ DIP("sahf\n");
+ break;
+
//-- case 0x9F: /* LAHF */
//-- codegen_LAHF ( cb );
//-- DIP("lahf\n");
//-- break;
//--
-//-- case 0x9B: /* FWAIT */
-//-- /* ignore? */
-//-- DIP("fwait\n");
-//-- break;
-//--
+ case 0x9B: /* FWAIT */
+ /* ignore? */
+ DIP("fwait\n");
+ break;
+
case 0xD8:
case 0xD9:
case 0xDA:
diff --git a/priv/host-x86/hdefs.c b/priv/host-x86/hdefs.c
index 070d4eb..dd47237 100644
--- a/priv/host-x86/hdefs.c
+++ b/priv/host-x86/hdefs.c
@@ -607,6 +607,15 @@
i->Xin.FpLdStCW.addr = addr;
return i;
}
+X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) /* Allocate and fill in an Xin_FpCmp instruction. */
+{
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_FpCmp;
+ i->Xin.FpCmp.srcL = srcL; /* left operand of the compare */
+ i->Xin.FpCmp.srcR = srcR; /* right operand of the compare */
+ i->Xin.FpCmp.dst = dst; /* register that receives the comparison result */
+ return i;
+}
void ppX86Instr ( X86Instr* i ) {
@@ -771,6 +780,14 @@
vex_printf(i->Xin.FpLdStCW.isLoad ? "fldcw " : "fstcw ");
ppX86AMode(i->Xin.FpLdStCW.addr);
return;
+ case Xin_FpCmp:
+ vex_printf("gcmp ");
+ ppHRegX86(i->Xin.FpCmp.srcL);
+ vex_printf(",");
+ ppHRegX86(i->Xin.FpCmp.srcR);
+ vex_printf(",");
+ ppHRegX86(i->Xin.FpCmp.dst);
+ break;
default:
vpanic("ppX86Instr");
}
@@ -890,6 +907,12 @@
case Xin_FpLdStCW:
addRegUsage_X86AMode(u, i->Xin.FpLdStCW.addr);
return;
+ case Xin_FpCmp:
+ addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL);
+ addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR);
+ addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst);
+ addHRegUse(u, HRmWrite, hregX86_EAX());
+ return;
default:
ppX86Instr(i);
vpanic("getRegUsage_X86Instr");
@@ -981,6 +1004,11 @@
case Xin_FpLdStCW:
mapRegs_X86AMode(m, i->Xin.FpLdStCW.addr);
return;
+ case Xin_FpCmp:
+ mapReg(m, &i->Xin.FpCmp.srcL);
+ mapReg(m, &i->Xin.FpCmp.srcR);
+ mapReg(m, &i->Xin.FpCmp.dst);
+ return;
default:
ppX86Instr(i);
vpanic("mapRegs_X86Instr");
@@ -1797,7 +1825,7 @@
--> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
*/
switch (i->Xin.FpLdStI.sz) {
- case 8: vassert(0); opc = 0xDF; subopc_imm = 5; break;
+ case 8: opc = 0xDF; subopc_imm = 5; break;
case 4: opc = 0xDB; subopc_imm = 0; break;
case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break;
default: vpanic("emitX86Instr(Xin_FpLdStI-load)");
@@ -1825,29 +1853,6 @@
}
break;
-#if 0
- case Xin_FpI64:
- if (i->Xin.FpI64.toInt) {
- vassert(0);
- } else {
- /* gi64tof64 %hi:%lo %fakeN
- --> ffree %st7; pushl hi ; pushl lo ; fildll 0(%esp) ;
- addl $8,%esp ; fstpl %st(N+1)
- */
- /* ffree %st(7) */
- p = do_ffree_st7(p);
- /* pushl %hi ; pushl %lo */
- *p++ = 0x50 + iregNo(i->Xin.FpI64.iregHi);
- *p++ = 0x50 + iregNo(i->Xin.FpI64.iregLo);
- /* fildll 0(%esp) */
- *p++ = 0xDF; *p++ = 0x6C; *p++ = 0x24; *p++ = 0x00;
- /* addl $8, %esp */
- *p++ = 0x83; *p++ = 0xC4; *p++ = 0x08;
- p = do_fstp_st(p, 1+fregNo(i->Xin.FpI64.freg));
- goto done;
- }
-#endif
-
case Xin_FpCMov:
/* jmp fwds if !condition */
*p++ = 0x70 + (i->Xin.FpCMov.cond ^ 1);
@@ -1856,8 +1861,8 @@
/* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
p = do_ffree_st7(p);
- p = do_fld_st(p, 0+hregNumber(i->Xin.FpCMov.src));
- p = do_fstp_st(p, 1+hregNumber(i->Xin.FpCMov.dst));
+ p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src));
+ p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst));
/* Fill in the jump offset. */
*(ptmp-1) = p - ptmp;
@@ -1872,6 +1877,27 @@
}
goto done;
+
+ case Xin_FpCmp:
+ /* gcmp %fL, %fR, %dst
+ -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
+ fnstsw %ax ; movl %eax, %dst
+ */
+ /* ffree %st7 */
+ p = do_ffree_st7(p);
+ /* fpush %fL */
+ p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL));
+ /* fucomp %(fR+1) */
+ *p++ = 0xDD;
+ *p++ = 0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR)));
+ /* fnstsw %ax */
+ *p++ = 0xDF;
+ *p++ = 0xE0;
+ /* movl %eax, %dst */
+ *p++ = 0x89;
+ p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst);
+ goto done;
+
default:
goto bad;
}
diff --git a/priv/host-x86/hdefs.h b/priv/host-x86/hdefs.h
index a657ec2..0eff2dc 100644
--- a/priv/host-x86/hdefs.h
+++ b/priv/host-x86/hdefs.h
@@ -280,7 +280,8 @@
Xin_FpLdSt, /* FP fake load/store */
Xin_FpLdStI, /* FP fake load/store, converting to/from Int */
Xin_FpCMov, /* FP fake floating point (un)conditional move */
- Xin_FpLdStCW /* fldcw / fstcw */
+ Xin_FpLdStCW, /* fldcw / fstcw */
+ Xin_FpCmp /* FP compare, generating a C320 value into int reg */
}
X86InstrTag;
@@ -420,6 +421,12 @@
X86AMode* addr;
}
FpLdStCW;
+ /* Do a compare, generating the C320 bits into the dst. */
+ struct {
+ HReg srcL;
+ HReg srcR;
+ HReg dst;
+ } FpCmp;
} Xin;
}
X86Instr;
@@ -447,6 +454,7 @@
extern X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz, HReg reg, X86AMode* );
extern X86Instr* X86Instr_FpCMov ( X86CondCode, HReg src, HReg dst );
extern X86Instr* X86Instr_FpLdStCW ( Bool isLoad, X86AMode* );
+extern X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst );
extern void ppX86Instr ( X86Instr* );
diff --git a/priv/host-x86/isel.c b/priv/host-x86/isel.c
index 65984ed..6942256 100644
--- a/priv/host-x86/isel.c
+++ b/priv/host-x86/isel.c
@@ -502,6 +502,17 @@
return b16;
}
+ if (e->Iex.Binop.op == Iop_CmpF64) {
+ HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
+ HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegI(env);
+ addInstr(env, X86Instr_FpCmp(fL,fR,dst));
+ /* shift this right 8 bits so as to conform to CmpF64
+ definition. */
+ addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, X86RM_Reg(dst)));
+ return dst;
+ }
+
if (e->Iex.Binop.op == Iop_F64toI32 || e->Iex.Binop.op == Iop_F64toI16) {
Int sz = e->Iex.Binop.op == Iop_F64toI16 ? 2 : 4;
HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
@@ -518,7 +529,7 @@
mode accordingly. */
/* Create a space, both for the control word messing, and for
- the actual store conversion.
+ the actual store conversion. */
/* subl $4, %esp */
addInstr(env,
X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(4), hregX86_ESP()));
@@ -1182,6 +1193,23 @@
return;
}
+ /* 64-bit load */
+ if (e->tag == Iex_LDle) {
+ vassert(e->Iex.LDle.ty == Ity_I64);
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ HReg rA = iselIntExpr_R(env, e->Iex.LDle.addr);
+ addInstr(env, X86Instr_Alu32R(
+ Xalu_MOV,
+ X86RMI_Mem(X86AMode_IR(0, rA)), tLo));
+ addInstr(env, X86Instr_Alu32R(
+ Xalu_MOV,
+ X86RMI_Mem(X86AMode_IR(4, rA)), tHi));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
/* 32 x 32 -> 64 multiply */
if (e->tag == Iex_Binop
&& (e->Iex.Binop.op == Iop_MullU32
@@ -1490,6 +1518,20 @@
hregX86_ESP()));
return dst;
}
+ case Iop_I64toF64: {
+ HReg dst = newVRegF(env);
+ HReg rHi,rLo;
+ iselIntExpr64( &rHi, &rLo, env, e->Iex.Unop.arg);
+ addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
+ addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
+ addInstr(env, X86Instr_FpLdStI(
+ True/*load*/, 8, dst,
+ X86AMode_IR(0, hregX86_ESP())));
+ addInstr(env, X86Instr_Alu32R(Xalu_ADD,
+ X86RMI_Imm(8),
+ hregX86_ESP()));
+ return dst;
+ }
case Iop_F32toF64:
/* this is a no-op */
return iselFltExpr(env, e->Iex.Unop.arg);
diff --git a/priv/ir/irdefs.c b/priv/ir/irdefs.c
index acda76c..732b733 100644
--- a/priv/ir/irdefs.c
+++ b/priv/ir/irdefs.c
@@ -133,7 +133,11 @@
case Iop_SubF64: vex_printf("SubF64"); return;
case Iop_MulF64: vex_printf("MulF64"); return;
case Iop_DivF64: vex_printf("DivF64"); return;
+ case Iop_CmpF64: vex_printf("CmpF64"); return;
+
case Iop_I32toF64: vex_printf("I32toF64"); return;
+ case Iop_I64toF64: vex_printf("I64toF64"); return;
+
case Iop_F64toI32: vex_printf("F64toI32"); return;
case Iop_F64toI16: vex_printf("F64toI16"); return;
case Iop_F32toF64: vex_printf("F32toF64"); return;
@@ -622,8 +626,11 @@
case Iop_AddF64: case Iop_SubF64:
case Iop_MulF64: case Iop_DivF64:
BINARY(Ity_F64,Ity_F64,Ity_F64);
+ case Iop_CmpF64:
+ BINARY(Ity_I32,Ity_F64,Ity_F64);
case Iop_I32toF64: UNARY(Ity_F64,Ity_I32);
+ case Iop_I64toF64: UNARY(Ity_F64,Ity_I64);
case Iop_F64toI32: BINARY(Ity_I32, Ity_I32,Ity_F64);
case Iop_F64toI16: BINARY(Ity_I16, Ity_I32,Ity_F64);