Many amd64 FP cases, including conversion to/from int (tedious stuff).
git-svn-id: svn://svn.valgrind.org/vex/trunk@951 8f6e269a-dfd6-0310-a8e1-e2731360e62c
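Note on the int<->FP conversion cases added below (not part of the patch itself): Ain_SseSI2SF and Ain_SseSF2SI emit the SSE2 scalar conversions cvtsi2ss/cvtsi2sd (int -> FP, opcode 0F 2A) and cvtss2si/cvtsd2si (FP -> int, opcode 0F 2D). The F3/F2 prefix picks single vs double precision for the FP operand, and REX.W picks the 32- vs 64-bit integer register. The sketch below is a standalone, hedged model of the byte sequence the new Ain_SseSI2SF emit case produces for a register-register operand; emit_cvtsi2sf and its raw 0..15 register numbers are illustrative only and are not VEX API (hdefs.c goes through rexAMode_R/doAMode_R on HRegs instead). The FP -> int direction mirrors this with opcode 0F 2D and the prefix/REX roles of the FP and integer sizes swapped.

#include <stdio.h>
#include <stdint.h>

/* Illustrative sketch only: models the bytes the new Ain_SseSI2SF case
   emits for a reg-reg cvtsi2ss/cvtsi2sd.  szS is the integer source size
   (4 or 8), szD the FP destination size (4 or 8); ireg/xreg are raw
   0..15 register numbers, unlike the HRegs used in hdefs.c. */
static int emit_cvtsi2sf ( uint8_t* p, int szS, int szD, int ireg, int xreg )
{
   int n = 0;
   uint8_t rex = 0x40
                 | (szS == 8 ? 0x08 : 0x00)   /* REX.W: 64-bit integer source */
                 | ((xreg >> 3) << 2)         /* REX.R: high bit of xmm dst   */
                 | (ireg >> 3);               /* REX.B: high bit of int src   */
   p[n++] = szD == 4 ? 0xF3 : 0xF2;           /* scalar single vs scalar double */
   p[n++] = rex;                              /* always emitted, as in the patch */
   p[n++] = 0x0F;
   p[n++] = 0x2A;                             /* CVTSI2Sx */
   p[n++] = 0xC0 | ((xreg & 7) << 3) | (ireg & 7);   /* ModRM: reg=dst, rm=src */
   return n;
}

int main ( void )
{
   uint8_t buf[16];
   int j, n = emit_cvtsi2sf(buf, 8, 8, 0/*%rax*/, 0/*%xmm0*/);
   for (j = 0; j < n; j++) printf("%02x ", buf[j]);
   printf("\n");   /* cvtsi2sd %rax,%xmm0 -> f2 48 0f 2a c0 */
   return 0;
}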
diff --git a/priv/host-amd64/hdefs.c b/priv/host-amd64/hdefs.c
index 0eeb993..79a3c0c 100644
--- a/priv/host-amd64/hdefs.c
+++ b/priv/host-amd64/hdefs.c
@@ -572,8 +572,8 @@
case Asse_SUBF: return "sub";
case Asse_MULF: return "mul";
case Asse_DIVF: return "div";
-//.. case Xsse_MAXF: return "max";
-//.. case Xsse_MINF: return "min";
+ case Asse_MAXF: return "max";
+ case Asse_MINF: return "min";
//.. case Xsse_CMPEQF: return "cmpFeq";
//.. case Xsse_CMPLTF: return "cmpFlt";
//.. case Xsse_CMPLEF: return "cmpFle";
@@ -848,13 +848,12 @@
//.. vassert(cond != Xcc_ALWAYS);
//.. return i;
//.. }
-//.. AMD64Instr* AMD64Instr_FpLdStCW ( Bool isLoad, AMD64AMode* addr ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_FpLdStCW;
-//.. i->Xin.FpLdStCW.isLoad = isLoad;
-//.. i->Xin.FpLdStCW.addr = addr;
-//.. return i;
-//.. }
+AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_LdMXCSR;
+ i->Ain.LdMXCSR.addr = addr;
+ return i;
+}
//.. AMD64Instr* AMD64Instr_FpStSW_AX ( void ) {
//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
//.. i->tag = Xin_FpStSW_AX;
@@ -870,7 +869,28 @@
vassert(sz == 4 || sz == 8);
return i;
}
-
+AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_SseSI2SF;
+ i->Ain.SseSI2SF.szS = szS;
+ i->Ain.SseSI2SF.szD = szD;
+ i->Ain.SseSI2SF.src = src;
+ i->Ain.SseSI2SF.dst = dst;
+ vassert(szS == 4 || szS == 8);
+ vassert(szD == 4 || szD == 8);
+ return i;
+}
+AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_SseSF2SI;
+ i->Ain.SseSF2SI.szS = szS;
+ i->Ain.SseSF2SI.szD = szD;
+ i->Ain.SseSF2SI.src = src;
+ i->Ain.SseSF2SI.dst = dst;
+ vassert(szS == 4 || szS == 8);
+ vassert(szD == 4 || szD == 8);
+ return i;
+}
//.. AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst ) {
//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
//.. i->tag = Xin_SseConst;
@@ -1160,6 +1180,10 @@
//.. case Xin_FpStSW_AX:
//.. vex_printf("fstsw %%ax");
//.. return;
+ case Ain_LdMXCSR:
+ vex_printf("ldmxcsr ");
+ ppAMD64AMode(i->Ain.LdMXCSR.addr);
+ break;
case Ain_SseUComIS:
vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d");
ppHRegAMD64(i->Ain.SseUComIS.srcL);
@@ -1168,6 +1192,20 @@
vex_printf(" ; pushfq ; popq ");
ppHRegAMD64(i->Ain.SseUComIS.dst);
break;
+ case Ain_SseSI2SF:
+ vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d");
+ (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
+ (i->Ain.SseSI2SF.src);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.SseSI2SF.dst);
+ break;
+ case Ain_SseSF2SI:
+ vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d");
+ ppHRegAMD64(i->Ain.SseSF2SI.src);
+ vex_printf(",");
+ (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
+ (i->Ain.SseSF2SI.dst);
+ break;
//.. case Xin_SseConst:
//.. vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
//.. ppHRegAMD64(i->Xin.SseConst.dst);
@@ -1412,9 +1450,9 @@
//.. addHRegUse(u, HRmRead, i->Xin.FpCMov.src);
//.. addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
//.. return;
-//.. case Xin_FpLdStCW:
-//.. addRegUsage_AMD64AMode(u, i->Xin.FpLdStCW.addr);
-//.. return;
+ case Ain_LdMXCSR:
+ addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);
+ return;
//.. case Xin_FpStSW_AX:
//.. addHRegUse(u, HRmWrite, hregAMD64_EAX());
//.. return;
@@ -1423,6 +1461,14 @@
addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcR);
addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst);
return;
+ case Ain_SseSI2SF:
+ addHRegUse(u, HRmRead, i->Ain.SseSI2SF.src);
+ addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst);
+ return;
+ case Ain_SseSF2SI:
+ addHRegUse(u, HRmRead, i->Ain.SseSF2SI.src);
+ addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst);
+ return;
case Ain_SseLdSt:
addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr);
addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead,
@@ -1596,9 +1642,9 @@
//.. mapReg(m, &i->Xin.FpCMov.src);
//.. mapReg(m, &i->Xin.FpCMov.dst);
//.. return;
-//.. case Xin_FpLdStCW:
-//.. mapRegs_AMD64AMode(m, i->Xin.FpLdStCW.addr);
-//.. return;
+ case Ain_LdMXCSR:
+ mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);
+ return;
//.. case Xin_FpStSW_AX:
//.. return;
case Ain_SseUComIS:
@@ -1606,6 +1652,14 @@
mapReg(m, &i->Ain.SseUComIS.srcR);
mapReg(m, &i->Ain.SseUComIS.dst);
return;
+ case Ain_SseSI2SF:
+ mapReg(m, &i->Ain.SseSI2SF.src);
+ mapReg(m, &i->Ain.SseSI2SF.dst);
+ return;
+ case Ain_SseSF2SI:
+ mapReg(m, &i->Ain.SseSF2SI.src);
+ mapReg(m, &i->Ain.SseSF2SI.dst);
+ return;
//.. case Xin_SseConst:
//.. mapReg(m, &i->Xin.SseConst.dst);
//.. return;
@@ -2423,13 +2477,14 @@
*p++ = 0xFF;
p = doAMode_M(p, fake(6), i->Ain.Push.src->Armi.Mem.am);
goto done;
-//.. case Xrmi_Imm:
-//.. *p++ = 0x68;
-//.. p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32);
-//.. goto done;
-//.. case Xrmi_Reg:
-//.. *p++ = 0x50 + iregNo(i->Xin.Push.src->Xrmi.Reg.reg);
-//.. goto done;
+ case Armi_Imm:
+ *p++ = 0x68;
+ p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32);
+ goto done;
+ case Armi_Reg:
+ *p++ = 0x40 + (1 & iregBit3(i->Ain.Push.src->Armi.Reg.reg));
+ *p++ = 0x50 + iregNo(i->Ain.Push.src->Armi.Reg.reg);
+ goto done;
default:
goto bad;
}
@@ -2818,16 +2873,14 @@
//.. /* Fill in the jump offset. */
//.. *(ptmp-1) = p - ptmp;
//.. goto done;
-//..
-//.. case Xin_FpLdStCW:
-//.. if (i->Xin.FpLdStCW.isLoad) {
-//.. *p++ = 0xD9;
-//.. p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdStCW.addr);
-//.. } else {
-//.. vassert(0);
-//.. }
-//.. goto done;
-//..
+
+ case Ain_LdMXCSR:
+ *p++ = clearWBit(rexAMode_M( fake(0), i->Ain.LdMXCSR.addr));
+ *p++ = 0x0F;
+ *p++ = 0xAE;
+ p = doAMode_M(p, fake(2)/*subopcode*/, i->Ain.LdMXCSR.addr);
+ goto done;
+
//.. case Xin_FpStSW_AX:
//.. /* note, this emits fnstsw %ax, not fstsw %ax */
//.. *p++ = 0xDF;
@@ -2857,6 +2910,30 @@
*p++ = 0x58 + iregNo(i->Ain.SseUComIS.dst);
goto done;

+ case Ain_SseSI2SF:
+ /* cvtsi2s[sd] %src, %dst */
+ rex = rexAMode_R( vreg2ireg(i->Ain.SseSI2SF.dst),
+ i->Ain.SseSI2SF.src );
+ *p++ = i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2;
+ *p++ = i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex;
+ *p++ = 0x0F;
+ *p++ = 0x2A;
+ p = doAMode_R( p, vreg2ireg(i->Ain.SseSI2SF.dst),
+ i->Ain.SseSI2SF.src );
+ goto done;
+
+ case Ain_SseSF2SI:
+ /* cvts[sd]2si %src, %dst */
+ rex = rexAMode_R( i->Ain.SseSF2SI.dst,
+ vreg2ireg(i->Ain.SseSF2SI.src) );
+ *p++ = i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2;
+ *p++ = i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex;
+ *p++ = 0x0F;
+ *p++ = 0x2D;
+ p = doAMode_R( p, i->Ain.SseSF2SI.dst,
+ vreg2ireg(i->Ain.SseSF2SI.src) );
+ goto done;
+
//..
//.. case Xin_FpCmp:
//.. /* gcmp %fL, %fR, %dst
@@ -3011,7 +3088,7 @@
switch (i->Ain.Sse64FLo.op) {
case Asse_ADDF: *p++ = 0x58; break;
case Asse_DIVF: *p++ = 0x5E; break;
-//.. case Xsse_MAXF: *p++ = 0x5F; break;
+ case Asse_MAXF: *p++ = 0x5F; break;
//.. case Xsse_MINF: *p++ = 0x5D; break;
case Asse_MULF: *p++ = 0x59; break;
//.. case Xsse_RCPF: *p++ = 0x53; break;
@@ -3040,7 +3117,7 @@
case Asse_MOV: /*movups*/ XX(rex); XX(0x0F); XX(0x10); break;
//.. case Xsse_OR: XX(rex); XX(0x0F); XX(0x56); break;
case Asse_XOR: XX(rex); XX(0x0F); XX(0x57); break;
-//.. case Xsse_AND: XX(rex); XX(0x0F); XX(0x54); break;
+ case Asse_AND: XX(rex); XX(0x0F); XX(0x54); break;
//.. case Xsse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break;
//.. case Xsse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); break;
//.. case Xsse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); break;