| |
| /*---------------------------------------------------------------*/ |
| /*--- begin host_x86_defs.c ---*/ |
| /*---------------------------------------------------------------*/ |
| |
| /* |
| This file is part of Valgrind, a dynamic binary instrumentation |
| framework. |
| |
| Copyright (C) 2004-2012 OpenWorks LLP |
| info@open-works.net |
| |
| This program is free software; you can redistribute it and/or |
| modify it under the terms of the GNU General Public License as |
| published by the Free Software Foundation; either version 2 of the |
| License, or (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
| 02110-1301, USA. |
| |
| The GNU General Public License is contained in the file COPYING. |
| |
| Neither the names of the U.S. Department of Energy nor the |
| University of California nor the names of its contributors may be |
| used to endorse or promote products derived from this software |
| without prior written permission. |
| */ |
| |
| #include "libvex_basictypes.h" |
| #include "libvex.h" |
| #include "libvex_trc_values.h" |
| |
| #include "main_util.h" |
| #include "host_generic_regs.h" |
| #include "host_x86_defs.h" |
| |
| |
| /* --------- Registers. --------- */ |
| |
| void ppHRegX86 ( HReg reg ) |
| { |
| Int r; |
| static const HChar* ireg32_names[8] |
| = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" }; |
| /* Be generic for all virtual regs. */ |
| if (hregIsVirtual(reg)) { |
| ppHReg(reg); |
| return; |
| } |
| /* But specific for real regs. */ |
| switch (hregClass(reg)) { |
| case HRcInt32: |
| r = hregNumber(reg); |
| vassert(r >= 0 && r < 8); |
| vex_printf("%s", ireg32_names[r]); |
| return; |
| case HRcFlt64: |
| r = hregNumber(reg); |
| vassert(r >= 0 && r < 6); |
| vex_printf("%%fake%d", r); |
| return; |
| case HRcVec128: |
| r = hregNumber(reg); |
| vassert(r >= 0 && r < 8); |
| vex_printf("%%xmm%d", r); |
| return; |
| default: |
| vpanic("ppHRegX86"); |
| } |
| } |
| |
| HReg hregX86_EAX ( void ) { return mkHReg(0, HRcInt32, False); } |
| HReg hregX86_ECX ( void ) { return mkHReg(1, HRcInt32, False); } |
| HReg hregX86_EDX ( void ) { return mkHReg(2, HRcInt32, False); } |
| HReg hregX86_EBX ( void ) { return mkHReg(3, HRcInt32, False); } |
| HReg hregX86_ESP ( void ) { return mkHReg(4, HRcInt32, False); } |
| HReg hregX86_EBP ( void ) { return mkHReg(5, HRcInt32, False); } |
| HReg hregX86_ESI ( void ) { return mkHReg(6, HRcInt32, False); } |
| HReg hregX86_EDI ( void ) { return mkHReg(7, HRcInt32, False); } |
| |
| HReg hregX86_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); } |
| HReg hregX86_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); } |
| HReg hregX86_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); } |
| HReg hregX86_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); } |
| HReg hregX86_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); } |
| HReg hregX86_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); } |
| |
| HReg hregX86_XMM0 ( void ) { return mkHReg(0, HRcVec128, False); } |
| HReg hregX86_XMM1 ( void ) { return mkHReg(1, HRcVec128, False); } |
| HReg hregX86_XMM2 ( void ) { return mkHReg(2, HRcVec128, False); } |
| HReg hregX86_XMM3 ( void ) { return mkHReg(3, HRcVec128, False); } |
| HReg hregX86_XMM4 ( void ) { return mkHReg(4, HRcVec128, False); } |
| HReg hregX86_XMM5 ( void ) { return mkHReg(5, HRcVec128, False); } |
| HReg hregX86_XMM6 ( void ) { return mkHReg(6, HRcVec128, False); } |
| HReg hregX86_XMM7 ( void ) { return mkHReg(7, HRcVec128, False); } |
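
/* Note that %esp and %ebp are not offered to the allocator below: %esp
   is the stack pointer, and %ebp is reserved (as the Xin_EvCheck cases
   later in this file note, %ebp isn't allocatable). */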
| |
| |
| void getAllocableRegs_X86 ( Int* nregs, HReg** arr ) |
| { |
| *nregs = 20; |
| *arr = LibVEX_Alloc(*nregs * sizeof(HReg)); |
| (*arr)[0] = hregX86_EAX(); |
| (*arr)[1] = hregX86_EBX(); |
| (*arr)[2] = hregX86_ECX(); |
| (*arr)[3] = hregX86_EDX(); |
| (*arr)[4] = hregX86_ESI(); |
| (*arr)[5] = hregX86_EDI(); |
| (*arr)[6] = hregX86_FAKE0(); |
| (*arr)[7] = hregX86_FAKE1(); |
| (*arr)[8] = hregX86_FAKE2(); |
| (*arr)[9] = hregX86_FAKE3(); |
| (*arr)[10] = hregX86_FAKE4(); |
| (*arr)[11] = hregX86_FAKE5(); |
| (*arr)[12] = hregX86_XMM0(); |
| (*arr)[13] = hregX86_XMM1(); |
| (*arr)[14] = hregX86_XMM2(); |
| (*arr)[15] = hregX86_XMM3(); |
| (*arr)[16] = hregX86_XMM4(); |
| (*arr)[17] = hregX86_XMM5(); |
| (*arr)[18] = hregX86_XMM6(); |
| (*arr)[19] = hregX86_XMM7(); |
| } |
| |
| |
| /* --------- Condition codes, Intel encoding. --------- */ |
| |
| const HChar* showX86CondCode ( X86CondCode cond ) |
| { |
| switch (cond) { |
| case Xcc_O: return "o"; |
| case Xcc_NO: return "no"; |
| case Xcc_B: return "b"; |
| case Xcc_NB: return "nb"; |
| case Xcc_Z: return "z"; |
| case Xcc_NZ: return "nz"; |
| case Xcc_BE: return "be"; |
| case Xcc_NBE: return "nbe"; |
| case Xcc_S: return "s"; |
| case Xcc_NS: return "ns"; |
| case Xcc_P: return "p"; |
| case Xcc_NP: return "np"; |
| case Xcc_L: return "l"; |
| case Xcc_NL: return "nl"; |
| case Xcc_LE: return "le"; |
| case Xcc_NLE: return "nle"; |
| case Xcc_ALWAYS: return "ALWAYS"; |
| default: vpanic("ppX86CondCode"); |
| } |
| } |
| |
| |
| /* --------- X86AMode: memory address expressions. --------- */ |
| |
| X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) { |
| X86AMode* am = LibVEX_Alloc(sizeof(X86AMode)); |
| am->tag = Xam_IR; |
| am->Xam.IR.imm = imm32; |
| am->Xam.IR.reg = reg; |
| return am; |
| } |
| X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) { |
| X86AMode* am = LibVEX_Alloc(sizeof(X86AMode)); |
| am->tag = Xam_IRRS; |
| am->Xam.IRRS.imm = imm32; |
| am->Xam.IRRS.base = base; |
| am->Xam.IRRS.index = indEx; |
| am->Xam.IRRS.shift = shift; |
| vassert(shift >= 0 && shift <= 3); |
| return am; |
| } |
| |
| X86AMode* dopyX86AMode ( X86AMode* am ) { |
| switch (am->tag) { |
| case Xam_IR: |
| return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg ); |
| case Xam_IRRS: |
| return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base, |
| am->Xam.IRRS.index, am->Xam.IRRS.shift ); |
| default: |
| vpanic("dopyX86AMode"); |
| } |
| } |
| |
| void ppX86AMode ( X86AMode* am ) { |
| switch (am->tag) { |
| case Xam_IR: |
| if (am->Xam.IR.imm == 0) |
| vex_printf("("); |
| else |
| vex_printf("0x%x(", am->Xam.IR.imm); |
| ppHRegX86(am->Xam.IR.reg); |
| vex_printf(")"); |
| return; |
| case Xam_IRRS: |
| vex_printf("0x%x(", am->Xam.IRRS.imm); |
| ppHRegX86(am->Xam.IRRS.base); |
| vex_printf(","); |
| ppHRegX86(am->Xam.IRRS.index); |
| vex_printf(",%d)", 1 << am->Xam.IRRS.shift); |
| return; |
| default: |
| vpanic("ppX86AMode"); |
| } |
| } |
| |
| static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) { |
| switch (am->tag) { |
| case Xam_IR: |
| addHRegUse(u, HRmRead, am->Xam.IR.reg); |
| return; |
| case Xam_IRRS: |
| addHRegUse(u, HRmRead, am->Xam.IRRS.base); |
| addHRegUse(u, HRmRead, am->Xam.IRRS.index); |
| return; |
| default: |
| vpanic("addRegUsage_X86AMode"); |
| } |
| } |
| |
| static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) { |
| switch (am->tag) { |
| case Xam_IR: |
| am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg); |
| return; |
| case Xam_IRRS: |
| am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base); |
| am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index); |
| return; |
| default: |
| vpanic("mapRegs_X86AMode"); |
| } |
| } |
| |
| /* --------- Operand, which can be reg, immediate or memory. --------- */ |
| |
| X86RMI* X86RMI_Imm ( UInt imm32 ) { |
| X86RMI* op = LibVEX_Alloc(sizeof(X86RMI)); |
| op->tag = Xrmi_Imm; |
| op->Xrmi.Imm.imm32 = imm32; |
| return op; |
| } |
| X86RMI* X86RMI_Reg ( HReg reg ) { |
| X86RMI* op = LibVEX_Alloc(sizeof(X86RMI)); |
| op->tag = Xrmi_Reg; |
| op->Xrmi.Reg.reg = reg; |
| return op; |
| } |
| X86RMI* X86RMI_Mem ( X86AMode* am ) { |
| X86RMI* op = LibVEX_Alloc(sizeof(X86RMI)); |
| op->tag = Xrmi_Mem; |
| op->Xrmi.Mem.am = am; |
| return op; |
| } |
| |
| void ppX86RMI ( X86RMI* op ) { |
| switch (op->tag) { |
| case Xrmi_Imm: |
| vex_printf("$0x%x", op->Xrmi.Imm.imm32); |
| return; |
| case Xrmi_Reg: |
| ppHRegX86(op->Xrmi.Reg.reg); |
| return; |
| case Xrmi_Mem: |
| ppX86AMode(op->Xrmi.Mem.am); |
| return; |
| default: |
| vpanic("ppX86RMI"); |
| } |
| } |
| |
| /* An X86RMI can only be used in a "read" context (what would it mean |
| to write or modify a literal?) and so we enumerate its registers |
| accordingly. */ |
| static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) { |
| switch (op->tag) { |
| case Xrmi_Imm: |
| return; |
| case Xrmi_Reg: |
| addHRegUse(u, HRmRead, op->Xrmi.Reg.reg); |
| return; |
| case Xrmi_Mem: |
| addRegUsage_X86AMode(u, op->Xrmi.Mem.am); |
| return; |
| default: |
| vpanic("addRegUsage_X86RMI"); |
| } |
| } |
| |
| static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) { |
| switch (op->tag) { |
| case Xrmi_Imm: |
| return; |
| case Xrmi_Reg: |
| op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg); |
| return; |
| case Xrmi_Mem: |
| mapRegs_X86AMode(m, op->Xrmi.Mem.am); |
| return; |
| default: |
| vpanic("mapRegs_X86RMI"); |
| } |
| } |
| |
| |
| /* --------- Operand, which can be reg or immediate only. --------- */ |
| |
| X86RI* X86RI_Imm ( UInt imm32 ) { |
| X86RI* op = LibVEX_Alloc(sizeof(X86RI)); |
| op->tag = Xri_Imm; |
| op->Xri.Imm.imm32 = imm32; |
| return op; |
| } |
| X86RI* X86RI_Reg ( HReg reg ) { |
| X86RI* op = LibVEX_Alloc(sizeof(X86RI)); |
| op->tag = Xri_Reg; |
| op->Xri.Reg.reg = reg; |
| return op; |
| } |
| |
| void ppX86RI ( X86RI* op ) { |
| switch (op->tag) { |
| case Xri_Imm: |
| vex_printf("$0x%x", op->Xri.Imm.imm32); |
| return; |
| case Xri_Reg: |
| ppHRegX86(op->Xri.Reg.reg); |
| return; |
| default: |
| vpanic("ppX86RI"); |
| } |
| } |
| |
| /* An X86RI can only be used in a "read" context (what would it mean |
| to write or modify a literal?) and so we enumerate its registers |
| accordingly. */ |
| static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) { |
| switch (op->tag) { |
| case Xri_Imm: |
| return; |
| case Xri_Reg: |
| addHRegUse(u, HRmRead, op->Xri.Reg.reg); |
| return; |
| default: |
| vpanic("addRegUsage_X86RI"); |
| } |
| } |
| |
| static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) { |
| switch (op->tag) { |
| case Xri_Imm: |
| return; |
| case Xri_Reg: |
| op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg); |
| return; |
| default: |
| vpanic("mapRegs_X86RI"); |
| } |
| } |
| |
| |
| /* --------- Operand, which can be reg or memory only. --------- */ |
| |
| X86RM* X86RM_Reg ( HReg reg ) { |
| X86RM* op = LibVEX_Alloc(sizeof(X86RM)); |
| op->tag = Xrm_Reg; |
| op->Xrm.Reg.reg = reg; |
| return op; |
| } |
| X86RM* X86RM_Mem ( X86AMode* am ) { |
| X86RM* op = LibVEX_Alloc(sizeof(X86RM)); |
| op->tag = Xrm_Mem; |
| op->Xrm.Mem.am = am; |
| return op; |
| } |
| |
| void ppX86RM ( X86RM* op ) { |
| switch (op->tag) { |
| case Xrm_Mem: |
| ppX86AMode(op->Xrm.Mem.am); |
| return; |
| case Xrm_Reg: |
| ppHRegX86(op->Xrm.Reg.reg); |
| return; |
| default: |
| vpanic("ppX86RM"); |
| } |
| } |
| |
| /* Because an X86RM can be both a source or destination operand, we |
| have to supply a mode -- pertaining to the operand as a whole -- |
| indicating how it's being used. */ |
| static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) { |
| switch (op->tag) { |
| case Xrm_Mem: |
| /* Memory is read, written or modified. So we just want to |
| know the regs read by the amode. */ |
| addRegUsage_X86AMode(u, op->Xrm.Mem.am); |
| return; |
| case Xrm_Reg: |
| /* reg is read, written or modified. Add it in the |
| appropriate way. */ |
| addHRegUse(u, mode, op->Xrm.Reg.reg); |
| return; |
| default: |
| vpanic("addRegUsage_X86RM"); |
| } |
| } |
| |
| static void mapRegs_X86RM ( HRegRemap* m, X86RM* op ) |
| { |
| switch (op->tag) { |
| case Xrm_Mem: |
| mapRegs_X86AMode(m, op->Xrm.Mem.am); |
| return; |
| case Xrm_Reg: |
| op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg); |
| return; |
| default: |
| vpanic("mapRegs_X86RM"); |
| } |
| } |
| |
| |
| /* --------- Instructions. --------- */ |
| |
| const HChar* showX86UnaryOp ( X86UnaryOp op ) { |
| switch (op) { |
| case Xun_NOT: return "not"; |
| case Xun_NEG: return "neg"; |
| default: vpanic("showX86UnaryOp"); |
| } |
| } |
| |
| const HChar* showX86AluOp ( X86AluOp op ) { |
| switch (op) { |
| case Xalu_MOV: return "mov"; |
| case Xalu_CMP: return "cmp"; |
| case Xalu_ADD: return "add"; |
| case Xalu_SUB: return "sub"; |
| case Xalu_ADC: return "adc"; |
| case Xalu_SBB: return "sbb"; |
| case Xalu_AND: return "and"; |
| case Xalu_OR: return "or"; |
| case Xalu_XOR: return "xor"; |
| case Xalu_MUL: return "mul"; |
| default: vpanic("showX86AluOp"); |
| } |
| } |
| |
| const HChar* showX86ShiftOp ( X86ShiftOp op ) { |
| switch (op) { |
| case Xsh_SHL: return "shl"; |
| case Xsh_SHR: return "shr"; |
| case Xsh_SAR: return "sar"; |
| default: vpanic("showX86ShiftOp"); |
| } |
| } |
| |
| const HChar* showX86FpOp ( X86FpOp op ) { |
| switch (op) { |
| case Xfp_ADD: return "add"; |
| case Xfp_SUB: return "sub"; |
| case Xfp_MUL: return "mul"; |
| case Xfp_DIV: return "div"; |
| case Xfp_SCALE: return "scale"; |
| case Xfp_ATAN: return "atan"; |
| case Xfp_YL2X: return "yl2x"; |
| case Xfp_YL2XP1: return "yl2xp1"; |
| case Xfp_PREM: return "prem"; |
| case Xfp_PREM1: return "prem1"; |
| case Xfp_SQRT: return "sqrt"; |
| case Xfp_ABS: return "abs"; |
| case Xfp_NEG: return "chs"; |
| case Xfp_MOV: return "mov"; |
| case Xfp_SIN: return "sin"; |
| case Xfp_COS: return "cos"; |
| case Xfp_TAN: return "tan"; |
| case Xfp_ROUND: return "round"; |
| case Xfp_2XM1: return "2xm1"; |
| default: vpanic("showX86FpOp"); |
| } |
| } |
| |
| const HChar* showX86SseOp ( X86SseOp op ) { |
| switch (op) { |
| case Xsse_MOV: return "mov(?!)"; |
| case Xsse_ADDF: return "add"; |
| case Xsse_SUBF: return "sub"; |
| case Xsse_MULF: return "mul"; |
| case Xsse_DIVF: return "div"; |
| case Xsse_MAXF: return "max"; |
| case Xsse_MINF: return "min"; |
| case Xsse_CMPEQF: return "cmpFeq"; |
| case Xsse_CMPLTF: return "cmpFlt"; |
| case Xsse_CMPLEF: return "cmpFle"; |
| case Xsse_CMPUNF: return "cmpFun"; |
| case Xsse_RCPF: return "rcp"; |
| case Xsse_RSQRTF: return "rsqrt"; |
| case Xsse_SQRTF: return "sqrt"; |
| case Xsse_AND: return "and"; |
| case Xsse_OR: return "or"; |
| case Xsse_XOR: return "xor"; |
| case Xsse_ANDN: return "andn"; |
| case Xsse_ADD8: return "paddb"; |
| case Xsse_ADD16: return "paddw"; |
| case Xsse_ADD32: return "paddd"; |
| case Xsse_ADD64: return "paddq"; |
| case Xsse_QADD8U: return "paddusb"; |
| case Xsse_QADD16U: return "paddusw"; |
| case Xsse_QADD8S: return "paddsb"; |
| case Xsse_QADD16S: return "paddsw"; |
| case Xsse_SUB8: return "psubb"; |
| case Xsse_SUB16: return "psubw"; |
| case Xsse_SUB32: return "psubd"; |
| case Xsse_SUB64: return "psubq"; |
| case Xsse_QSUB8U: return "psubusb"; |
| case Xsse_QSUB16U: return "psubusw"; |
| case Xsse_QSUB8S: return "psubsb"; |
| case Xsse_QSUB16S: return "psubsw"; |
| case Xsse_MUL16: return "pmullw"; |
| case Xsse_MULHI16U: return "pmulhuw"; |
| case Xsse_MULHI16S: return "pmulhw"; |
| case Xsse_AVG8U: return "pavgb"; |
| case Xsse_AVG16U: return "pavgw"; |
| case Xsse_MAX16S: return "pmaxw"; |
| case Xsse_MAX8U: return "pmaxub"; |
| case Xsse_MIN16S: return "pminw"; |
| case Xsse_MIN8U: return "pminub"; |
| case Xsse_CMPEQ8: return "pcmpeqb"; |
| case Xsse_CMPEQ16: return "pcmpeqw"; |
| case Xsse_CMPEQ32: return "pcmpeqd"; |
| case Xsse_CMPGT8S: return "pcmpgtb"; |
| case Xsse_CMPGT16S: return "pcmpgtw"; |
| case Xsse_CMPGT32S: return "pcmpgtd"; |
| case Xsse_SHL16: return "psllw"; |
| case Xsse_SHL32: return "pslld"; |
| case Xsse_SHL64: return "psllq"; |
| case Xsse_SHR16: return "psrlw"; |
| case Xsse_SHR32: return "psrld"; |
| case Xsse_SHR64: return "psrlq"; |
| case Xsse_SAR16: return "psraw"; |
| case Xsse_SAR32: return "psrad"; |
| case Xsse_PACKSSD: return "packssdw"; |
| case Xsse_PACKSSW: return "packsswb"; |
| case Xsse_PACKUSW: return "packuswb"; |
| case Xsse_UNPCKHB: return "punpckhb"; |
| case Xsse_UNPCKHW: return "punpckhw"; |
| case Xsse_UNPCKHD: return "punpckhd"; |
| case Xsse_UNPCKHQ: return "punpckhq"; |
| case Xsse_UNPCKLB: return "punpcklb"; |
| case Xsse_UNPCKLW: return "punpcklw"; |
| case Xsse_UNPCKLD: return "punpckld"; |
| case Xsse_UNPCKLQ: return "punpcklq"; |
| default: vpanic("showX86SseOp"); |
| } |
| } |
| |
| X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_Alu32R; |
| i->Xin.Alu32R.op = op; |
| i->Xin.Alu32R.src = src; |
| i->Xin.Alu32R.dst = dst; |
| return i; |
| } |
| X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_Alu32M; |
| i->Xin.Alu32M.op = op; |
| i->Xin.Alu32M.src = src; |
| i->Xin.Alu32M.dst = dst; |
| vassert(op != Xalu_MUL); |
| return i; |
| } |
| X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_Sh32; |
| i->Xin.Sh32.op = op; |
| i->Xin.Sh32.src = src; |
| i->Xin.Sh32.dst = dst; |
| return i; |
| } |
| X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_Test32; |
| i->Xin.Test32.imm32 = imm32; |
| i->Xin.Test32.dst = dst; |
| return i; |
| } |
| X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_Unary32; |
| i->Xin.Unary32.op = op; |
| i->Xin.Unary32.dst = dst; |
| return i; |
| } |
| X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_Lea32; |
| i->Xin.Lea32.am = am; |
| i->Xin.Lea32.dst = dst; |
| return i; |
| } |
| X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_MulL; |
| i->Xin.MulL.syned = syned; |
| i->Xin.MulL.src = src; |
| return i; |
| } |
| X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_Div; |
| i->Xin.Div.syned = syned; |
| i->Xin.Div.src = src; |
| return i; |
| } |
| X86Instr* X86Instr_Sh3232 ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_Sh3232; |
| i->Xin.Sh3232.op = op; |
| i->Xin.Sh3232.amt = amt; |
| i->Xin.Sh3232.src = src; |
| i->Xin.Sh3232.dst = dst; |
| vassert(op == Xsh_SHL || op == Xsh_SHR); |
| return i; |
| } |
| X86Instr* X86Instr_Push( X86RMI* src ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_Push; |
| i->Xin.Push.src = src; |
| return i; |
| } |
| X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms, |
| RetLoc rloc ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_Call; |
| i->Xin.Call.cond = cond; |
| i->Xin.Call.target = target; |
| i->Xin.Call.regparms = regparms; |
| i->Xin.Call.rloc = rloc; |
| vassert(regparms >= 0 && regparms <= 3); |
| vassert(is_sane_RetLoc(rloc)); |
| return i; |
| } |
| X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP, |
| X86CondCode cond, Bool toFastEP ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_XDirect; |
| i->Xin.XDirect.dstGA = dstGA; |
| i->Xin.XDirect.amEIP = amEIP; |
| i->Xin.XDirect.cond = cond; |
| i->Xin.XDirect.toFastEP = toFastEP; |
| return i; |
| } |
| X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP, |
| X86CondCode cond ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_XIndir; |
| i->Xin.XIndir.dstGA = dstGA; |
| i->Xin.XIndir.amEIP = amEIP; |
| i->Xin.XIndir.cond = cond; |
| return i; |
| } |
| X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP, |
| X86CondCode cond, IRJumpKind jk ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_XAssisted; |
| i->Xin.XAssisted.dstGA = dstGA; |
| i->Xin.XAssisted.amEIP = amEIP; |
| i->Xin.XAssisted.cond = cond; |
| i->Xin.XAssisted.jk = jk; |
| return i; |
| } |
| X86Instr* X86Instr_CMov32 ( X86CondCode cond, X86RM* src, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_CMov32; |
| i->Xin.CMov32.cond = cond; |
| i->Xin.CMov32.src = src; |
| i->Xin.CMov32.dst = dst; |
| vassert(cond != Xcc_ALWAYS); |
| return i; |
| } |
| X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned, |
| X86AMode* src, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_LoadEX; |
| i->Xin.LoadEX.szSmall = szSmall; |
| i->Xin.LoadEX.syned = syned; |
| i->Xin.LoadEX.src = src; |
| i->Xin.LoadEX.dst = dst; |
| vassert(szSmall == 1 || szSmall == 2); |
| return i; |
| } |
| X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_Store; |
| i->Xin.Store.sz = sz; |
| i->Xin.Store.src = src; |
| i->Xin.Store.dst = dst; |
| vassert(sz == 1 || sz == 2); |
| return i; |
| } |
| X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_Set32; |
| i->Xin.Set32.cond = cond; |
| i->Xin.Set32.dst = dst; |
| return i; |
| } |
| X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_Bsfr32; |
| i->Xin.Bsfr32.isFwds = isFwds; |
| i->Xin.Bsfr32.src = src; |
| i->Xin.Bsfr32.dst = dst; |
| return i; |
| } |
| X86Instr* X86Instr_MFence ( UInt hwcaps ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_MFence; |
| i->Xin.MFence.hwcaps = hwcaps; |
| vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_MMXEXT |
| |VEX_HWCAPS_X86_SSE1 |
| |VEX_HWCAPS_X86_SSE2 |
| |VEX_HWCAPS_X86_SSE3 |
| |VEX_HWCAPS_X86_LZCNT))); |
| return i; |
| } |
| X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_ACAS; |
| i->Xin.ACAS.addr = addr; |
| i->Xin.ACAS.sz = sz; |
| vassert(sz == 4 || sz == 2 || sz == 1); |
| return i; |
| } |
| X86Instr* X86Instr_DACAS ( X86AMode* addr ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_DACAS; |
| i->Xin.DACAS.addr = addr; |
| return i; |
| } |
| |
| X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_FpUnary; |
| i->Xin.FpUnary.op = op; |
| i->Xin.FpUnary.src = src; |
| i->Xin.FpUnary.dst = dst; |
| return i; |
| } |
| X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_FpBinary; |
| i->Xin.FpBinary.op = op; |
| i->Xin.FpBinary.srcL = srcL; |
| i->Xin.FpBinary.srcR = srcR; |
| i->Xin.FpBinary.dst = dst; |
| return i; |
| } |
| X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_FpLdSt; |
| i->Xin.FpLdSt.isLoad = isLoad; |
| i->Xin.FpLdSt.sz = sz; |
| i->Xin.FpLdSt.reg = reg; |
| i->Xin.FpLdSt.addr = addr; |
| vassert(sz == 4 || sz == 8 || sz == 10); |
| return i; |
| } |
| X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz, |
| HReg reg, X86AMode* addr ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_FpLdStI; |
| i->Xin.FpLdStI.isLoad = isLoad; |
| i->Xin.FpLdStI.sz = sz; |
| i->Xin.FpLdStI.reg = reg; |
| i->Xin.FpLdStI.addr = addr; |
| vassert(sz == 2 || sz == 4 || sz == 8); |
| return i; |
| } |
| X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_Fp64to32; |
| i->Xin.Fp64to32.src = src; |
| i->Xin.Fp64to32.dst = dst; |
| return i; |
| } |
| X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_FpCMov; |
| i->Xin.FpCMov.cond = cond; |
| i->Xin.FpCMov.src = src; |
| i->Xin.FpCMov.dst = dst; |
| vassert(cond != Xcc_ALWAYS); |
| return i; |
| } |
| X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_FpLdCW; |
| i->Xin.FpLdCW.addr = addr; |
| return i; |
| } |
| X86Instr* X86Instr_FpStSW_AX ( void ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_FpStSW_AX; |
| return i; |
| } |
| X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_FpCmp; |
| i->Xin.FpCmp.srcL = srcL; |
| i->Xin.FpCmp.srcR = srcR; |
| i->Xin.FpCmp.dst = dst; |
| return i; |
| } |
| X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_SseConst; |
| i->Xin.SseConst.con = con; |
| i->Xin.SseConst.dst = dst; |
| vassert(hregClass(dst) == HRcVec128); |
| return i; |
| } |
| X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_SseLdSt; |
| i->Xin.SseLdSt.isLoad = isLoad; |
| i->Xin.SseLdSt.reg = reg; |
| i->Xin.SseLdSt.addr = addr; |
| return i; |
| } |
| X86Instr* X86Instr_SseLdzLO ( Int sz, HReg reg, X86AMode* addr ) |
| { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_SseLdzLO; |
| i->Xin.SseLdzLO.sz = toUChar(sz); |
| i->Xin.SseLdzLO.reg = reg; |
| i->Xin.SseLdzLO.addr = addr; |
| vassert(sz == 4 || sz == 8); |
| return i; |
| } |
| X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_Sse32Fx4; |
| i->Xin.Sse32Fx4.op = op; |
| i->Xin.Sse32Fx4.src = src; |
| i->Xin.Sse32Fx4.dst = dst; |
| vassert(op != Xsse_MOV); |
| return i; |
| } |
| X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg src, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_Sse32FLo; |
| i->Xin.Sse32FLo.op = op; |
| i->Xin.Sse32FLo.src = src; |
| i->Xin.Sse32FLo.dst = dst; |
| vassert(op != Xsse_MOV); |
| return i; |
| } |
| X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_Sse64Fx2; |
| i->Xin.Sse64Fx2.op = op; |
| i->Xin.Sse64Fx2.src = src; |
| i->Xin.Sse64Fx2.dst = dst; |
| vassert(op != Xsse_MOV); |
| return i; |
| } |
| X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_Sse64FLo; |
| i->Xin.Sse64FLo.op = op; |
| i->Xin.Sse64FLo.src = src; |
| i->Xin.Sse64FLo.dst = dst; |
| vassert(op != Xsse_MOV); |
| return i; |
| } |
| X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_SseReRg; |
| i->Xin.SseReRg.op = op; |
| i->Xin.SseReRg.src = re; |
| i->Xin.SseReRg.dst = rg; |
| return i; |
| } |
| X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_SseCMov; |
| i->Xin.SseCMov.cond = cond; |
| i->Xin.SseCMov.src = src; |
| i->Xin.SseCMov.dst = dst; |
| vassert(cond != Xcc_ALWAYS); |
| return i; |
| } |
| X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_SseShuf; |
| i->Xin.SseShuf.order = order; |
| i->Xin.SseShuf.src = src; |
| i->Xin.SseShuf.dst = dst; |
| vassert(order >= 0 && order <= 0xFF); |
| return i; |
| } |
| X86Instr* X86Instr_EvCheck ( X86AMode* amCounter, |
| X86AMode* amFailAddr ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_EvCheck; |
| i->Xin.EvCheck.amCounter = amCounter; |
| i->Xin.EvCheck.amFailAddr = amFailAddr; |
| return i; |
| } |
| X86Instr* X86Instr_ProfInc ( void ) { |
| X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); |
| i->tag = Xin_ProfInc; |
| return i; |
| } |
| |
| void ppX86Instr ( X86Instr* i, Bool mode64 ) { |
| vassert(mode64 == False); |
| switch (i->tag) { |
| case Xin_Alu32R: |
| vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op)); |
| ppX86RMI(i->Xin.Alu32R.src); |
| vex_printf(","); |
| ppHRegX86(i->Xin.Alu32R.dst); |
| return; |
| case Xin_Alu32M: |
| vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op)); |
| ppX86RI(i->Xin.Alu32M.src); |
| vex_printf(","); |
| ppX86AMode(i->Xin.Alu32M.dst); |
| return; |
| case Xin_Sh32: |
| vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op)); |
| if (i->Xin.Sh32.src == 0) |
| vex_printf("%%cl,"); |
| else |
| vex_printf("$%d,", (Int)i->Xin.Sh32.src); |
| ppHRegX86(i->Xin.Sh32.dst); |
| return; |
| case Xin_Test32: |
| vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32); |
| ppX86RM(i->Xin.Test32.dst); |
| return; |
| case Xin_Unary32: |
| vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op)); |
| ppHRegX86(i->Xin.Unary32.dst); |
| return; |
| case Xin_Lea32: |
| vex_printf("leal "); |
| ppX86AMode(i->Xin.Lea32.am); |
| vex_printf(","); |
| ppHRegX86(i->Xin.Lea32.dst); |
| return; |
| case Xin_MulL: |
| vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u'); |
| ppX86RM(i->Xin.MulL.src); |
| return; |
| case Xin_Div: |
| vex_printf("%cdivl ", i->Xin.Div.syned ? 's' : 'u'); |
| ppX86RM(i->Xin.Div.src); |
| return; |
| case Xin_Sh3232: |
| vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op)); |
| if (i->Xin.Sh3232.amt == 0) |
| vex_printf(" %%cl,"); |
| else |
| vex_printf(" $%d,", (Int)i->Xin.Sh3232.amt); |
| ppHRegX86(i->Xin.Sh3232.src); |
| vex_printf(","); |
| ppHRegX86(i->Xin.Sh3232.dst); |
| return; |
| case Xin_Push: |
| vex_printf("pushl "); |
| ppX86RMI(i->Xin.Push.src); |
| return; |
| case Xin_Call: |
| vex_printf("call%s[%d,", |
| i->Xin.Call.cond==Xcc_ALWAYS |
| ? "" : showX86CondCode(i->Xin.Call.cond), |
| i->Xin.Call.regparms); |
| ppRetLoc(i->Xin.Call.rloc); |
| vex_printf("] 0x%x", i->Xin.Call.target); |
| break; |
| case Xin_XDirect: |
| vex_printf("(xDirect) "); |
| vex_printf("if (%%eflags.%s) { ", |
| showX86CondCode(i->Xin.XDirect.cond)); |
| vex_printf("movl $0x%x,", i->Xin.XDirect.dstGA); |
| ppX86AMode(i->Xin.XDirect.amEIP); |
| vex_printf("; "); |
| vex_printf("movl $disp_cp_chain_me_to_%sEP,%%edx; call *%%edx }", |
| i->Xin.XDirect.toFastEP ? "fast" : "slow"); |
| return; |
| case Xin_XIndir: |
| vex_printf("(xIndir) "); |
| vex_printf("if (%%eflags.%s) { movl ", |
| showX86CondCode(i->Xin.XIndir.cond)); |
| ppHRegX86(i->Xin.XIndir.dstGA); |
| vex_printf(","); |
| ppX86AMode(i->Xin.XIndir.amEIP); |
| vex_printf("; movl $disp_indir,%%edx; jmp *%%edx }"); |
| return; |
| case Xin_XAssisted: |
| vex_printf("(xAssisted) "); |
| vex_printf("if (%%eflags.%s) { ", |
| showX86CondCode(i->Xin.XAssisted.cond)); |
| vex_printf("movl "); |
| ppHRegX86(i->Xin.XAssisted.dstGA); |
| vex_printf(","); |
| ppX86AMode(i->Xin.XAssisted.amEIP); |
| vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%ebp", |
| (Int)i->Xin.XAssisted.jk); |
| vex_printf("; movl $disp_assisted,%%edx; jmp *%%edx }"); |
| return; |
| case Xin_CMov32: |
| vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond)); |
| ppX86RM(i->Xin.CMov32.src); |
| vex_printf(","); |
| ppHRegX86(i->Xin.CMov32.dst); |
| return; |
| case Xin_LoadEX: |
| vex_printf("mov%c%cl ", |
| i->Xin.LoadEX.syned ? 's' : 'z', |
| i->Xin.LoadEX.szSmall==1 ? 'b' : 'w'); |
| ppX86AMode(i->Xin.LoadEX.src); |
| vex_printf(","); |
| ppHRegX86(i->Xin.LoadEX.dst); |
| return; |
| case Xin_Store: |
| vex_printf("mov%c ", i->Xin.Store.sz==1 ? 'b' : 'w'); |
| ppHRegX86(i->Xin.Store.src); |
| vex_printf(","); |
| ppX86AMode(i->Xin.Store.dst); |
| return; |
| case Xin_Set32: |
| vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond)); |
| ppHRegX86(i->Xin.Set32.dst); |
| return; |
| case Xin_Bsfr32: |
| vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r'); |
| ppHRegX86(i->Xin.Bsfr32.src); |
| vex_printf(","); |
| ppHRegX86(i->Xin.Bsfr32.dst); |
| return; |
| case Xin_MFence: |
| vex_printf("mfence(%s)", |
| LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps)); |
| return; |
| case Xin_ACAS: |
| vex_printf("lock cmpxchg%c ", |
| i->Xin.ACAS.sz==1 ? 'b' |
| : i->Xin.ACAS.sz==2 ? 'w' : 'l'); |
| vex_printf("{%%eax->%%ebx},"); |
| ppX86AMode(i->Xin.ACAS.addr); |
| return; |
| case Xin_DACAS: |
| vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},"); |
| ppX86AMode(i->Xin.DACAS.addr); |
| return; |
| case Xin_FpUnary: |
| vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op)); |
| ppHRegX86(i->Xin.FpUnary.src); |
| vex_printf(","); |
| ppHRegX86(i->Xin.FpUnary.dst); |
| break; |
| case Xin_FpBinary: |
| vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op)); |
| ppHRegX86(i->Xin.FpBinary.srcL); |
| vex_printf(","); |
| ppHRegX86(i->Xin.FpBinary.srcR); |
| vex_printf(","); |
| ppHRegX86(i->Xin.FpBinary.dst); |
| break; |
| case Xin_FpLdSt: |
| if (i->Xin.FpLdSt.isLoad) { |
| vex_printf("gld%c " , i->Xin.FpLdSt.sz==10 ? 'T' |
| : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F')); |
| ppX86AMode(i->Xin.FpLdSt.addr); |
| vex_printf(", "); |
| ppHRegX86(i->Xin.FpLdSt.reg); |
| } else { |
| vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T' |
| : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F')); |
| ppHRegX86(i->Xin.FpLdSt.reg); |
| vex_printf(", "); |
| ppX86AMode(i->Xin.FpLdSt.addr); |
| } |
| return; |
| case Xin_FpLdStI: |
| if (i->Xin.FpLdStI.isLoad) { |
| vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" : |
| i->Xin.FpLdStI.sz==4 ? "l" : "w"); |
| ppX86AMode(i->Xin.FpLdStI.addr); |
| vex_printf(", "); |
| ppHRegX86(i->Xin.FpLdStI.reg); |
| } else { |
| vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" : |
| i->Xin.FpLdStI.sz==4 ? "l" : "w"); |
| ppHRegX86(i->Xin.FpLdStI.reg); |
| vex_printf(", "); |
| ppX86AMode(i->Xin.FpLdStI.addr); |
| } |
| return; |
| case Xin_Fp64to32: |
| vex_printf("gdtof "); |
| ppHRegX86(i->Xin.Fp64to32.src); |
| vex_printf(","); |
| ppHRegX86(i->Xin.Fp64to32.dst); |
| return; |
| case Xin_FpCMov: |
| vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond)); |
| ppHRegX86(i->Xin.FpCMov.src); |
| vex_printf(","); |
| ppHRegX86(i->Xin.FpCMov.dst); |
| return; |
| case Xin_FpLdCW: |
| vex_printf("fldcw "); |
| ppX86AMode(i->Xin.FpLdCW.addr); |
| return; |
| case Xin_FpStSW_AX: |
| vex_printf("fstsw %%ax"); |
| return; |
| case Xin_FpCmp: |
| vex_printf("gcmp "); |
| ppHRegX86(i->Xin.FpCmp.srcL); |
| vex_printf(","); |
| ppHRegX86(i->Xin.FpCmp.srcR); |
| vex_printf(","); |
| ppHRegX86(i->Xin.FpCmp.dst); |
| break; |
| case Xin_SseConst: |
| vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con); |
| ppHRegX86(i->Xin.SseConst.dst); |
| break; |
| case Xin_SseLdSt: |
| vex_printf("movups "); |
| if (i->Xin.SseLdSt.isLoad) { |
| ppX86AMode(i->Xin.SseLdSt.addr); |
| vex_printf(","); |
| ppHRegX86(i->Xin.SseLdSt.reg); |
| } else { |
| ppHRegX86(i->Xin.SseLdSt.reg); |
| vex_printf(","); |
| ppX86AMode(i->Xin.SseLdSt.addr); |
| } |
| return; |
| case Xin_SseLdzLO: |
| vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? "s" : "d"); |
| ppX86AMode(i->Xin.SseLdzLO.addr); |
| vex_printf(","); |
| ppHRegX86(i->Xin.SseLdzLO.reg); |
| return; |
| case Xin_Sse32Fx4: |
| vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op)); |
| ppHRegX86(i->Xin.Sse32Fx4.src); |
| vex_printf(","); |
| ppHRegX86(i->Xin.Sse32Fx4.dst); |
| return; |
| case Xin_Sse32FLo: |
| vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op)); |
| ppHRegX86(i->Xin.Sse32FLo.src); |
| vex_printf(","); |
| ppHRegX86(i->Xin.Sse32FLo.dst); |
| return; |
| case Xin_Sse64Fx2: |
| vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op)); |
| ppHRegX86(i->Xin.Sse64Fx2.src); |
| vex_printf(","); |
| ppHRegX86(i->Xin.Sse64Fx2.dst); |
| return; |
| case Xin_Sse64FLo: |
| vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op)); |
| ppHRegX86(i->Xin.Sse64FLo.src); |
| vex_printf(","); |
| ppHRegX86(i->Xin.Sse64FLo.dst); |
| return; |
| case Xin_SseReRg: |
| vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op)); |
| ppHRegX86(i->Xin.SseReRg.src); |
| vex_printf(","); |
| ppHRegX86(i->Xin.SseReRg.dst); |
| return; |
| case Xin_SseCMov: |
| vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond)); |
| ppHRegX86(i->Xin.SseCMov.src); |
| vex_printf(","); |
| ppHRegX86(i->Xin.SseCMov.dst); |
| return; |
| case Xin_SseShuf: |
| vex_printf("pshufd $0x%x,", i->Xin.SseShuf.order); |
| ppHRegX86(i->Xin.SseShuf.src); |
| vex_printf(","); |
| ppHRegX86(i->Xin.SseShuf.dst); |
| return; |
| case Xin_EvCheck: |
| vex_printf("(evCheck) decl "); |
| ppX86AMode(i->Xin.EvCheck.amCounter); |
| vex_printf("; jns nofail; jmp *"); |
| ppX86AMode(i->Xin.EvCheck.amFailAddr); |
| vex_printf("; nofail:"); |
| return; |
| case Xin_ProfInc: |
| vex_printf("(profInc) addl $1,NotKnownYet; " |
| "adcl $0,NotKnownYet+4"); |
| return; |
| default: |
| vpanic("ppX86Instr"); |
| } |
| } |
| |
| /* --------- Helpers for register allocation. --------- */ |
| |
| void getRegUsage_X86Instr (HRegUsage* u, X86Instr* i, Bool mode64) |
| { |
| Bool unary; |
| vassert(mode64 == False); |
| initHRegUsage(u); |
| switch (i->tag) { |
| case Xin_Alu32R: |
| addRegUsage_X86RMI(u, i->Xin.Alu32R.src); |
| if (i->Xin.Alu32R.op == Xalu_MOV) { |
| addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst); |
| return; |
| } |
| if (i->Xin.Alu32R.op == Xalu_CMP) { |
| addHRegUse(u, HRmRead, i->Xin.Alu32R.dst); |
| return; |
| } |
| addHRegUse(u, HRmModify, i->Xin.Alu32R.dst); |
| return; |
| case Xin_Alu32M: |
| addRegUsage_X86RI(u, i->Xin.Alu32M.src); |
| addRegUsage_X86AMode(u, i->Xin.Alu32M.dst); |
| return; |
| case Xin_Sh32: |
| addHRegUse(u, HRmModify, i->Xin.Sh32.dst); |
| if (i->Xin.Sh32.src == 0) |
| addHRegUse(u, HRmRead, hregX86_ECX()); |
| return; |
| case Xin_Test32: |
| addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead); |
| return; |
| case Xin_Unary32: |
| addHRegUse(u, HRmModify, i->Xin.Unary32.dst); |
| return; |
| case Xin_Lea32: |
| addRegUsage_X86AMode(u, i->Xin.Lea32.am); |
| addHRegUse(u, HRmWrite, i->Xin.Lea32.dst); |
| return; |
| case Xin_MulL: |
| addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead); |
| addHRegUse(u, HRmModify, hregX86_EAX()); |
| addHRegUse(u, HRmWrite, hregX86_EDX()); |
| return; |
| case Xin_Div: |
| addRegUsage_X86RM(u, i->Xin.Div.src, HRmRead); |
| addHRegUse(u, HRmModify, hregX86_EAX()); |
| addHRegUse(u, HRmModify, hregX86_EDX()); |
| return; |
| case Xin_Sh3232: |
| addHRegUse(u, HRmRead, i->Xin.Sh3232.src); |
| addHRegUse(u, HRmModify, i->Xin.Sh3232.dst); |
| if (i->Xin.Sh3232.amt == 0) |
| addHRegUse(u, HRmRead, hregX86_ECX()); |
| return; |
| case Xin_Push: |
| addRegUsage_X86RMI(u, i->Xin.Push.src); |
| addHRegUse(u, HRmModify, hregX86_ESP()); |
| return; |
| case Xin_Call: |
| /* This is a bit subtle. */ |
| /* First off, claim it trashes all the caller-saved regs |
| which fall within the register allocator's jurisdiction. |
| These I believe to be %eax %ecx %edx and all the xmm |
| registers. */ |
| addHRegUse(u, HRmWrite, hregX86_EAX()); |
| addHRegUse(u, HRmWrite, hregX86_ECX()); |
| addHRegUse(u, HRmWrite, hregX86_EDX()); |
| addHRegUse(u, HRmWrite, hregX86_XMM0()); |
| addHRegUse(u, HRmWrite, hregX86_XMM1()); |
| addHRegUse(u, HRmWrite, hregX86_XMM2()); |
| addHRegUse(u, HRmWrite, hregX86_XMM3()); |
| addHRegUse(u, HRmWrite, hregX86_XMM4()); |
| addHRegUse(u, HRmWrite, hregX86_XMM5()); |
| addHRegUse(u, HRmWrite, hregX86_XMM6()); |
| addHRegUse(u, HRmWrite, hregX86_XMM7()); |
| /* Now we have to state any parameter-carrying registers |
| which might be read. This depends on the regparmness. */ |
| switch (i->Xin.Call.regparms) { |
| case 3: addHRegUse(u, HRmRead, hregX86_ECX()); /*fallthru*/ |
| case 2: addHRegUse(u, HRmRead, hregX86_EDX()); /*fallthru*/ |
| case 1: addHRegUse(u, HRmRead, hregX86_EAX()); break; |
| case 0: break; |
| default: vpanic("getRegUsage_X86Instr:Call:regparms"); |
| } |
| /* Finally, there is the issue that the insn trashes a |
| register because the literal target address has to be |
| loaded into a register. Fortunately, for the 0/1/2 |
| regparm case, we can use EAX, EDX and ECX respectively, so |
| this does not cause any further damage. For the 3-regparm |
| case, we'll have to choose another register arbitrarily -- |
| since A, D and C are used for parameters -- and so we might |
| as well choose EDI. */ |
| if (i->Xin.Call.regparms == 3) |
| addHRegUse(u, HRmWrite, hregX86_EDI()); |
| /* Upshot of this is that the assembler really must observe |
| the here-stated convention of which register to use as an |
| address temporary, depending on the regparmness: 0==EAX, |
| 1==EDX, 2==ECX, 3==EDI. */ |
| return; |
| /* XDirect/XIndir/XAssisted are also a bit subtle. They |
| conditionally exit the block. Hence we only need to list (1) |
| the registers that they read, and (2) the registers that they |
| write in the case where the block is not exited. (2) is |
| empty, hence only (1) is relevant here. */ |
| case Xin_XDirect: |
| addRegUsage_X86AMode(u, i->Xin.XDirect.amEIP); |
| return; |
| case Xin_XIndir: |
| addHRegUse(u, HRmRead, i->Xin.XIndir.dstGA); |
| addRegUsage_X86AMode(u, i->Xin.XIndir.amEIP); |
| return; |
| case Xin_XAssisted: |
| addHRegUse(u, HRmRead, i->Xin.XAssisted.dstGA); |
| addRegUsage_X86AMode(u, i->Xin.XAssisted.amEIP); |
| return; |
| case Xin_CMov32: |
| addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead); |
| addHRegUse(u, HRmModify, i->Xin.CMov32.dst); |
| return; |
| case Xin_LoadEX: |
| addRegUsage_X86AMode(u, i->Xin.LoadEX.src); |
| addHRegUse(u, HRmWrite, i->Xin.LoadEX.dst); |
| return; |
| case Xin_Store: |
| addHRegUse(u, HRmRead, i->Xin.Store.src); |
| addRegUsage_X86AMode(u, i->Xin.Store.dst); |
| return; |
| case Xin_Set32: |
| addHRegUse(u, HRmWrite, i->Xin.Set32.dst); |
| return; |
| case Xin_Bsfr32: |
| addHRegUse(u, HRmRead, i->Xin.Bsfr32.src); |
| addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst); |
| return; |
| case Xin_MFence: |
| return; |
| case Xin_ACAS: |
| addRegUsage_X86AMode(u, i->Xin.ACAS.addr); |
| addHRegUse(u, HRmRead, hregX86_EBX()); |
| addHRegUse(u, HRmModify, hregX86_EAX()); |
| return; |
| case Xin_DACAS: |
| addRegUsage_X86AMode(u, i->Xin.DACAS.addr); |
| addHRegUse(u, HRmRead, hregX86_ECX()); |
| addHRegUse(u, HRmRead, hregX86_EBX()); |
| addHRegUse(u, HRmModify, hregX86_EDX()); |
| addHRegUse(u, HRmModify, hregX86_EAX()); |
| return; |
| case Xin_FpUnary: |
| addHRegUse(u, HRmRead, i->Xin.FpUnary.src); |
| addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst); |
| return; |
| case Xin_FpBinary: |
| addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL); |
| addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR); |
| addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst); |
| return; |
| case Xin_FpLdSt: |
| addRegUsage_X86AMode(u, i->Xin.FpLdSt.addr); |
| addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead, |
| i->Xin.FpLdSt.reg); |
| return; |
| case Xin_FpLdStI: |
| addRegUsage_X86AMode(u, i->Xin.FpLdStI.addr); |
| addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead, |
| i->Xin.FpLdStI.reg); |
| return; |
| case Xin_Fp64to32: |
| addHRegUse(u, HRmRead, i->Xin.Fp64to32.src); |
| addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst); |
| return; |
| case Xin_FpCMov: |
| addHRegUse(u, HRmRead, i->Xin.FpCMov.src); |
| addHRegUse(u, HRmModify, i->Xin.FpCMov.dst); |
| return; |
| case Xin_FpLdCW: |
| addRegUsage_X86AMode(u, i->Xin.FpLdCW.addr); |
| return; |
| case Xin_FpStSW_AX: |
| addHRegUse(u, HRmWrite, hregX86_EAX()); |
| return; |
| case Xin_FpCmp: |
| addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL); |
| addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR); |
| addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst); |
| addHRegUse(u, HRmWrite, hregX86_EAX()); |
| return; |
| case Xin_SseLdSt: |
| addRegUsage_X86AMode(u, i->Xin.SseLdSt.addr); |
| addHRegUse(u, i->Xin.SseLdSt.isLoad ? HRmWrite : HRmRead, |
| i->Xin.SseLdSt.reg); |
| return; |
| case Xin_SseLdzLO: |
| addRegUsage_X86AMode(u, i->Xin.SseLdzLO.addr); |
| addHRegUse(u, HRmWrite, i->Xin.SseLdzLO.reg); |
| return; |
| case Xin_SseConst: |
| addHRegUse(u, HRmWrite, i->Xin.SseConst.dst); |
| return; |
| case Xin_Sse32Fx4: |
| vassert(i->Xin.Sse32Fx4.op != Xsse_MOV); |
| unary = toBool( i->Xin.Sse32Fx4.op == Xsse_RCPF |
| || i->Xin.Sse32Fx4.op == Xsse_RSQRTF |
| || i->Xin.Sse32Fx4.op == Xsse_SQRTF ); |
| addHRegUse(u, HRmRead, i->Xin.Sse32Fx4.src); |
| addHRegUse(u, unary ? HRmWrite : HRmModify, |
| i->Xin.Sse32Fx4.dst); |
| return; |
| case Xin_Sse32FLo: |
| vassert(i->Xin.Sse32FLo.op != Xsse_MOV); |
| unary = toBool( i->Xin.Sse32FLo.op == Xsse_RCPF |
| || i->Xin.Sse32FLo.op == Xsse_RSQRTF |
| || i->Xin.Sse32FLo.op == Xsse_SQRTF ); |
| addHRegUse(u, HRmRead, i->Xin.Sse32FLo.src); |
| addHRegUse(u, unary ? HRmWrite : HRmModify, |
| i->Xin.Sse32FLo.dst); |
| return; |
| case Xin_Sse64Fx2: |
| vassert(i->Xin.Sse64Fx2.op != Xsse_MOV); |
| unary = toBool( i->Xin.Sse64Fx2.op == Xsse_RCPF |
| || i->Xin.Sse64Fx2.op == Xsse_RSQRTF |
| || i->Xin.Sse64Fx2.op == Xsse_SQRTF ); |
| addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src); |
| addHRegUse(u, unary ? HRmWrite : HRmModify, |
| i->Xin.Sse64Fx2.dst); |
| return; |
| case Xin_Sse64FLo: |
| vassert(i->Xin.Sse64FLo.op != Xsse_MOV); |
| unary = toBool( i->Xin.Sse64FLo.op == Xsse_RCPF |
| || i->Xin.Sse64FLo.op == Xsse_RSQRTF |
| || i->Xin.Sse64FLo.op == Xsse_SQRTF ); |
| addHRegUse(u, HRmRead, i->Xin.Sse64FLo.src); |
| addHRegUse(u, unary ? HRmWrite : HRmModify, |
| i->Xin.Sse64FLo.dst); |
| return; |
| case Xin_SseReRg: |
| if (i->Xin.SseReRg.op == Xsse_XOR |
| && sameHReg(i->Xin.SseReRg.src, i->Xin.SseReRg.dst)) { |
| /* reg-alloc needs to understand 'xor r,r' as a write of r */ |
| /* (as opposed to a rite of passage :-) */ |
| addHRegUse(u, HRmWrite, i->Xin.SseReRg.dst); |
| } else { |
| addHRegUse(u, HRmRead, i->Xin.SseReRg.src); |
| addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV |
| ? HRmWrite : HRmModify, |
| i->Xin.SseReRg.dst); |
| } |
| return; |
| case Xin_SseCMov: |
| addHRegUse(u, HRmRead, i->Xin.SseCMov.src); |
| addHRegUse(u, HRmModify, i->Xin.SseCMov.dst); |
| return; |
| case Xin_SseShuf: |
| addHRegUse(u, HRmRead, i->Xin.SseShuf.src); |
| addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst); |
| return; |
| case Xin_EvCheck: |
| /* We expect both amodes only to mention %ebp, so this is in |
| fact pointless, since %ebp isn't allocatable, but anyway.. */ |
| addRegUsage_X86AMode(u, i->Xin.EvCheck.amCounter); |
| addRegUsage_X86AMode(u, i->Xin.EvCheck.amFailAddr); |
| return; |
| case Xin_ProfInc: |
| /* does not use any registers. */ |
| return; |
| default: |
| ppX86Instr(i, False); |
| vpanic("getRegUsage_X86Instr"); |
| } |
| } |
| |
| /* local helper */ |
| static void mapReg( HRegRemap* m, HReg* r ) |
| { |
| *r = lookupHRegRemap(m, *r); |
| } |
| |
| void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 ) |
| { |
| vassert(mode64 == False); |
| switch (i->tag) { |
| case Xin_Alu32R: |
| mapRegs_X86RMI(m, i->Xin.Alu32R.src); |
| mapReg(m, &i->Xin.Alu32R.dst); |
| return; |
| case Xin_Alu32M: |
| mapRegs_X86RI(m, i->Xin.Alu32M.src); |
| mapRegs_X86AMode(m, i->Xin.Alu32M.dst); |
| return; |
| case Xin_Sh32: |
| mapReg(m, &i->Xin.Sh32.dst); |
| return; |
| case Xin_Test32: |
| mapRegs_X86RM(m, i->Xin.Test32.dst); |
| return; |
| case Xin_Unary32: |
| mapReg(m, &i->Xin.Unary32.dst); |
| return; |
| case Xin_Lea32: |
| mapRegs_X86AMode(m, i->Xin.Lea32.am); |
| mapReg(m, &i->Xin.Lea32.dst); |
| return; |
| case Xin_MulL: |
| mapRegs_X86RM(m, i->Xin.MulL.src); |
| return; |
| case Xin_Div: |
| mapRegs_X86RM(m, i->Xin.Div.src); |
| return; |
| case Xin_Sh3232: |
| mapReg(m, &i->Xin.Sh3232.src); |
| mapReg(m, &i->Xin.Sh3232.dst); |
| return; |
| case Xin_Push: |
| mapRegs_X86RMI(m, i->Xin.Push.src); |
| return; |
| case Xin_Call: |
| return; |
| case Xin_XDirect: |
| mapRegs_X86AMode(m, i->Xin.XDirect.amEIP); |
| return; |
| case Xin_XIndir: |
| mapReg(m, &i->Xin.XIndir.dstGA); |
| mapRegs_X86AMode(m, i->Xin.XIndir.amEIP); |
| return; |
| case Xin_XAssisted: |
| mapReg(m, &i->Xin.XAssisted.dstGA); |
| mapRegs_X86AMode(m, i->Xin.XAssisted.amEIP); |
| return; |
| case Xin_CMov32: |
| mapRegs_X86RM(m, i->Xin.CMov32.src); |
| mapReg(m, &i->Xin.CMov32.dst); |
| return; |
| case Xin_LoadEX: |
| mapRegs_X86AMode(m, i->Xin.LoadEX.src); |
| mapReg(m, &i->Xin.LoadEX.dst); |
| return; |
| case Xin_Store: |
| mapReg(m, &i->Xin.Store.src); |
| mapRegs_X86AMode(m, i->Xin.Store.dst); |
| return; |
| case Xin_Set32: |
| mapReg(m, &i->Xin.Set32.dst); |
| return; |
| case Xin_Bsfr32: |
| mapReg(m, &i->Xin.Bsfr32.src); |
| mapReg(m, &i->Xin.Bsfr32.dst); |
| return; |
| case Xin_MFence: |
| return; |
| case Xin_ACAS: |
| mapRegs_X86AMode(m, i->Xin.ACAS.addr); |
| return; |
| case Xin_DACAS: |
| mapRegs_X86AMode(m, i->Xin.DACAS.addr); |
| return; |
| case Xin_FpUnary: |
| mapReg(m, &i->Xin.FpUnary.src); |
| mapReg(m, &i->Xin.FpUnary.dst); |
| return; |
| case Xin_FpBinary: |
| mapReg(m, &i->Xin.FpBinary.srcL); |
| mapReg(m, &i->Xin.FpBinary.srcR); |
| mapReg(m, &i->Xin.FpBinary.dst); |
| return; |
| case Xin_FpLdSt: |
| mapRegs_X86AMode(m, i->Xin.FpLdSt.addr); |
| mapReg(m, &i->Xin.FpLdSt.reg); |
| return; |
| case Xin_FpLdStI: |
| mapRegs_X86AMode(m, i->Xin.FpLdStI.addr); |
| mapReg(m, &i->Xin.FpLdStI.reg); |
| return; |
| case Xin_Fp64to32: |
| mapReg(m, &i->Xin.Fp64to32.src); |
| mapReg(m, &i->Xin.Fp64to32.dst); |
| return; |
| case Xin_FpCMov: |
| mapReg(m, &i->Xin.FpCMov.src); |
| mapReg(m, &i->Xin.FpCMov.dst); |
| return; |
| case Xin_FpLdCW: |
| mapRegs_X86AMode(m, i->Xin.FpLdCW.addr); |
| return; |
| case Xin_FpStSW_AX: |
| return; |
| case Xin_FpCmp: |
| mapReg(m, &i->Xin.FpCmp.srcL); |
| mapReg(m, &i->Xin.FpCmp.srcR); |
| mapReg(m, &i->Xin.FpCmp.dst); |
| return; |
| case Xin_SseConst: |
| mapReg(m, &i->Xin.SseConst.dst); |
| return; |
| case Xin_SseLdSt: |
| mapReg(m, &i->Xin.SseLdSt.reg); |
| mapRegs_X86AMode(m, i->Xin.SseLdSt.addr); |
| break; |
| case Xin_SseLdzLO: |
| mapReg(m, &i->Xin.SseLdzLO.reg); |
| mapRegs_X86AMode(m, i->Xin.SseLdzLO.addr); |
| break; |
| case Xin_Sse32Fx4: |
| mapReg(m, &i->Xin.Sse32Fx4.src); |
| mapReg(m, &i->Xin.Sse32Fx4.dst); |
| return; |
| case Xin_Sse32FLo: |
| mapReg(m, &i->Xin.Sse32FLo.src); |
| mapReg(m, &i->Xin.Sse32FLo.dst); |
| return; |
| case Xin_Sse64Fx2: |
| mapReg(m, &i->Xin.Sse64Fx2.src); |
| mapReg(m, &i->Xin.Sse64Fx2.dst); |
| return; |
| case Xin_Sse64FLo: |
| mapReg(m, &i->Xin.Sse64FLo.src); |
| mapReg(m, &i->Xin.Sse64FLo.dst); |
| return; |
| case Xin_SseReRg: |
| mapReg(m, &i->Xin.SseReRg.src); |
| mapReg(m, &i->Xin.SseReRg.dst); |
| return; |
| case Xin_SseCMov: |
| mapReg(m, &i->Xin.SseCMov.src); |
| mapReg(m, &i->Xin.SseCMov.dst); |
| return; |
| case Xin_SseShuf: |
| mapReg(m, &i->Xin.SseShuf.src); |
| mapReg(m, &i->Xin.SseShuf.dst); |
| return; |
| case Xin_EvCheck: |
| /* We expect both amodes only to mention %ebp, so this is in |
| fact pointless, since %ebp isn't allocatable, but anyway.. */ |
| mapRegs_X86AMode(m, i->Xin.EvCheck.amCounter); |
| mapRegs_X86AMode(m, i->Xin.EvCheck.amFailAddr); |
| return; |
| case Xin_ProfInc: |
| /* does not use any registers. */ |
| return; |
| |
| default: |
| ppX86Instr(i, mode64); |
| vpanic("mapRegs_X86Instr"); |
| } |
| } |
| |
| /* Figure out if i represents a reg-reg move, and if so assign the |
| source and destination to *src and *dst. If in doubt say No. Used |
| by the register allocator to do move coalescing. |
| */ |
| Bool isMove_X86Instr ( X86Instr* i, HReg* src, HReg* dst ) |
| { |
| /* Moves between integer regs */ |
| if (i->tag == Xin_Alu32R) { |
| if (i->Xin.Alu32R.op != Xalu_MOV) |
| return False; |
| if (i->Xin.Alu32R.src->tag != Xrmi_Reg) |
| return False; |
| *src = i->Xin.Alu32R.src->Xrmi.Reg.reg; |
| *dst = i->Xin.Alu32R.dst; |
| return True; |
| } |
| /* Moves between FP regs */ |
| if (i->tag == Xin_FpUnary) { |
| if (i->Xin.FpUnary.op != Xfp_MOV) |
| return False; |
| *src = i->Xin.FpUnary.src; |
| *dst = i->Xin.FpUnary.dst; |
| return True; |
| } |
| if (i->tag == Xin_SseReRg) { |
| if (i->Xin.SseReRg.op != Xsse_MOV) |
| return False; |
| *src = i->Xin.SseReRg.src; |
| *dst = i->Xin.SseReRg.dst; |
| return True; |
| } |
| return False; |
| } |
| |
| |
| /* Generate x86 spill/reload instructions under the direction of the |
| register allocator. Note it's critical these don't write the |
| condition codes. */ |
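
/* As a concrete illustration: spilling an HRcInt32 rreg to offset 0x20
   produces "movl %rreg,0x20(%ebp)", and the matching reload produces
   "movl 0x20(%ebp),%rreg"; neither touches %eflags. */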
| |
| void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, |
| HReg rreg, Int offsetB, Bool mode64 ) |
| { |
| X86AMode* am; |
| vassert(offsetB >= 0); |
| vassert(!hregIsVirtual(rreg)); |
| vassert(mode64 == False); |
| *i1 = *i2 = NULL; |
| am = X86AMode_IR(offsetB, hregX86_EBP()); |
| switch (hregClass(rreg)) { |
| case HRcInt32: |
| *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am ); |
| return; |
| case HRcFlt64: |
| *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am ); |
| return; |
| case HRcVec128: |
| *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am ); |
| return; |
| default: |
| ppHRegClass(hregClass(rreg)); |
| vpanic("genSpill_X86: unimplemented regclass"); |
| } |
| } |
| |
| void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, |
| HReg rreg, Int offsetB, Bool mode64 ) |
| { |
| X86AMode* am; |
| vassert(offsetB >= 0); |
| vassert(!hregIsVirtual(rreg)); |
| vassert(mode64 == False); |
| *i1 = *i2 = NULL; |
| am = X86AMode_IR(offsetB, hregX86_EBP()); |
| switch (hregClass(rreg)) { |
| case HRcInt32: |
| *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg ); |
| return; |
| case HRcFlt64: |
| *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am ); |
| return; |
| case HRcVec128: |
| *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am ); |
| return; |
| default: |
| ppHRegClass(hregClass(rreg)); |
| vpanic("genReload_X86: unimplemented regclass"); |
| } |
| } |
| |
| /* The given instruction reads the specified vreg exactly once, and |
| that vreg is currently located at the given spill offset. If |
   possible, return a variant of the instruction which instead
   references the spill slot directly. */
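
/* For example (illustrative): if vreg lives at spill offset 0x18, then
   "orl %vreg,%dst" can be rewritten below as "orl 0x18(%ebp),%dst",
   avoiding a separate reload instruction. */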
| |
| X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off ) |
| { |
| vassert(spill_off >= 0 && spill_off < 10000); /* let's say */ |
| |
| /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg |
| Convert to: src=RMI_Mem, dst=Reg |
| */ |
| if (i->tag == Xin_Alu32R |
| && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR |
| || i->Xin.Alu32R.op == Xalu_XOR) |
| && i->Xin.Alu32R.src->tag == Xrmi_Reg |
| && sameHReg(i->Xin.Alu32R.src->Xrmi.Reg.reg, vreg)) { |
| vassert(! sameHReg(i->Xin.Alu32R.dst, vreg)); |
| return X86Instr_Alu32R( |
| i->Xin.Alu32R.op, |
| X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())), |
| i->Xin.Alu32R.dst |
| ); |
| } |
| |
| /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg |
| Convert to: src=RI_Imm, dst=Mem |
| */ |
| if (i->tag == Xin_Alu32R |
| && (i->Xin.Alu32R.op == Xalu_CMP) |
| && i->Xin.Alu32R.src->tag == Xrmi_Imm |
| && sameHReg(i->Xin.Alu32R.dst, vreg)) { |
| return X86Instr_Alu32M( |
| i->Xin.Alu32R.op, |
| X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ), |
| X86AMode_IR( spill_off, hregX86_EBP()) |
| ); |
| } |
| |
| /* Deal with form: Push(RMI_Reg) |
| Convert to: Push(RMI_Mem) |
| */ |
| if (i->tag == Xin_Push |
| && i->Xin.Push.src->tag == Xrmi_Reg |
| && sameHReg(i->Xin.Push.src->Xrmi.Reg.reg, vreg)) { |
| return X86Instr_Push( |
| X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())) |
| ); |
| } |
| |
| /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src |
| Convert to CMov32(RM_Mem, dst) */ |
| if (i->tag == Xin_CMov32 |
| && i->Xin.CMov32.src->tag == Xrm_Reg |
| && sameHReg(i->Xin.CMov32.src->Xrm.Reg.reg, vreg)) { |
| vassert(! sameHReg(i->Xin.CMov32.dst, vreg)); |
| return X86Instr_CMov32( |
| i->Xin.CMov32.cond, |
| X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )), |
| i->Xin.CMov32.dst |
| ); |
| } |
| |
| /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */ |
| if (i->tag == Xin_Test32 |
| && i->Xin.Test32.dst->tag == Xrm_Reg |
| && sameHReg(i->Xin.Test32.dst->Xrm.Reg.reg, vreg)) { |
| return X86Instr_Test32( |
| i->Xin.Test32.imm32, |
| X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) ) |
| ); |
| } |
| |
| return NULL; |
| } |
| |
| |
| /* --------- The x86 assembler (bleh.) --------- */ |
| |
| static UChar iregNo ( HReg r ) |
| { |
| UInt n; |
| vassert(hregClass(r) == HRcInt32); |
| vassert(!hregIsVirtual(r)); |
| n = hregNumber(r); |
| vassert(n <= 7); |
| return toUChar(n); |
| } |
| |
| static UInt fregNo ( HReg r ) |
| { |
| UInt n; |
| vassert(hregClass(r) == HRcFlt64); |
| vassert(!hregIsVirtual(r)); |
| n = hregNumber(r); |
| vassert(n <= 5); |
| return n; |
| } |
| |
| static UInt vregNo ( HReg r ) |
| { |
| UInt n; |
| vassert(hregClass(r) == HRcVec128); |
| vassert(!hregIsVirtual(r)); |
| n = hregNumber(r); |
| vassert(n <= 7); |
| return n; |
| } |
| |
| static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem ) |
| { |
| vassert(mod < 4); |
| vassert((reg|regmem) < 8); |
| return toUChar( ((mod & 3) << 6) |
| | ((reg & 7) << 3) |
| | (regmem & 7) ); |
| } |
| |
| static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase ) |
| { |
| vassert(shift < 4); |
| vassert((regindex|regbase) < 8); |
| return toUChar( ((shift & 3) << 6) |
| | ((regindex & 7) << 3) |
| | (regbase & 7) ); |
| } |
| |
| static UChar* emit32 ( UChar* p, UInt w32 ) |
| { |
| *p++ = toUChar( w32 & 0x000000FF); |
| *p++ = toUChar((w32 >> 8) & 0x000000FF); |
| *p++ = toUChar((w32 >> 16) & 0x000000FF); |
| *p++ = toUChar((w32 >> 24) & 0x000000FF); |
| return p; |
| } |
| |
| /* Does a sign-extend of the lowest 8 bits give |
| the original number? */ |
| static Bool fits8bits ( UInt w32 ) |
| { |
| Int i32 = (Int)w32; |
| return toBool(i32 == ((i32 << 24) >> 24)); |
| } |
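| /* Illustrative values: fits8bits(0x0000007F) and fits8bits(0xFFFFFF80) |
|    hold, whereas fits8bits(0x00000080) does not, since sign-extending |
|    0x80 from 8 bits gives 0xFFFFFF80. */ |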
| |
| |
| /* Forming mod-reg-rm bytes and scale-index-base bytes. |
| |
| greg, 0(ereg) | ereg != ESP && ereg != EBP |
| = 00 greg ereg |
| |
| greg, d8(ereg) | ereg != ESP |
| = 01 greg ereg, d8 |
| |
| greg, d32(ereg) | ereg != ESP |
| = 10 greg ereg, d32 |
| |
| greg, d8(%esp) = 01 greg 100, 0x24, d8 |
| |
| ----------------------------------------------- |
| |
| greg, d8(base,index,scale) |
| | index != ESP |
| = 01 greg 100, scale index base, d8 |
| |
| greg, d32(base,index,scale) |
| | index != ESP |
| = 10 greg 100, scale index base, d32 |
| */ |
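| /* A worked example (illustrative only): for "movl %ecx, 8(%ebx)" the |
|    caller emits the 0x89 opcode and then calls doAMode_M with |
|    greg = %ecx and am = 8(%ebx). That is the d8(ereg) case above: |
|    mod=01 reg=001 rm=011 (0x4B) followed by the displacement byte |
|    0x08. */ |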
| static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am ) |
| { |
| if (am->tag == Xam_IR) { |
| if (am->Xam.IR.imm == 0 |
| && ! sameHReg(am->Xam.IR.reg, hregX86_ESP()) |
| && ! sameHReg(am->Xam.IR.reg, hregX86_EBP()) ) { |
| *p++ = mkModRegRM(0, iregNo(greg), iregNo(am->Xam.IR.reg)); |
| return p; |
| } |
| if (fits8bits(am->Xam.IR.imm) |
| && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())) { |
| *p++ = mkModRegRM(1, iregNo(greg), iregNo(am->Xam.IR.reg)); |
| *p++ = toUChar(am->Xam.IR.imm & 0xFF); |
| return p; |
| } |
| if (! sameHReg(am->Xam.IR.reg, hregX86_ESP())) { |
| *p++ = mkModRegRM(2, iregNo(greg), iregNo(am->Xam.IR.reg)); |
| p = emit32(p, am->Xam.IR.imm); |
| return p; |
| } |
| if (sameHReg(am->Xam.IR.reg, hregX86_ESP()) |
| && fits8bits(am->Xam.IR.imm)) { |
| *p++ = mkModRegRM(1, iregNo(greg), 4); |
| *p++ = 0x24; |
| *p++ = toUChar(am->Xam.IR.imm & 0xFF); |
| return p; |
| } |
| ppX86AMode(am); |
| vpanic("doAMode_M: can't emit amode IR"); |
| /*NOTREACHED*/ |
| } |
| if (am->tag == Xam_IRRS) { |
| if (fits8bits(am->Xam.IRRS.imm) |
| && ! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) { |
| *p++ = mkModRegRM(1, iregNo(greg), 4); |
| *p++ = mkSIB(am->Xam.IRRS.shift, iregNo(am->Xam.IRRS.index), |
| iregNo(am->Xam.IRRS.base)); |
| *p++ = toUChar(am->Xam.IRRS.imm & 0xFF); |
| return p; |
| } |
| if (! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) { |
| *p++ = mkModRegRM(2, iregNo(greg), 4); |
| *p++ = mkSIB(am->Xam.IRRS.shift, iregNo(am->Xam.IRRS.index), |
| iregNo(am->Xam.IRRS.base)); |
| p = emit32(p, am->Xam.IRRS.imm); |
| return p; |
| } |
| ppX86AMode(am); |
| vpanic("doAMode_M: can't emit amode IRRS"); |
| /*NOTREACHED*/ |
| } |
| vpanic("doAMode_M: unknown amode"); |
| /*NOTREACHED*/ |
| } |
| |
| |
| /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */ |
| static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg ) |
| { |
| *p++ = mkModRegRM(3, iregNo(greg), iregNo(ereg)); |
| return p; |
| } |
| |
| |
| /* Emit ffree %st(7) */ |
| static UChar* do_ffree_st7 ( UChar* p ) |
| { |
| *p++ = 0xDD; |
| *p++ = 0xC7; |
| return p; |
| } |
| |
| /* Emit fstp %st(i), 1 <= i <= 7 */ |
| static UChar* do_fstp_st ( UChar* p, Int i ) |
| { |
| vassert(1 <= i && i <= 7); |
| *p++ = 0xDD; |
| *p++ = toUChar(0xD8+i); |
| return p; |
| } |
| |
| /* Emit fld %st(i), 0 <= i <= 6 */ |
| static UChar* do_fld_st ( UChar* p, Int i ) |
| { |
| vassert(0 <= i && i <= 6); |
| *p++ = 0xD9; |
| *p++ = toUChar(0xC0+i); |
| return p; |
| } |
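| /* Encoding examples (illustrative only): do_fld_st(p, 3) emits |
|    D9 C3, i.e. "fld %st(3)", and do_fstp_st(p, 2) emits DD DA, |
|    i.e. "fstp %st(2)". */ |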
| |
| /* Emit f<op> %st(0) */ |
| static UChar* do_fop1_st ( UChar* p, X86FpOp op ) |
| { |
| switch (op) { |
| case Xfp_NEG: *p++ = 0xD9; *p++ = 0xE0; break; |
| case Xfp_ABS: *p++ = 0xD9; *p++ = 0xE1; break; |
| case Xfp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break; |
| case Xfp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break; |
| case Xfp_SIN: *p++ = 0xD9; *p++ = 0xFE; break; |
| case Xfp_COS: *p++ = 0xD9; *p++ = 0xFF; break; |
| case Xfp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break; |
| case Xfp_MOV: break; |
| case Xfp_TAN: p = do_ffree_st7(p); /* since fptan pushes 1.0 */ |
| *p++ = 0xD9; *p++ = 0xF2; /* fptan */ |
| *p++ = 0xD9; *p++ = 0xF7; /* fincstp */ |
| break; |
| default: vpanic("do_fop1_st: unknown op"); |
| } |
| return p; |
| } |
| |
| /* Emit f<op> %st(i), 1 <= i <= 5 */ |
| static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i ) |
| { |
| # define fake(_n) mkHReg((_n), HRcInt32, False) |
| Int subopc; |
| switch (op) { |
| case Xfp_ADD: subopc = 0; break; |
| case Xfp_SUB: subopc = 4; break; |
| case Xfp_MUL: subopc = 1; break; |
| case Xfp_DIV: subopc = 6; break; |
| default: vpanic("do_fop2_st: unknown op"); |
| } |
| *p++ = 0xD8; |
| p = doAMode_R(p, fake(subopc), fake(i)); |
| return p; |
| # undef fake |
| } |
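| /* For example (illustrative only): do_fop2_st(p, Xfp_ADD, 3) emits |
|    D8 C3, i.e. "fadd %st(3),%st", since sub-opcode 0 goes in the |
|    reg field and 3 in the r/m field, with mod = 3. */ |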
| |
| /* Push a 32-bit word on the stack. The word depends on tags[3:0]: |
|    byte k of the pushed word is 0xFF if bit k of tags is set, and |
|    0x00 otherwise. */ |
| static UChar* push_word_from_tags ( UChar* p, UShort tags ) |
| { |
| UInt w; |
| vassert(0 == (tags & ~0xF)); |
| if (tags == 0) { |
| /* pushl $0x00000000 */ |
| *p++ = 0x6A; |
| *p++ = 0x00; |
| } |
| else |
| /* pushl $0xFFFFFFFF */ |
| if (tags == 0xF) { |
| *p++ = 0x6A; |
| *p++ = 0xFF; |
| } else { |
| vassert(0); /* awaiting test case */ |
| w = 0; |
| if (tags & 1) w |= 0x000000FF; |
| if (tags & 2) w |= 0x0000FF00; |
| if (tags & 4) w |= 0x00FF0000; |
| if (tags & 8) w |= 0xFF000000; |
| *p++ = 0x68; |
| p = emit32(p, w); |
| } |
| return p; |
| } |
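| /* For instance (illustrative only): tags == 0xF pushes 0xFFFFFFFF |
|    via the two-byte "pushl $-1"; tags == 0x5 would push 0x00FF00FF, |
|    although that path currently asserts, awaiting a test case. */ |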
| |
| /* Emit an instruction into buf and return the number of bytes used. |
| Note that buf is not the insn's final place, and therefore it is |
| imperative to emit position-independent code. If the emitted |
| instruction was a profiler inc, set *is_profInc to True, else |
| leave it unchanged. */ |
| |
| Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc, |
| UChar* buf, Int nbuf, X86Instr* i, |
| Bool mode64, |
| void* disp_cp_chain_me_to_slowEP, |
| void* disp_cp_chain_me_to_fastEP, |
| void* disp_cp_xindir, |
| void* disp_cp_xassisted ) |
| { |
| UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc; |
| |
| UInt xtra; |
| UChar* p = &buf[0]; |
| UChar* ptmp; |
| vassert(nbuf >= 32); |
| vassert(mode64 == False); |
| |
| /* Wrap an integer as an int register, for use when assembling |
|    GrpN insns, in which the greg field is used as a sub-opcode |
|    and does not really contain a register. */ |
| # define fake(_n) mkHReg((_n), HRcInt32, False) |
| |
| /* vex_printf("asm ");ppX86Instr(i, mode64); vex_printf("\n"); */ |
| |
| switch (i->tag) { |
| |
| case Xin_Alu32R: |
| /* Deal specially with MOV */ |
| if (i->Xin.Alu32R.op == Xalu_MOV) { |
| switch (i->Xin.Alu32R.src->tag) { |
| case Xrmi_Imm: |
| *p++ = toUChar(0xB8 + iregNo(i->Xin.Alu32R.dst)); |
| p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32); |
| goto done; |
| case Xrmi_Reg: |
| *p++ = 0x89; |
| p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg, |
| i->Xin.Alu32R.dst); |
| goto done; |
| case Xrmi_Mem: |
| *p++ = 0x8B; |
| p = doAMode_M(p, i->Xin.Alu32R.dst, |
| i->Xin.Alu32R.src->Xrmi.Mem.am); |
| goto done; |
| default: |
| goto bad; |
| } |
| } |
| /* MUL */ |
| if (i->Xin.Alu32R.op == Xalu_MUL) { |
| switch (i->Xin.Alu32R.src->tag) { |
| case Xrmi_Reg: |
| *p++ = 0x0F; |
| *p++ = 0xAF; |
| p = doAMode_R(p, i->Xin.Alu32R.dst, |
| i->Xin.Alu32R.src->Xrmi.Reg.reg); |
| goto done; |
| case Xrmi_Mem: |
| *p++ = 0x0F; |
| *p++ = 0xAF; |
| p = doAMode_M(p, i->Xin.Alu32R.dst, |
| i->Xin.Alu32R.src->Xrmi.Mem.am); |
| goto done; |
| case Xrmi_Imm: |
| if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) { |
| *p++ = 0x6B; |
| p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst); |
| *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32); |
| } else { |
| *p++ = 0x69; |
| p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst); |
| p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32); |
| } |
| goto done; |
| default: |
| goto bad; |
| } |
| } |
| /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */ |
| opc = opc_rr = subopc_imm = opc_imma = 0; |
| switch (i->Xin.Alu32R.op) { |
| case Xalu_ADC: opc = 0x13; opc_rr = 0x11; |
| subopc_imm = 2; opc_imma = 0x15; break; |
| case Xalu_ADD: opc = 0x03; opc_rr = 0x01; |
| subopc_imm = 0; opc_imma = 0x05; break; |
| case Xalu_SUB: opc = 0x2B; opc_rr = 0x29; |
| subopc_imm = 5; opc_imma = 0x2D; break; |
| case Xalu_SBB: opc = 0x1B; opc_rr = 0x19; |
| subopc_imm = 3; opc_imma = 0x1D; break; |
| case Xalu_AND: opc = 0x23; opc_rr = 0x21; |
| subopc_imm = 4; opc_imma = 0x25; break; |
| case Xalu_XOR: opc = 0x33; opc_rr = 0x31; |
| subopc_imm = 6; opc_imma = 0x35; break; |
| case Xalu_OR: opc = 0x0B; opc_rr = 0x09; |
| subopc_imm = 1; opc_imma = 0x0D; break; |
| case Xalu_CMP: opc = 0x3B; opc_rr = 0x39; |
| subopc_imm = 7; opc_imma = 0x3D; break; |
| default: goto bad; |
| } |
| switch (i->Xin.Alu32R.src->tag) { |
| case Xrmi_Imm: |
| if (sameHReg(i->Xin.Alu32R.dst, hregX86_EAX()) |
| && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) { |
| *p++ = toUChar(opc_imma); |
| p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32); |
| } else |
| if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) { |
| *p++ = 0x83; |
| p = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst); |
| *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32); |
| } else { |
| *p++ = 0x81; |
| p = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst); |
| p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32); |
| } |
| goto done; |
| case Xrmi_Reg: |
| *p++ = toUChar(opc_rr); |
| p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg, |
| i->Xin.Alu32R.dst); |
| goto done; |
| case Xrmi_Mem: |
| *p++ = toUChar(opc); |
| p = doAMode_M(p, i->Xin.Alu32R.dst, |
| i->Xin.Alu32R.src->Xrmi.Mem.am); |
| goto done; |
| default: |
| goto bad; |
| } |
| break; |
| |
| case Xin_Alu32M: |
| /* Deal specially with MOV */ |
| if (i->Xin.Alu32M.op == Xalu_MOV) { |
| switch (i->Xin.Alu32M.src->tag) { |
| case Xri_Reg: |
| *p++ = 0x89; |
| p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg, |
| i->Xin.Alu32M.dst); |
| goto done; |
| case Xri_Imm: |
| *p++ = 0xC7; |
| p = doAMode_M(p, fake(0), i->Xin.Alu32M.dst); |
| p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32); |
| goto done; |
| default: |
| goto bad; |
| } |
| } |
| /* ADD/SUB/CMP. The other ALU ops, including MUL, are not |
|    allowed here. */ |
| opc = subopc_imm = opc_imma = 0; |
| switch (i->Xin.Alu32M.op) { |
| case Xalu_ADD: opc = 0x01; subopc_imm = 0; break; |
| case Xalu_SUB: opc = 0x29; subopc_imm = 5; break; |
| case Xalu_CMP: opc = 0x39; subopc_imm = 7; break; |
| default: goto bad; |
| } |
| switch (i->Xin.Alu32M.src->tag) { |
| case Xri_Reg: |
| *p++ = toUChar(opc); |
| p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg, |
| i->Xin.Alu32M.dst); |
| goto done; |
| case Xri_Imm: |
| if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) { |
| *p++ = 0x83; |
| p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst); |
| *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32); |
| goto done; |
| } else { |
| *p++ = 0x81; |
| p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst); |
| p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32); |
| goto done; |
| } |
| default: |
| goto bad; |
| } |
| break; |
| |
| case Xin_Sh32: |
| opc_cl = opc_imm = subopc = 0; |
| switch (i->Xin.Sh32.op) { |
| case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break; |
| case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break; |
| case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break; |
| default: goto bad; |
| } |
| if (i->Xin.Sh32.src == 0) { |
| *p++ = toUChar(opc_cl); |
| p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst); |
| } else { |
| *p++ = toUChar(opc_imm); |
| p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst); |
| *p++ = (UChar)(i->Xin.Sh32.src); |
| } |
| goto done; |
| |
| case Xin_Test32: |
| if (i->Xin.Test32.dst->tag == Xrm_Reg) { |
| /* testl $imm32, %reg */ |
| *p++ = 0xF7; |
| p = doAMode_R(p, fake(0), i->Xin.Test32.dst->Xrm.Reg.reg); |
| p = emit32(p, i->Xin.Test32.imm32); |
| goto done; |
| } else { |
| /* testl $imm32, amode */ |
| *p++ = 0xF7; |
| p = doAMode_M(p, fake(0), i->Xin.Test32.dst->Xrm.Mem.am); |
| p = emit32(p, i->Xin.Test32.imm32); |
| goto done; |
| } |
| |
| case Xin_Unary32: |
| if (i->Xin.Unary32.op == Xun_NOT) { |
| *p++ = 0xF7; |
| p = doAMode_R(p, fake(2), i->Xin.Unary32.dst); |
| goto done; |
| } |
| if (i->Xin.Unary32.op == Xun_NEG) { |
| *p++ = 0xF7; |
| p = doAMode_R(p, fake(3), i->Xin.Unary32.dst); |
| goto done; |
| } |
| break; |
| |
| case Xin_Lea32: |
| *p++ = 0x8D; |
| p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am); |
| goto done; |
| |
| case Xin_MulL: |
| subopc = i->Xin.MulL.syned ? 5 : 4; |
| *p++ = 0xF7; |
| switch (i->Xin.MulL.src->tag) { |
| case Xrm_Mem: |
| p = doAMode_M(p, fake(subopc), |
| i->Xin.MulL.src->Xrm.Mem.am); |
| goto done; |
| case Xrm_Reg: |
| p = doAMode_R(p, fake(subopc), |
| i->Xin.MulL.src->Xrm.Reg.reg); |
| goto done; |
| default: |
| goto bad; |
| } |
| break; |
| |
| case Xin_Div: |
| subopc = i->Xin.Div.syned ? 7 : 6; |
| *p++ = 0xF7; |
| switch (i->Xin.Div.src->tag) { |
| case Xrm_Mem: |
| p = doAMode_M(p, fake(subopc), |
| i->Xin.Div.src->Xrm.Mem.am); |
| goto done; |
| case Xrm_Reg: |
| p = doAMode_R(p, fake(subopc), |
| i->Xin.Div.src->Xrm.Reg.reg); |
| goto done; |
| default: |
| goto bad; |
| } |
| break; |
| |
| case Xin_Sh3232: |
| vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR); |
| if (i->Xin.Sh3232.amt == 0) { |
| /* shldl/shrdl by %cl */ |
| *p++ = 0x0F; |
| if (i->Xin.Sh3232.op == Xsh_SHL) { |
| *p++ = 0xA5; |
| } else { |
| *p++ = 0xAD; |
| } |
| p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst); |
| goto done; |
| } |
| break; |
| |
| case Xin_Push: |
| switch (i->Xin.Push.src->tag) { |
| case Xrmi_Mem: |
| *p++ = 0xFF; |
| p = doAMode_M(p, fake(6), i->Xin.Push.src->Xrmi.Mem.am); |
| goto done; |
| case Xrmi_Imm: |
| *p++ = 0x68; |
| p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32); |
| goto done; |
| case Xrmi_Reg: |
| *p++ = toUChar(0x50 + iregNo(i->Xin.Push.src->Xrmi.Reg.reg)); |
| goto done; |
| default: |
| goto bad; |
| } |
| |
| case Xin_Call: |
| if (i->Xin.Call.cond != Xcc_ALWAYS |
| && i->Xin.Call.rloc.pri != RLPri_None) { |
| /* The call might not happen (it isn't unconditional) and it |
| returns a result. In this case we will need to generate a |
| control flow diamond to put 0x555..555 in the return |
| register(s) in the case where the call doesn't happen. If |
| this ever becomes necessary, maybe copy code from the ARM |
| equivalent. Until that day, just give up. */ |
| goto bad; |
| } |
| /* See detailed comment for Xin_Call in getRegUsage_X86Instr above |
| for explanation of this. */ |
| switch (i->Xin.Call.regparms) { |
| case 0: irno = iregNo(hregX86_EAX()); break; |
| case 1: irno = iregNo(hregX86_EDX()); break; |
| case 2: irno = iregNo(hregX86_ECX()); break; |
| case 3: irno = iregNo(hregX86_EDI()); break; |
| default: vpanic(" emit_X86Instr:call:regparms"); |
| } |
| /* jump over the following two insns if the condition does not |
| hold */ |
| if (i->Xin.Call.cond != Xcc_ALWAYS) { |
| *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1))); |
| *p++ = 0x07; /* 7 bytes in the next two insns */ |
| } |
| /* movl $target, %tmp */ |
| *p++ = toUChar(0xB8 + irno); |
| p = emit32(p, i->Xin.Call.target); |
| /* call *%tmp */ |
| *p++ = 0xFF; |
| *p++ = toUChar(0xD0 + irno); |
| goto done; |
| |
| case Xin_XDirect: { |
| /* NB: what goes on here has to be very closely coordinated with the |
| chainXDirect_X86 and unchainXDirect_X86 below. */ |
| /* We're generating chain-me requests here, so we need to be |
| sure this is actually allowed -- no-redir translations can't |
| use chain-me's. Hence: */ |
| vassert(disp_cp_chain_me_to_slowEP != NULL); |
| vassert(disp_cp_chain_me_to_fastEP != NULL); |
| |
| /* Use ptmp for backpatching conditional jumps. */ |
| ptmp = NULL; |
| |
| /* First off, if this is conditional, create a conditional |
| jump over the rest of it. */ |
| if (i->Xin.XDirect.cond != Xcc_ALWAYS) { |
| /* jmp fwds if !condition */ |
| *p++ = toUChar(0x70 + (0xF & (i->Xin.XDirect.cond ^ 1))); |
| ptmp = p; /* fill in this bit later */ |
| *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ |
| } |
| |
| /* Update the guest EIP. */ |
| /* movl $dstGA, amEIP */ |
| *p++ = 0xC7; |
| p = doAMode_M(p, fake(0), i->Xin.XDirect.amEIP); |
| p = emit32(p, i->Xin.XDirect.dstGA); |
| |
| /* --- FIRST PATCHABLE BYTE follows --- */ |
| /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling |
| to) backs up the return address, so as to find the address of |
| the first patchable byte. So: don't change the length of the |
| two instructions below. */ |
| /* movl $disp_cp_chain_me_to_{slow,fast}EP,%edx; */ |
| *p++ = 0xBA; |
| void* disp_cp_chain_me |
| = i->Xin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP |
| : disp_cp_chain_me_to_slowEP; |
| p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_chain_me)); |
| /* call *%edx */ |
| *p++ = 0xFF; |
| *p++ = 0xD2; |
| /* --- END of PATCHABLE BYTES --- */ |
| |
| /* Fix up the conditional jump, if there was one. */ |
| if (i->Xin.XDirect.cond != Xcc_ALWAYS) { |
| Int delta = p - ptmp; |
| vassert(delta > 0 && delta < 40); |
| *ptmp = toUChar(delta-1); |
| } |
| goto done; |
| } |
| |
| case Xin_XIndir: { |
| /* We're generating transfers that could lead indirectly to a |
| chain-me, so we need to be sure this is actually allowed -- |
| no-redir translations are not allowed to reach normal |
| translations without going through the scheduler. That means |
| no XDirects or XIndirs out from no-redir translations. |
| Hence: */ |
| vassert(disp_cp_xindir != NULL); |
| |
| /* Use ptmp for backpatching conditional jumps. */ |
| ptmp = NULL; |
| |
| /* First off, if this is conditional, create a conditional |
| jump over the rest of it. */ |
| if (i->Xin.XIndir.cond != Xcc_ALWAYS) { |
| /* jmp fwds if !condition */ |
| *p++ = toUChar(0x70 + (0xF & (i->Xin.XIndir.cond ^ 1))); |
| ptmp = p; /* fill in this bit later */ |
| *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ |
| } |
| |
| /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */ |
| *p++ = 0x89; |
| p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP); |
| |
| /* movl $disp_indir, %edx */ |
| *p++ = 0xBA; |
| p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xindir)); |
| /* jmp *%edx */ |
| *p++ = 0xFF; |
| *p++ = 0xE2; |
| |
| /* Fix up the conditional jump, if there was one. */ |
| if (i->Xin.XIndir.cond != Xcc_ALWAYS) { |
| Int delta = p - ptmp; |
| vassert(delta > 0 && delta < 40); |
| *ptmp = toUChar(delta-1); |
| } |
| goto done; |
| } |
| |
| case Xin_XAssisted: { |
| /* Use ptmp for backpatching conditional jumps. */ |
| ptmp = NULL; |
| |
| /* First off, if this is conditional, create a conditional |
| jump over the rest of it. */ |
| if (i->Xin.XAssisted.cond != Xcc_ALWAYS) { |
| /* jmp fwds if !condition */ |
| *p++ = toUChar(0x70 + (0xF & (i->Xin.XAssisted.cond ^ 1))); |
| ptmp = p; /* fill in this bit later */ |
| *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ |
| } |
| |
| /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */ |
| *p++ = 0x89; |
| p = doAMode_M(p, i->Xin.XAssisted.dstGA, i->Xin.XAssisted.amEIP); |
| /* movl $magic_number, %ebp. */ |
| UInt trcval = 0; |
| switch (i->Xin.XAssisted.jk) { |
| case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break; |
| case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break; |
| case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break; |
| case Ijk_Sys_int129: trcval = VEX_TRC_JMP_SYS_INT129; break; |
| case Ijk_Sys_int130: trcval = VEX_TRC_JMP_SYS_INT130; break; |
| case Ijk_Sys_sysenter: trcval = VEX_TRC_JMP_SYS_SYSENTER; break; |
| case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break; |
| case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break; |
| case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break; |
| case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break; |
| case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break; |
| case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; |
| case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; |
| case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; |
| case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break; |
| /* We don't expect to see the following being assisted. */ |
| case Ijk_Ret: |
| case Ijk_Call: |
| /* fallthrough */ |
| default: |
| ppIRJumpKind(i->Xin.XAssisted.jk); |
| vpanic("emit_X86Instr.Xin_XAssisted: unexpected jump kind"); |
| } |
| vassert(trcval != 0); |
| *p++ = 0xBD; |
| p = emit32(p, trcval); |
| |
| /* movl $disp_cp_xassisted, %edx */ |
| *p++ = 0xBA; |
| p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xassisted)); |
| /* jmp *%edx */ |
| *p++ = 0xFF; |
| *p++ = 0xE2; |
| |
| /* Fix up the conditional jump, if there was one. */ |
| if (i->Xin.XAssisted.cond != Xcc_ALWAYS) { |
| Int delta = p - ptmp; |
| vassert(delta > 0 && delta < 40); |
| *ptmp = toUChar(delta-1); |
| } |
| goto done; |
| } |
| |
| case Xin_CMov32: |
| vassert(i->Xin.CMov32.cond != Xcc_ALWAYS); |
| |
| /* This generates cmov, which is illegal on P54/P55. */ |
| /* |
| *p++ = 0x0F; |
| *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond)); |
| if (i->Xin.CMov32.src->tag == Xrm_Reg) { |
| p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg); |
| goto done; |
| } |
| if (i->Xin.CMov32.src->tag == Xrm_Mem) { |
| p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am); |
| goto done; |
| } |
| */ |
| |
| /* Alternative version which works on any x86 variant. */ |
| /* jmp fwds if !condition */ |
| *p++ = toUChar(0x70 + (i->Xin.CMov32.cond ^ 1)); |
| *p++ = 0; /* # of bytes in the next bit, which we don't know yet */ |
| ptmp = p; |
| |
| switch (i->Xin.CMov32.src->tag) { |
| case Xrm_Reg: |
| /* Big sigh. This is movl G -> E ... */ |
| *p++ = 0x89; |
| p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg, |
| i->Xin.CMov32.dst); |
| |
| break; |
| case Xrm_Mem: |
| /* ... whereas this is movl E -> G. That's why the args |
|    to doAMode_R appear to be the wrong way round in the |
|    Xrm_Reg case. */ |
| *p++ = 0x8B; |
| p = doAMode_M(p, i->Xin.CMov32.dst, |
| i->Xin.CMov32.src->Xrm.Mem.am); |
| break; |
| default: |
| goto bad; |
| } |
| /* Fill in the jump offset. */ |
| *(ptmp-1) = toUChar(p - ptmp); |
| goto done; |
| |
| break; |
| |
| case Xin_LoadEX: |
| if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) { |
| /* movzbl */ |
| *p++ = 0x0F; |
| *p++ = 0xB6; |
| p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src); |
| goto done; |
| } |
| if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) { |
| /* movzwl */ |
| *p++ = 0x0F; |
| *p++ = 0xB7; |
| p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src); |
| goto done; |
| } |
| if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) { |
| /* movsbl */ |
| *p++ = 0x0F; |
| *p++ = 0xBE; |
| p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src); |
| goto done; |
| } |
| break; |
| |
| case Xin_Set32: |
| /* Make the destination register be 1 or 0, depending on whether |
| the relevant condition holds. We have to dodge and weave |
| when the destination is %esi or %edi as we cannot directly |
| emit the native 'setb %reg' for those. Further complication: |
| the top 24 bits of the destination should be forced to zero, |
| but doing 'xor %r,%r' kills the flag(s) we are about to read. |
| Sigh. So start off by moving $0 into the dest. */ |
| |
| /* Do we need to swap in %eax? */ |
| if (iregNo(i->Xin.Set32.dst) >= 4) { |
| /* xchg %eax, %dst */ |
| *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst)); |
| /* movl $0, %eax */ |
| *p++ = toUChar(0xB8 + iregNo(hregX86_EAX())); |
| p = emit32(p, 0); |
| /* setb lo8(%eax) */ |
| *p++ = 0x0F; |
| *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond)); |
| p = doAMode_R(p, fake(0), hregX86_EAX()); |
| /* xchg %eax, %dst */ |
| *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst)); |
| } else { |
| /* movl $0, %dst */ |
| *p++ = toUChar(0xB8 + iregNo(i->Xin.Set32.dst)); |
| p = emit32(p, 0); |
| /* setb lo8(%dst) */ |
| *p++ = 0x0F; |
| *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond)); |
| p = doAMode_R(p, fake(0), i->Xin.Set32.dst); |
| } |
| goto done; |
| |
| case Xin_Bsfr32: |
| *p++ = 0x0F; |
| if (i->Xin.Bsfr32.isFwds) { |
| *p++ = 0xBC; |
| } else { |
| *p++ = 0xBD; |
| } |
| p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src); |
| goto done; |
| |
| case Xin_MFence: |
| /* see comment in hdefs.h re this insn */ |
| if (0) vex_printf("EMIT FENCE\n"); |
| if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3 |
| |VEX_HWCAPS_X86_SSE2)) { |
| /* mfence */ |
| *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0; |
| goto done; |
| } |
| if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_MMXEXT) { |
| /* sfence */ |
| *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8; |
| /* lock addl $0,0(%esp) */ |
| *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44; |
| *p++ = 0x24; *p++ = 0x00; *p++ = 0x00; |
| goto done; |
| } |
| if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) { |
| /* lock addl $0,0(%esp) */ |
| *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44; |
| *p++ = 0x24; *p++ = 0x00; *p++ = 0x00; |
| goto done; |
| } |
| vpanic("emit_X86Instr:mfence:hwcaps"); |
| /*NOTREACHED*/ |
| break; |
| |
| case Xin_ACAS: |
| /* lock */ |
| *p++ = 0xF0; |
| /* cmpxchg{b,w,l} %ebx,mem. Expected-value in %eax, new value |
| in %ebx. The new-value register is hardwired to be %ebx |
| since letting it be any integer register gives the problem |
| that %sil and %dil are unaddressable on x86 and hence we |
| would have to resort to the same kind of trickery as with |
| byte-sized Xin.Store, just below. Given that this isn't |
| performance critical, it is simpler just to force the |
| register operand to %ebx (could equally be %ecx or %edx). |
| (Although %ebx is more consistent with cmpxchg8b.) */ |
| if (i->Xin.ACAS.sz == 2) *p++ = 0x66; |
| *p++ = 0x0F; |
| if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1; |
| p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr); |
| goto done; |
| |
| case Xin_DACAS: |
| /* lock */ |
| *p++ = 0xF0; |
| /* cmpxchg8b m64. Expected-value in %edx:%eax, new value |
| in %ecx:%ebx. All 4 regs are hardwired in the ISA, so |
| aren't encoded in the insn. */ |
| *p++ = 0x0F; |
| *p++ = 0xC7; |
| p = doAMode_M(p, fake(1), i->Xin.DACAS.addr); |
| goto done; |
| |
| case Xin_Store: |
| if (i->Xin.Store.sz == 2) { |
| /* This case, at least, is simple, given that we can |
| reference the low 16 bits of any integer register. */ |
| *p++ = 0x66; |
| *p++ = 0x89; |
| p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst); |
| goto done; |
| } |
| |
| if (i->Xin.Store.sz == 1) { |
| /* We have to do complex dodging and weaving if src is not |
| the low 8 bits of %eax/%ebx/%ecx/%edx. */ |
| if (iregNo(i->Xin.Store.src) < 4) { |
| /* we're OK, can do it directly */ |
| *p++ = 0x88; |
| p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst); |
| goto done; |
| } else { |
| /* Bleh. This means the source is %edi or %esi. Since |
| the address mode can only mention two registers, at |
| least one of %eax/%ebx/%ecx/%edx must be available to |
| temporarily swap the source into, so the store can |
| happen. So we have to look at the regs mentioned |
| in the amode. */ |
| HReg swap = INVALID_HREG; |
| HReg eax = hregX86_EAX(), ebx = hregX86_EBX(), |
| ecx = hregX86_ECX(), edx = hregX86_EDX(); |
| Bool a_ok = True, b_ok = True, c_ok = True, d_ok = True; |
| HRegUsage u; |
| Int j; |
| initHRegUsage(&u); |
| addRegUsage_X86AMode(&u, i->Xin.Store.dst); |
| for (j = 0; j < u.n_used; j++) { |
| HReg r = u.hreg[j]; |
| if (sameHReg(r, eax)) a_ok = False; |
| if (sameHReg(r, ebx)) b_ok = False; |
| if (sameHReg(r, ecx)) c_ok = False; |
| if (sameHReg(r, edx)) d_ok = False; |
| } |
| if (a_ok) swap = eax; |
| if (b_ok) swap = ebx; |
| if (c_ok) swap = ecx; |
| if (d_ok) swap = edx; |
| vassert(! hregIsInvalid(swap)); |
| /* xchgl %source, %swap. Could do better if swap is %eax. */ |
| *p++ = 0x87; |
| p = doAMode_R(p, i->Xin.Store.src, swap); |
| /* movb lo8{%swap}, (dst) */ |
| *p++ = 0x88; |
| p = doAMode_M(p, swap, i->Xin.Store.dst); |
| /* xchgl %source, %swap. Could do better if swap is %eax. */ |
| *p++ = 0x87; |
| p = doAMode_R(p, i->Xin.Store.src, swap); |
| goto done; |
| } |
| } /* if (i->Xin.Store.sz == 1) */ |
| break; |
| |
| case Xin_FpUnary: |
| /* gop %src, %dst |
| --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst) |
| */ |
| p = do_ffree_st7(p); |
| p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src)); |
| p = do_fop1_st(p, i->Xin.FpUnary.op); |
| p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst)); |
| goto done; |
| |
| case Xin_FpBinary: |
| if (i->Xin.FpBinary.op == Xfp_YL2X |
| || i->Xin.FpBinary.op == Xfp_YL2XP1) { |
| /* Have to do this specially. */ |
| /* ffree %st7 ; fld %st(srcL) ; |
| ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */ |
| p = do_ffree_st7(p); |
| p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL)); |
| p = do_ffree_st7(p); |
| p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR)); |
| *p++ = 0xD9; |
| *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9); |
| p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst)); |
| goto done; |
| } |
| if (i->Xin.FpBinary.op == Xfp_ATAN) { |
| /* Have to do this specially. */ |
| /* ffree %st7 ; fld %st(srcL) ; |
| ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */ |
| p = do_ffree_st7(p); |
| p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL)); |
| p = do_ffree_st7(p); |
| p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR)); |
| *p++ = 0xD9; *p++ = 0xF3; |
| p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst)); |
| goto done; |
| } |
| if (i->Xin.FpBinary.op == Xfp_PREM |
| || i->Xin.FpBinary.op == Xfp_PREM1 |
| || i->Xin.FpBinary.op == Xfp_SCALE) { |
| /* Have to do this specially. */ |
| /* ffree %st7 ; fld %st(srcR) ; |
| ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ; |
| fincstp ; ffree %st7 */ |
| p = do_ffree_st7(p); |
| p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR)); |
| p = do_ffree_st7(p); |
| p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL)); |
| *p++ = 0xD9; |
| switch (i->Xin.FpBinary.op) { |
| case Xfp_PREM: *p++ = 0xF8; break; |
| case Xfp_PREM1: *p++ = 0xF5; break; |
| case Xfp_SCALE: *p++ = 0xFD; break; |
| default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)"); |
| } |
| p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst)); |
| *p++ = 0xD9; *p++ = 0xF7; |
| p = do_ffree_st7(p); |
| goto done; |
| } |
| /* General case */ |
| /* gop %srcL, %srcR, %dst |
| --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst) |
| */ |
| p = do_ffree_st7(p); |
| p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL)); |
| p = do_fop2_st(p, i->Xin.FpBinary.op, |
| 1+hregNumber(i->Xin.FpBinary.srcR)); |
| p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst)); |
| goto done; |
| |
| case Xin_FpLdSt: |
| if (i->Xin.FpLdSt.isLoad) { |
| /* Load from memory into %fakeN. |
| --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1) |
| */ |
| p = do_ffree_st7(p); |
| switch (i->Xin.FpLdSt.sz) { |
| case 4: |
| *p++ = 0xD9; |
| p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr); |
| break; |
| case 8: |
| *p++ = 0xDD; |
| p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr); |
| break; |
| case 10: |
| *p++ = 0xDB; |
| p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdSt.addr); |
| break; |
| default: |
| vpanic("emitX86Instr(FpLdSt,load)"); |
| } |
| p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg)); |
| goto done; |
| } else { |
| /* Store from %fakeN into memory. |
| --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode |
| */ |
| p = do_ffree_st7(p); |
| p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg)); |
| switch (i->Xin.FpLdSt.sz) { |
| case 4: |
| *p++ = 0xD9; |
| p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr); |
| break; |
| case 8: |
| *p++ = 0xDD; |
| p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr); |
| break; |
| case 10: |
| *p++ = 0xDB; |
| p = doAMode_M(p, fake(7)/*subopcode*/, i->Xin.FpLdSt.addr); |
| break; |
| default: |
| vpanic("emitX86Instr(FpLdSt,store)"); |
| } |
| goto done; |
| } |
| break; |
| |
| case Xin_FpLdStI: |
| if (i->Xin.FpLdStI.isLoad) { |
| /* Load from memory into %fakeN, converting from an int. |
| --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1) |
| */ |
| switch (i->Xin.FpLdStI.sz) { |
| case 8: opc = 0xDF; subopc_imm = 5; break; |
| case 4: opc = 0xDB; subopc_imm = 0; break; |
| case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break; |
| default: vpanic("emitX86Instr(Xin_FpLdStI-load)"); |
| } |
| p = do_ffree_st7(p); |
| *p++ = toUChar(opc); |
| p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr); |
| p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg)); |
| goto done; |
| } else { |
| /* Store from %fakeN into memory, converting to an int. |
| --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode |
| */ |
| switch (i->Xin.FpLdStI.sz) { |
| case 8: opc = 0xDF; subopc_imm = 7; break; |
| case 4: opc = 0xDB; subopc_imm = 3; break; |
| case 2: opc = 0xDF; subopc_imm = 3; break; |
| default: vpanic("emitX86Instr(Xin_FpLdStI-store)"); |
| } |
| p = do_ffree_st7(p); |
| p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg)); |
| *p++ = toUChar(opc); |
| p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr); |
| goto done; |
| } |
| break; |
| |
| case Xin_Fp64to32: |
| /* ffree %st7 ; fld %st(src) */ |
| p = do_ffree_st7(p); |
| p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src)); |
| /* subl $4, %esp */ |
| *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04; |
| /* fstps (%esp) */ |
| *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24; |
| /* flds (%esp) */ |
| *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24; |
| /* addl $4, %esp */ |
| *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04; |
| /* fstp %st(1+dst) */ |
| p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst)); |
| goto done; |
| |
| case Xin_FpCMov: |
| /* jmp fwds if !condition */ |
| *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1)); |
| *p++ = 0; /* # of bytes in the next bit, which we don't know yet */ |
| ptmp = p; |
| |
| /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */ |
| p = do_ffree_st7(p); |
| p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src)); |
| p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst)); |
| |
| /* Fill in the jump offset. */ |
| *(ptmp-1) = toUChar(p - ptmp); |
| goto done; |
| |
| case Xin_FpLdCW: |
| *p++ = 0xD9; |
| p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdCW.addr); |
| goto done; |
| |
| case Xin_FpStSW_AX: |
| /* note, this emits fnstsw %ax, not fstsw %ax */ |
| *p++ = 0xDF; |
| *p++ = 0xE0; |
| goto done; |
| |
| case Xin_FpCmp: |
| /* gcmp %fL, %fR, %dst |
| -> ffree %st7; fpush %fL ; fucomp %(fR+1) ; |
| fnstsw %ax ; movl %eax, %dst |
| */ |
| /* ffree %st7 */ |
| p = do_ffree_st7(p); |
| /* fpush %fL */ |
| p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL)); |
| /* fucomp %(fR+1) */ |
| *p++ = 0xDD; |
| *p++ = toUChar(0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR)))); |
| /* fnstsw %ax */ |
| *p++ = 0xDF; |
| *p++ = 0xE0; |
| /* movl %eax, %dst */ |
| *p++ = 0x89; |
| p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst); |
| goto done; |
| |
| case Xin_SseConst: { |
| UShort con = i->Xin.SseConst.con; |
| p = push_word_from_tags(p, toUShort((con >> 12) & 0xF)); |
| p = push_word_from_tags(p, toUShort((con >> 8) & 0xF)); |
| p = push_word_from_tags(p, toUShort((con >> 4) & 0xF)); |
| p = push_word_from_tags(p, toUShort(con & 0xF)); |
| /* movups (%esp), %xmm-dst */ |
| *p++ = 0x0F; |
| *p++ = 0x10; |
| *p++ = toUChar(0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst))); |
| *p++ = 0x24; |
| /* addl $16, %esp */ |
| *p++ = 0x83; |
| *p++ = 0xC4; |
| *p++ = 0x10; |
| goto done; |
| } |
| |
| case Xin_SseLdSt: |
| *p++ = 0x0F; |
| *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11); |
| p = doAMode_M(p, fake(vregNo(i->Xin.SseLdSt.reg)), i->Xin.SseLdSt.addr); |
| goto done; |
| |
| case Xin_SseLdzLO: |
| vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8); |
| /* movs[sd] amode, %xmm-dst */ |
| *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 0xF3 : 0xF2); |
| *p++ = 0x0F; |
| *p++ = 0x10; |
| p = doAMode_M(p, fake(vregNo(i->Xin.SseLdzLO.reg)), |
| i->Xin.SseLdzLO.addr); |
| goto done; |
| |
| case Xin_Sse32Fx4: |
| xtra = 0; |
| *p++ = 0x0F; |
| switch (i->Xin.Sse32Fx4.op) { |
| case Xsse_ADDF: *p++ = 0x58; break; |
| case Xsse_DIVF: *p++ = 0x5E; break; |
| case Xsse_MAXF: *p++ = 0x5F; break; |
| case Xsse_MINF: *p++ = 0x5D; break; |
| case Xsse_MULF: *p++ = 0x59; break; |
| case Xsse_RCPF: *p++ = 0x53; break; |
| case Xsse_RSQRTF: *p++ = 0x52; break; |
| case Xsse_SQRTF: *p++ = 0x51; break; |
| case Xsse_SUBF: *p++ = 0x5C; break; |
| case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; |
| case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; |
| case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; |
| case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; |
| default: goto bad; |
| } |
| p = doAMode_R(p, fake(vregNo(i->Xin.Sse32Fx4.dst)), |
| fake(vregNo(i->Xin.Sse32Fx4.src)) ); |
| if (xtra & 0x100) |
| *p++ = toUChar(xtra & 0xFF); |
| goto done; |
| |
| case Xin_Sse64Fx2: |
| xtra = 0; |
| *p++ = 0x66; |
| *p++ = 0x0F; |
| switch (i->Xin.Sse64Fx2.op) { |
| case Xsse_ADDF: *p++ = 0x58; break; |
| case Xsse_DIVF: *p++ = 0x5E; break; |
| case Xsse_MAXF: *p++ = 0x5F; break; |
| case Xsse_MINF: *p++ = 0x5D; break; |
| case Xsse_MULF: *p++ = 0x59; break; |
| case Xsse_RCPF: *p++ = 0x53; break; |
| case Xsse_RSQRTF: *p++ = 0x52; break; |
| case Xsse_SQRTF: *p++ = 0x51; break; |
| case Xsse_SUBF: *p++ = 0x5C; break; |
| case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; |
| case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; |
| case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; |
| case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; |
| default: goto bad; |
| } |
| p = doAMode_R(p, fake(vregNo(i->Xin.Sse64Fx2.dst)), |
| fake(vregNo(i->Xin.Sse64Fx2.src)) ); |
| if (xtra & 0x100) |
| *p++ = toUChar(xtra & 0xFF); |
| goto done; |
| |
| case Xin_Sse32FLo: |
| xtra = 0; |
| *p++ = 0xF3; |
| *p++ = 0x0F; |
| switch (i->Xin.Sse32FLo.op) { |
| case Xsse_ADDF: *p++ = 0x58; break; |
| case Xsse_DIVF: *p++ = 0x5E; break; |
| case Xsse_MAXF: *p++ = 0x5F; break; |
| case Xsse_MINF: *p++ = 0x5D; break; |
| case Xsse_MULF: *p++ = 0x59; break; |
| case Xsse_RCPF: *p++ = 0x53; break; |
| case Xsse_RSQRTF: *p++ = 0x52; break; |
| case Xsse_SQRTF: *p++ = 0x51; break; |
| case Xsse_SUBF: *p++ = 0x5C; break; |
| case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; |
| case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; |
| case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; |
| case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; |
| default: goto bad; |
| } |
| p = doAMode_R(p, fake(vregNo(i->Xin.Sse32FLo.dst)), |
| fake(vregNo(i->Xin.Sse32FLo.src)) ); |
| if (xtra & 0x100) |
| *p++ = toUChar(xtra & 0xFF); |
| goto done; |
| |
| case Xin_Sse64FLo: |
| xtra = 0; |
| *p++ = 0xF2; |
| *p++ = 0x0F; |
| switch (i->Xin.Sse64FLo.op) { |
| case Xsse_ADDF: *p++ = 0x58; break; |
| case Xsse_DIVF: *p++ = 0x5E; break; |
| case Xsse_MAXF: *p++ = 0x5F; break; |
| case Xsse_MINF: *p++ = 0x5D; break; |
| case Xsse_MULF: *p++ = 0x59; break; |
| case Xsse_RCPF: *p++ = 0x53; break; |
| case Xsse_RSQRTF: *p++ = 0x52; break; |
| case Xsse_SQRTF: *p++ = 0x51; break; |
| case Xsse_SUBF: *p++ = 0x5C; break; |
| case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; |
| case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; |
| case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; |
| case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; |
| default: goto bad; |
| } |
| p = doAMode_R(p, fake(vregNo(i->Xin.Sse64FLo.dst)), |
| fake(vregNo(i->Xin.Sse64FLo.src)) ); |
| if (xtra & 0x100) |
| *p++ = toUChar(xtra & 0xFF); |
| goto done; |
| |
| case Xin_SseReRg: |
| # define XX(_n) *p++ = (_n) |
| switch (i->Xin.SseReRg.op) { |
| case Xsse_MOV: /*movups*/ XX(0x0F); XX(0x10); break; |
| case Xsse_OR: XX(0x0F); XX(0x56); break; |
| case Xsse_XOR: XX(0x0F); XX(0x57); break; |
| case Xsse_AND: XX(0x0F); XX(0x54); break; |
| case Xsse_PACKSSD: XX(0x66); XX(0x0F); XX(0x6B); break; |
| case Xsse_PACKSSW: XX(0x66); XX(0x0F); XX(0x63); break; |
| case Xsse_PACKUSW: XX(0x66); XX(0x0F); XX(0x67); break; |
| case Xsse_ADD8: XX(0x66); XX(0x0F); XX(0xFC); break; |
| case Xsse_ADD16: XX(0x66); XX(0x0F); XX(0xFD); break; |
| case Xsse_ADD32: XX(0x66); XX(0x0F); XX(0xFE); break; |
| case Xsse_ADD64: XX(0x66); XX(0x0F); XX(0xD4); break; |
| case Xsse_QADD8S: XX(0x66); XX(0x0F); XX(0xEC); break; |
| case Xsse_QADD16S: XX(0x66); XX(0x0F); XX(0xED); break; |
| case Xsse_QADD8U: XX(0x66); XX(0x0F); XX(0xDC); break; |
| case Xsse_QADD16U: XX(0x66); XX(0x0F); XX(0xDD); break; |
| case Xsse_AVG8U: XX(0x66); XX(0x0F); XX(0xE0); break; |
| case Xsse_AVG16U: XX(0x66); XX(0x0F); XX(0xE3); break; |
| case Xsse_CMPEQ8: XX(0x66); XX(0x0F); XX(0x74); break; |
| case Xsse_CMPEQ16: XX(0x66); XX(0x0F); XX(0x75); break; |
| case Xsse_CMPEQ32: XX(0x66); XX(0x0F); XX(0x76); break; |
| case Xsse_CMPGT8S: XX(0x66); XX(0x0F); XX(0x64); break; |
| case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break; |
| case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break; |
| case Xsse_MAX16S: XX(0x66); XX(0x0F); XX(0xEE); break; |
| case Xsse_MAX8U: XX(0x66); XX(0x0F); XX(0xDE); break; |
| case Xsse_MIN16S: XX(0x66); XX(0x0F); XX(0xEA); break; |
| case Xsse_MIN8U: XX(0x66); XX(0x0F); XX(0xDA); break; |
| case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break; |
| case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break; |
| case Xsse_MUL16: XX(0x66); XX(0x0F); XX(0xD5); break; |
| case Xsse_SHL16: XX(0x66); XX(0x0F); XX(0xF1); break; |
| case Xsse_SHL32: XX(0x66); XX(0x0F); XX(0xF2); break; |
| case Xsse_SHL64: XX(0x66); XX(0x0F); XX(0xF3); break; |
| case Xsse_SAR16: XX(0x66); XX(0x0F); XX(0xE1); break; |
| case Xsse_SAR32: XX(0x66); XX(0x0F); XX(0xE2); break; |
| case Xsse_SHR16: XX(0x66); XX(0x0F); XX(0xD1); break; |
| case Xsse_SHR32: XX(0x66); XX(0x0F); XX(0xD2); break; |
| case Xsse_SHR64: XX(0x66); XX(0x0F); XX(0xD3); break; |
| case Xsse_SUB8: XX(0x66); XX(0x0F); XX(0xF8); break; |
| case Xsse_SUB16: XX(0x66); XX(0x0F); XX(0xF9); break; |
| case Xsse_SUB32: XX(0x66); XX(0x0F); XX(0xFA); break; |
| case Xsse_SUB64: XX(0x66); XX(0x0F); XX(0xFB); break; |
| case Xsse_QSUB8S: XX(0x66); XX(0x0F); XX(0xE8); break; |
| case Xsse_QSUB16S: XX(0x66); XX(0x0F); XX(0xE9); break; |
| case Xsse_QSUB8U: XX(0x66); XX(0x0F); XX(0xD8); break; |
| case Xsse_QSUB16U: XX(0x66); XX(0x0F); XX(0xD9); break; |
| case Xsse_UNPCKHB: XX(0x66); XX(0x0F); XX(0x68); break; |
| case Xsse_UNPCKHW: XX(0x66); XX(0x0F); XX(0x69); break; |
| case Xsse_UNPCKHD: XX(0x66); XX(0x0F); XX(0x6A); break; |
| case Xsse_UNPCKHQ: XX(0x66); XX(0x0F); XX(0x6D); break; |
| case Xsse_UNPCKLB: XX(0x66); XX(0x0F); XX(0x60); break; |
| case Xsse_UNPCKLW: XX(0x66); XX(0x0F); XX(0x61); break; |
| case Xsse_UNPCKLD: XX(0x66); XX(0x0F); XX(0x62); break; |
| case Xsse_UNPCKLQ: XX(0x66); XX(0x0F); XX(0x6C); break; |
| default: goto bad; |
| } |
| p = doAMode_R(p, fake(vregNo(i->Xin.SseReRg.dst)), |
| fake(vregNo(i->Xin.SseReRg.src)) ); |
| # undef XX |
| goto done; |
| |
| case Xin_SseCMov: |
| /* jmp fwds if !condition */ |
| *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1)); |
| *p++ = 0; /* # of bytes in the next bit, which we don't know yet */ |
| ptmp = p; |
| |
| /* movaps %src, %dst */ |
| *p++ = 0x0F; |
| *p++ = 0x28; |
| p = doAMode_R(p, fake(vregNo(i->Xin.SseCMov.dst)), |
| fake(vregNo(i->Xin.SseCMov.src)) ); |
| |
| /* Fill in the jump offset. */ |
| *(ptmp-1) = toUChar(p - ptmp); |
| goto done; |
| |
| case Xin_SseShuf: |
| *p++ = 0x66; |
| *p++ = 0x0F; |
| *p++ = 0x70; |
| p = doAMode_R(p, fake(vregNo(i->Xin.SseShuf.dst)), |
| fake(vregNo(i->Xin.SseShuf.src)) ); |
| *p++ = (UChar)(i->Xin.SseShuf.order); |
| goto done; |
| |
| case Xin_EvCheck: { |
| /* We generate: |
| (3 bytes) decl 4(%ebp) 4 == offsetof(host_EvC_COUNTER) |
| (2 bytes) jns nofail expected taken |
| (3 bytes) jmp* 0(%ebp) 0 == offsetof(host_EvC_FAILADDR) |
| nofail: |
| */ |
| /* This is heavily asserted re instruction lengths. It needs to |
| be. If we get given unexpected forms of .amCounter or |
| .amFailAddr -- basically, anything that's not of the form |
| uimm7(%ebp) -- they are likely to fail. */ |
| /* Note also that after the decl we must be very careful not to |
| read the carry flag, else we get a partial flags stall. |
| js/jns avoids that, though. */ |
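| /* Assuming the usual uimm7(%ebp) amodes, namely 4(%ebp) and 0(%ebp), |
|    the expected byte sequence is FF 4D 04, 79 03, FF 65 00 -- 8 bytes |
|    in all, which is what the length asserts below check. */ |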
| UChar* p0 = p; |
| /* --- decl 4(%ebp) --- */ |
| /* "fake(1)" because + there's no register in this encoding; |
| instead the register + field is used as a sub opcode. The |
| encoding for "decl r/m32" + is FF /1, hence the fake(1). */ |
| *p++ = 0xFF; |
| p = doAMode_M(p, fake(1), i->Xin.EvCheck.amCounter); |
| vassert(p - p0 == 3); |
| /* --- jns nofail --- */ |
| *p++ = 0x79; |
| *p++ = 0x03; /* need to check this 0x03 after the next insn */ |
| vassert(p - p0 == 5); |
| /* --- jmp* 0(%ebp) --- */ |
| /* The encoding is FF /4. */ |
| *p++ = 0xFF; |
| p = doAMode_M(p, fake(4), i->Xin.EvCheck.amFailAddr); |
| vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */ |
| /* And crosscheck .. */ |
| vassert(evCheckSzB_X86() == 8); |
| goto done; |
| } |
| |
| case Xin_ProfInc: { |
| /* We generate addl $1,NotKnownYet |
| adcl $0,NotKnownYet+4 |
| in the expectation that a later call to LibVEX_patchProfCtr |
| will be used to fill in the immediate fields once the right |
| value is known. |
| 83 05 00 00 00 00 01 |
| 83 15 00 00 00 00 00 |
| */ |
| *p++ = 0x83; *p++ = 0x05; |
| *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; |
| *p++ = 0x01; |
| *p++ = 0x83; *p++ = 0x15; |
| *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; |
| *p++ = 0x00; |
| /* Tell the caller .. */ |
| vassert(!(*is_profInc)); |
| *is_profInc = True; |
| goto done; |
| } |
| |
| default: |
| goto bad; |
| } |
| |
| bad: |
| ppX86Instr(i, mode64); |
| vpanic("emit_X86Instr"); |
| /*NOTREACHED*/ |
| |
| done: |
| vassert(p - &buf[0] <= 32); |
| return p - &buf[0]; |
| |
| # undef fake |
| } |
| |
| |
| /* How big is an event check? See case for Xin_EvCheck in |
| emit_X86Instr just above. That crosschecks what this returns, so |
| we can tell if we're inconsistent. */ |
| Int evCheckSzB_X86 ( void ) |
| { |
| return 8; |
| } |
| |
| |
| /* NB: what goes on here has to be very closely coordinated with the |
| emitInstr case for XDirect, above. */ |
| VexInvalRange chainXDirect_X86 ( void* place_to_chain, |
| void* disp_cp_chain_me_EXPECTED, |
| void* place_to_jump_to ) |
| { |
| /* What we're expecting to see is: |
| movl $disp_cp_chain_me_EXPECTED, %edx |
| call *%edx |
| viz |
| BA <4 bytes value == disp_cp_chain_me_EXPECTED> |
| FF D2 |
| */ |
| UChar* p = (UChar*)place_to_chain; |
| vassert(p[0] == 0xBA); |
| vassert(*(UInt*)(&p[1]) == (UInt)Ptr_to_ULong(disp_cp_chain_me_EXPECTED)); |
| vassert(p[5] == 0xFF); |
| vassert(p[6] == 0xD2); |
| /* And what we want to change it to is: |
| jmp disp32 where disp32 is relative to the next insn |
| ud2; |
| viz |
| E9 <4 bytes == disp32> |
| 0F 0B |
| The replacement has the same length as the original. |
| */ |
| /* This is the delta we need to put into a JMP d32 insn. It's |
| relative to the start of the next insn, hence the -5. */ |
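| /* E.g. (illustrative numbers only): with the E9 byte at 0x58000000 |
|    and place_to_jump_to == 0x58001000, the field gets |
|    0x1000 - 5 = 0xFFB. */ |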
| Long delta = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)5; |
| |
| /* And make the modifications. */ |
| p[0] = 0xE9; |
| p[1] = (delta >> 0) & 0xFF; |
| p[2] = (delta >> 8) & 0xFF; |
| p[3] = (delta >> 16) & 0xFF; |
| p[4] = (delta >> 24) & 0xFF; |
| p[5] = 0x0F; p[6] = 0x0B; |
| /* sanity check on the delta -- top 32 are all 0 or all 1 */ |
| delta >>= 32; |
| vassert(delta == 0LL || delta == -1LL); |
| VexInvalRange vir = { (HWord)place_to_chain, 7 }; |
| return vir; |
| } |
| |
| |
| /* NB: what goes on here has to be very closely coordinated with the |
| emitInstr case for XDirect, above. */ |
| VexInvalRange unchainXDirect_X86 ( void* place_to_unchain, |
| void* place_to_jump_to_EXPECTED, |
| void* disp_cp_chain_me ) |
| { |
| /* What we're expecting to see is: |
| jmp d32 |
| ud2; |
| viz |
| E9 <4 bytes == disp32> |
| 0F 0B |
| */ |
| UChar* p = (UChar*)place_to_unchain; |
| Bool valid = False; |
| if (p[0] == 0xE9 |
| && p[5] == 0x0F && p[6] == 0x0B) { |
| /* Check the offset is right. */ |
| Int s32 = *(Int*)(&p[1]); |
| if ((UChar*)p + 5 + s32 == (UChar*)place_to_jump_to_EXPECTED) { |
| valid = True; |
| if (0) |
| vex_printf("QQQ unchainXDirect_X86: found valid\n"); |
| } |
| } |
| vassert(valid); |
| /* And what we want to change it to is: |
| movl $disp_cp_chain_me, %edx |
| call *%edx |
| viz |
| BA <4 bytes value == disp_cp_chain_me> |
| FF D2 |
| So it's the same length (convenient, huh). |
| */ |
| p[0] = 0xBA; |
| *(UInt*)(&p[1]) = (UInt)Ptr_to_ULong(disp_cp_chain_me); |
| p[5] = 0xFF; |
| p[6] = 0xD2; |
| VexInvalRange vir = { (HWord)place_to_unchain, 7 }; |
| return vir; |
| } |
| |
| |
| /* Patch the counter address into a profile inc point, as previously |
| created by the Xin_ProfInc case for emit_X86Instr. */ |
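| /* For instance (illustrative address only): if the counter lives at |
|    0x60000000, bytes 2..5 become 00 00 00 60 and bytes 9..12 become |
|    04 00 00 60, i.e. "addl $1,(0x60000000) ; adcl $0,(0x60000004)". */ |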
| VexInvalRange patchProfInc_X86 ( void* place_to_patch, |
| ULong* location_of_counter ) |
| { |
| vassert(sizeof(ULong*) == 4); |
| UChar* p = (UChar*)place_to_patch; |
| vassert(p[0] == 0x83); |
| vassert(p[1] == 0x05); |
| vassert(p[2] == 0x00); |
| vassert(p[3] == 0x00); |
| vassert(p[4] == 0x00); |
| vassert(p[5] == 0x00); |
| vassert(p[6] == 0x01); |
| vassert(p[7] == 0x83); |
| vassert(p[8] == 0x15); |
| vassert(p[9] == 0x00); |
| vassert(p[10] == 0x00); |
| vassert(p[11] == 0x00); |
| vassert(p[12] == 0x00); |
| vassert(p[13] == 0x00); |
| UInt imm32 = (UInt)Ptr_to_ULong(location_of_counter); |
| p[2] = imm32 & 0xFF; imm32 >>= 8; |
| p[3] = imm32 & 0xFF; imm32 >>= 8; |
| p[4] = imm32 & 0xFF; imm32 >>= 8; |
| p[5] = imm32 & 0xFF; imm32 >>= 8; |
| imm32 = 4 + (UInt)Ptr_to_ULong(location_of_counter); |
| p[9] = imm32 & 0xFF; imm32 >>= 8; |
| p[10] = imm32 & 0xFF; imm32 >>= 8; |
| p[11] = imm32 & 0xFF; imm32 >>= 8; |
| p[12] = imm32 & 0xFF; imm32 >>= 8; |
| VexInvalRange vir = { (HWord)place_to_patch, 14 }; |
| return vir; |
| } |
| |
| |
| /*---------------------------------------------------------------*/ |
| /*--- end host_x86_defs.c ---*/ |
| /*---------------------------------------------------------------*/ |