| |
| /*---------------------------------------------------------------*/ |
| /*--- begin host_amd64_defs.c ---*/ |
| /*---------------------------------------------------------------*/ |
| |
| /* |
| This file is part of Valgrind, a dynamic binary instrumentation |
| framework. |
| |
| Copyright (C) 2004-2011 OpenWorks LLP |
| info@open-works.net |
| |
| This program is free software; you can redistribute it and/or |
| modify it under the terms of the GNU General Public License as |
| published by the Free Software Foundation; either version 2 of the |
| License, or (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
| 02110-1301, USA. |
| |
| The GNU General Public License is contained in the file COPYING. |
| |
| Neither the names of the U.S. Department of Energy nor the |
| University of California nor the names of its contributors may be |
| used to endorse or promote products derived from this software |
| without prior written permission. |
| */ |
| |
| #include "libvex_basictypes.h" |
| #include "libvex.h" |
| #include "libvex_trc_values.h" |
| |
| #include "main_util.h" |
| #include "host_generic_regs.h" |
| #include "host_amd64_defs.h" |
| |
| |
| /* --------- Registers. --------- */ |
| |
| void ppHRegAMD64 ( HReg reg ) |
| { |
| Int r; |
| static HChar* ireg64_names[16] |
| = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", |
| "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }; |
| /* Be generic for all virtual regs. */ |
| if (hregIsVirtual(reg)) { |
| ppHReg(reg); |
| return; |
| } |
| /* But specific for real regs. */ |
| switch (hregClass(reg)) { |
| case HRcInt64: |
| r = hregNumber(reg); |
| vassert(r >= 0 && r < 16); |
| vex_printf("%s", ireg64_names[r]); |
| return; |
| case HRcFlt64: |
| r = hregNumber(reg); |
| vassert(r >= 0 && r < 6); |
| vex_printf("%%fake%d", r); |
| return; |
| case HRcVec128: |
| r = hregNumber(reg); |
| vassert(r >= 0 && r < 16); |
| vex_printf("%%xmm%d", r); |
| return; |
| default: |
| vpanic("ppHRegAMD64"); |
| } |
| } |
| |
| static void ppHRegAMD64_lo32 ( HReg reg ) |
| { |
| Int r; |
| static HChar* ireg32_names[16] |
| = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", |
| "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" }; |
| /* Be generic for all virtual regs. */ |
| if (hregIsVirtual(reg)) { |
| ppHReg(reg); |
| vex_printf("d"); |
| return; |
| } |
| /* But specific for real regs. */ |
| switch (hregClass(reg)) { |
| case HRcInt64: |
| r = hregNumber(reg); |
| vassert(r >= 0 && r < 16); |
| vex_printf("%s", ireg32_names[r]); |
| return; |
| default: |
| vpanic("ppHRegAMD64_lo32: invalid regclass"); |
| } |
| } |
| |
| HReg hregAMD64_RAX ( void ) { return mkHReg( 0, HRcInt64, False); } |
| HReg hregAMD64_RCX ( void ) { return mkHReg( 1, HRcInt64, False); } |
| HReg hregAMD64_RDX ( void ) { return mkHReg( 2, HRcInt64, False); } |
| HReg hregAMD64_RBX ( void ) { return mkHReg( 3, HRcInt64, False); } |
| HReg hregAMD64_RSP ( void ) { return mkHReg( 4, HRcInt64, False); } |
| HReg hregAMD64_RBP ( void ) { return mkHReg( 5, HRcInt64, False); } |
| HReg hregAMD64_RSI ( void ) { return mkHReg( 6, HRcInt64, False); } |
| HReg hregAMD64_RDI ( void ) { return mkHReg( 7, HRcInt64, False); } |
| HReg hregAMD64_R8 ( void ) { return mkHReg( 8, HRcInt64, False); } |
| HReg hregAMD64_R9 ( void ) { return mkHReg( 9, HRcInt64, False); } |
| HReg hregAMD64_R10 ( void ) { return mkHReg(10, HRcInt64, False); } |
| HReg hregAMD64_R11 ( void ) { return mkHReg(11, HRcInt64, False); } |
| HReg hregAMD64_R12 ( void ) { return mkHReg(12, HRcInt64, False); } |
| HReg hregAMD64_R13 ( void ) { return mkHReg(13, HRcInt64, False); } |
| HReg hregAMD64_R14 ( void ) { return mkHReg(14, HRcInt64, False); } |
| HReg hregAMD64_R15 ( void ) { return mkHReg(15, HRcInt64, False); } |
| |
| HReg hregAMD64_XMM0 ( void ) { return mkHReg( 0, HRcVec128, False); } |
| HReg hregAMD64_XMM1 ( void ) { return mkHReg( 1, HRcVec128, False); } |
| HReg hregAMD64_XMM3 ( void ) { return mkHReg( 3, HRcVec128, False); } |
| HReg hregAMD64_XMM4 ( void ) { return mkHReg( 4, HRcVec128, False); } |
| HReg hregAMD64_XMM5 ( void ) { return mkHReg( 5, HRcVec128, False); } |
| HReg hregAMD64_XMM6 ( void ) { return mkHReg( 6, HRcVec128, False); } |
| HReg hregAMD64_XMM7 ( void ) { return mkHReg( 7, HRcVec128, False); } |
| HReg hregAMD64_XMM8 ( void ) { return mkHReg( 8, HRcVec128, False); } |
| HReg hregAMD64_XMM9 ( void ) { return mkHReg( 9, HRcVec128, False); } |
| HReg hregAMD64_XMM10 ( void ) { return mkHReg(10, HRcVec128, False); } |
| HReg hregAMD64_XMM11 ( void ) { return mkHReg(11, HRcVec128, False); } |
| HReg hregAMD64_XMM12 ( void ) { return mkHReg(12, HRcVec128, False); } |
| |
| |
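| /* Note on the set of allocatable registers below (an inference from |
| how the registers are used elsewhere in this file, not a normative |
| statement): %rax, %rcx and %rdx are omitted because MulL, Div and |
| Sh64 use them implicitly; %rsp and %rbp are the stack and baseblock |
| pointers; and %r11 is kept free since Ain_Call, the XDirect/XIndir/ |
| XAssisted exits and Ain_ProfInc all need it as a scratch register. */ |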
| void getAllocableRegs_AMD64 ( Int* nregs, HReg** arr ) |
| { |
| #if 0 |
| *nregs = 6; |
| *arr = LibVEX_Alloc(*nregs * sizeof(HReg)); |
| (*arr)[ 0] = hregAMD64_RSI(); |
| (*arr)[ 1] = hregAMD64_RDI(); |
| (*arr)[ 2] = hregAMD64_RBX(); |
| |
| (*arr)[ 3] = hregAMD64_XMM7(); |
| (*arr)[ 4] = hregAMD64_XMM8(); |
| (*arr)[ 5] = hregAMD64_XMM9(); |
| #endif |
| #if 1 |
| *nregs = 20; |
| *arr = LibVEX_Alloc(*nregs * sizeof(HReg)); |
| (*arr)[ 0] = hregAMD64_RSI(); |
| (*arr)[ 1] = hregAMD64_RDI(); |
| (*arr)[ 2] = hregAMD64_R8(); |
| (*arr)[ 3] = hregAMD64_R9(); |
| (*arr)[ 4] = hregAMD64_R12(); |
| (*arr)[ 5] = hregAMD64_R13(); |
| (*arr)[ 6] = hregAMD64_R14(); |
| (*arr)[ 7] = hregAMD64_R15(); |
| (*arr)[ 8] = hregAMD64_RBX(); |
| |
| (*arr)[ 9] = hregAMD64_XMM3(); |
| (*arr)[10] = hregAMD64_XMM4(); |
| (*arr)[11] = hregAMD64_XMM5(); |
| (*arr)[12] = hregAMD64_XMM6(); |
| (*arr)[13] = hregAMD64_XMM7(); |
| (*arr)[14] = hregAMD64_XMM8(); |
| (*arr)[15] = hregAMD64_XMM9(); |
| (*arr)[16] = hregAMD64_XMM10(); |
| (*arr)[17] = hregAMD64_XMM11(); |
| (*arr)[18] = hregAMD64_XMM12(); |
| (*arr)[19] = hregAMD64_R10(); |
| #endif |
| } |
| |
| |
| /* --------- Condition codes, Intel encoding. --------- */ |
| |
| HChar* showAMD64CondCode ( AMD64CondCode cond ) |
| { |
| switch (cond) { |
| case Acc_O: return "o"; |
| case Acc_NO: return "no"; |
| case Acc_B: return "b"; |
| case Acc_NB: return "nb"; |
| case Acc_Z: return "z"; |
| case Acc_NZ: return "nz"; |
| case Acc_BE: return "be"; |
| case Acc_NBE: return "nbe"; |
| case Acc_S: return "s"; |
| case Acc_NS: return "ns"; |
| case Acc_P: return "p"; |
| case Acc_NP: return "np"; |
| case Acc_L: return "l"; |
| case Acc_NL: return "nl"; |
| case Acc_LE: return "le"; |
| case Acc_NLE: return "nle"; |
| case Acc_ALWAYS: return "ALWAYS"; |
| default: vpanic("showAMD64CondCode"); |
| } |
| } |
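| |
| /* It is assumed (per host_amd64_defs.h) that the Acc_* values are the |
| Intel condition-code encodings themselves, which is what lets the |
| emitter fold a condition straight into Jcc/SETcc/CMOVcc opcodes. */ |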
| |
| |
| /* --------- AMD64AMode: memory address expressions. --------- */ |
| |
| AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) { |
| AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode)); |
| am->tag = Aam_IR; |
| am->Aam.IR.imm = imm32; |
| am->Aam.IR.reg = reg; |
| return am; |
| } |
| AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) { |
| AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode)); |
| am->tag = Aam_IRRS; |
| am->Aam.IRRS.imm = imm32; |
| am->Aam.IRRS.base = base; |
| am->Aam.IRRS.index = indEx; |
| am->Aam.IRRS.shift = shift; |
| vassert(shift >= 0 && shift <= 3); |
| return am; |
| } |
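| |
| /* Purely for illustration: the amode 16(%rbp) would be built as |
| AMD64AMode_IR(16, hregAMD64_RBP()), and 0x20(%rax,%rcx,8) as |
| AMD64AMode_IRRS(0x20, hregAMD64_RAX(), hregAMD64_RCX(), 3), since |
| the shift field holds log2 of the scale (see ppAMD64AMode). */ |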
| |
| void ppAMD64AMode ( AMD64AMode* am ) { |
| switch (am->tag) { |
| case Aam_IR: |
| if (am->Aam.IR.imm == 0) |
| vex_printf("("); |
| else |
| vex_printf("0x%x(", am->Aam.IR.imm); |
| ppHRegAMD64(am->Aam.IR.reg); |
| vex_printf(")"); |
| return; |
| case Aam_IRRS: |
| vex_printf("0x%x(", am->Aam.IRRS.imm); |
| ppHRegAMD64(am->Aam.IRRS.base); |
| vex_printf(","); |
| ppHRegAMD64(am->Aam.IRRS.index); |
| vex_printf(",%d)", 1 << am->Aam.IRRS.shift); |
| return; |
| default: |
| vpanic("ppAMD64AMode"); |
| } |
| } |
| |
| static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) { |
| switch (am->tag) { |
| case Aam_IR: |
| addHRegUse(u, HRmRead, am->Aam.IR.reg); |
| return; |
| case Aam_IRRS: |
| addHRegUse(u, HRmRead, am->Aam.IRRS.base); |
| addHRegUse(u, HRmRead, am->Aam.IRRS.index); |
| return; |
| default: |
| vpanic("addRegUsage_AMD64AMode"); |
| } |
| } |
| |
| static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) { |
| switch (am->tag) { |
| case Aam_IR: |
| am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg); |
| return; |
| case Aam_IRRS: |
| am->Aam.IRRS.base = lookupHRegRemap(m, am->Aam.IRRS.base); |
| am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index); |
| return; |
| default: |
| vpanic("mapRegs_AMD64AMode"); |
| } |
| } |
| |
| /* --------- Operand, which can be reg, immediate or memory. --------- */ |
| |
| AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) { |
| AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI)); |
| op->tag = Armi_Imm; |
| op->Armi.Imm.imm32 = imm32; |
| return op; |
| } |
| AMD64RMI* AMD64RMI_Reg ( HReg reg ) { |
| AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI)); |
| op->tag = Armi_Reg; |
| op->Armi.Reg.reg = reg; |
| return op; |
| } |
| AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) { |
| AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI)); |
| op->tag = Armi_Mem; |
| op->Armi.Mem.am = am; |
| return op; |
| } |
| |
| static void ppAMD64RMI_wrk ( AMD64RMI* op, Bool lo32 ) { |
| switch (op->tag) { |
| case Armi_Imm: |
| vex_printf("$0x%x", op->Armi.Imm.imm32); |
| return; |
| case Armi_Reg: |
| if (lo32) |
| ppHRegAMD64_lo32(op->Armi.Reg.reg); |
| else |
| ppHRegAMD64(op->Armi.Reg.reg); |
| return; |
| case Armi_Mem: |
| ppAMD64AMode(op->Armi.Mem.am); |
| return; |
| default: |
| vpanic("ppAMD64RMI"); |
| } |
| } |
| void ppAMD64RMI ( AMD64RMI* op ) { |
| ppAMD64RMI_wrk(op, False/*!lo32*/); |
| } |
| void ppAMD64RMI_lo32 ( AMD64RMI* op ) { |
| ppAMD64RMI_wrk(op, True/*lo32*/); |
| } |
| |
| /* An AMD64RMI can only be used in a "read" context (what would it mean |
| to write or modify a literal?) and so we enumerate its registers |
| accordingly. */ |
| static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) { |
| switch (op->tag) { |
| case Armi_Imm: |
| return; |
| case Armi_Reg: |
| addHRegUse(u, HRmRead, op->Armi.Reg.reg); |
| return; |
| case Armi_Mem: |
| addRegUsage_AMD64AMode(u, op->Armi.Mem.am); |
| return; |
| default: |
| vpanic("addRegUsage_AMD64RMI"); |
| } |
| } |
| |
| static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) { |
| switch (op->tag) { |
| case Armi_Imm: |
| return; |
| case Armi_Reg: |
| op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg); |
| return; |
| case Armi_Mem: |
| mapRegs_AMD64AMode(m, op->Armi.Mem.am); |
| return; |
| default: |
| vpanic("mapRegs_AMD64RMI"); |
| } |
| } |
| |
| |
| /* --------- Operand, which can be reg or immediate only. --------- */ |
| |
| AMD64RI* AMD64RI_Imm ( UInt imm32 ) { |
| AMD64RI* op = LibVEX_Alloc(sizeof(AMD64RI)); |
| op->tag = Ari_Imm; |
| op->Ari.Imm.imm32 = imm32; |
| return op; |
| } |
| AMD64RI* AMD64RI_Reg ( HReg reg ) { |
| AMD64RI* op = LibVEX_Alloc(sizeof(AMD64RI)); |
| op->tag = Ari_Reg; |
| op->Ari.Reg.reg = reg; |
| return op; |
| } |
| |
| void ppAMD64RI ( AMD64RI* op ) { |
| switch (op->tag) { |
| case Ari_Imm: |
| vex_printf("$0x%x", op->Ari.Imm.imm32); |
| return; |
| case Ari_Reg: |
| ppHRegAMD64(op->Ari.Reg.reg); |
| return; |
| default: |
| vpanic("ppAMD64RI"); |
| } |
| } |
| |
| /* An AMD64RI can only be used in a "read" context (what would it mean |
| to write or modify a literal?) and so we enumerate its registers |
| accordingly. */ |
| static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) { |
| switch (op->tag) { |
| case Ari_Imm: |
| return; |
| case Ari_Reg: |
| addHRegUse(u, HRmRead, op->Ari.Reg.reg); |
| return; |
| default: |
| vpanic("addRegUsage_AMD64RI"); |
| } |
| } |
| |
| static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) { |
| switch (op->tag) { |
| case Ari_Imm: |
| return; |
| case Ari_Reg: |
| op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg); |
| return; |
| default: |
| vpanic("mapRegs_AMD64RI"); |
| } |
| } |
| |
| |
| /* --------- Operand, which can be reg or memory only. --------- */ |
| |
| AMD64RM* AMD64RM_Reg ( HReg reg ) { |
| AMD64RM* op = LibVEX_Alloc(sizeof(AMD64RM)); |
| op->tag = Arm_Reg; |
| op->Arm.Reg.reg = reg; |
| return op; |
| } |
| AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) { |
| AMD64RM* op = LibVEX_Alloc(sizeof(AMD64RM)); |
| op->tag = Arm_Mem; |
| op->Arm.Mem.am = am; |
| return op; |
| } |
| |
| void ppAMD64RM ( AMD64RM* op ) { |
| switch (op->tag) { |
| case Arm_Mem: |
| ppAMD64AMode(op->Arm.Mem.am); |
| return; |
| case Arm_Reg: |
| ppHRegAMD64(op->Arm.Reg.reg); |
| return; |
| default: |
| vpanic("ppAMD64RM"); |
| } |
| } |
| |
| /* Because an AMD64RM can be both a source or destination operand, we |
| have to supply a mode -- pertaining to the operand as a whole -- |
| indicating how it's being used. */ |
| static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) { |
| switch (op->tag) { |
| case Arm_Mem: |
| /* Memory is read, written or modified. So we just want to |
| know the regs read by the amode. */ |
| addRegUsage_AMD64AMode(u, op->Arm.Mem.am); |
| return; |
| case Arm_Reg: |
| /* reg is read, written or modified. Add it in the |
| appropriate way. */ |
| addHRegUse(u, mode, op->Arm.Reg.reg); |
| return; |
| default: |
| vpanic("addRegUsage_AMD64RM"); |
| } |
| } |
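| |
| /* For instance, Ain_MulL and Ain_CMov64 pass HRmRead here, since |
| their RM operand is only ever a source; a hypothetical instruction |
| that updated its RM operand in place would pass HRmModify. */ |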
| |
| static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op ) |
| { |
| switch (op->tag) { |
| case Arm_Mem: |
| mapRegs_AMD64AMode(m, op->Arm.Mem.am); |
| return; |
| case Arm_Reg: |
| op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg); |
| return; |
| default: |
| vpanic("mapRegs_AMD64RM"); |
| } |
| } |
| |
| |
| /* --------- Instructions. --------- */ |
| |
| static HChar* showAMD64ScalarSz ( Int sz ) { |
| switch (sz) { |
| case 2: return "w"; |
| case 4: return "l"; |
| case 8: return "q"; |
| default: vpanic("showAMD64ScalarSz"); |
| } |
| } |
| |
| HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) { |
| switch (op) { |
| case Aun_NOT: return "not"; |
| case Aun_NEG: return "neg"; |
| default: vpanic("showAMD64UnaryOp"); |
| } |
| } |
| |
| HChar* showAMD64AluOp ( AMD64AluOp op ) { |
| switch (op) { |
| case Aalu_MOV: return "mov"; |
| case Aalu_CMP: return "cmp"; |
| case Aalu_ADD: return "add"; |
| case Aalu_SUB: return "sub"; |
| case Aalu_ADC: return "adc"; |
| case Aalu_SBB: return "sbb"; |
| case Aalu_AND: return "and"; |
| case Aalu_OR: return "or"; |
| case Aalu_XOR: return "xor"; |
| case Aalu_MUL: return "imul"; |
| default: vpanic("showAMD64AluOp"); |
| } |
| } |
| |
| HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) { |
| switch (op) { |
| case Ash_SHL: return "shl"; |
| case Ash_SHR: return "shr"; |
| case Ash_SAR: return "sar"; |
| default: vpanic("showAMD64ShiftOp"); |
| } |
| } |
| |
| HChar* showA87FpOp ( A87FpOp op ) { |
| switch (op) { |
| case Afp_SCALE: return "scale"; |
| case Afp_ATAN: return "atan"; |
| case Afp_YL2X: return "yl2x"; |
| case Afp_YL2XP1: return "yl2xp1"; |
| case Afp_PREM: return "prem"; |
| case Afp_PREM1: return "prem1"; |
| case Afp_SQRT: return "sqrt"; |
| case Afp_SIN: return "sin"; |
| case Afp_COS: return "cos"; |
| case Afp_TAN: return "tan"; |
| case Afp_ROUND: return "round"; |
| case Afp_2XM1: return "2xm1"; |
| default: vpanic("showA87FpOp"); |
| } |
| } |
| |
| HChar* showAMD64SseOp ( AMD64SseOp op ) { |
| switch (op) { |
| case Asse_MOV: return "movups"; |
| case Asse_ADDF: return "add"; |
| case Asse_SUBF: return "sub"; |
| case Asse_MULF: return "mul"; |
| case Asse_DIVF: return "div"; |
| case Asse_MAXF: return "max"; |
| case Asse_MINF: return "min"; |
| case Asse_CMPEQF: return "cmpFeq"; |
| case Asse_CMPLTF: return "cmpFlt"; |
| case Asse_CMPLEF: return "cmpFle"; |
| case Asse_CMPUNF: return "cmpFun"; |
| case Asse_RCPF: return "rcp"; |
| case Asse_RSQRTF: return "rsqrt"; |
| case Asse_SQRTF: return "sqrt"; |
| case Asse_AND: return "and"; |
| case Asse_OR: return "or"; |
| case Asse_XOR: return "xor"; |
| case Asse_ANDN: return "andn"; |
| case Asse_ADD8: return "paddb"; |
| case Asse_ADD16: return "paddw"; |
| case Asse_ADD32: return "paddd"; |
| case Asse_ADD64: return "paddq"; |
| case Asse_QADD8U: return "paddusb"; |
| case Asse_QADD16U: return "paddusw"; |
| case Asse_QADD8S: return "paddsb"; |
| case Asse_QADD16S: return "paddsw"; |
| case Asse_SUB8: return "psubb"; |
| case Asse_SUB16: return "psubw"; |
| case Asse_SUB32: return "psubd"; |
| case Asse_SUB64: return "psubq"; |
| case Asse_QSUB8U: return "psubusb"; |
| case Asse_QSUB16U: return "psubusw"; |
| case Asse_QSUB8S: return "psubsb"; |
| case Asse_QSUB16S: return "psubsw"; |
| case Asse_MUL16: return "pmullw"; |
| case Asse_MULHI16U: return "pmulhuw"; |
| case Asse_MULHI16S: return "pmulhw"; |
| case Asse_AVG8U: return "pavgb"; |
| case Asse_AVG16U: return "pavgw"; |
| case Asse_MAX16S: return "pmaxw"; |
| case Asse_MAX8U: return "pmaxub"; |
| case Asse_MIN16S: return "pminw"; |
| case Asse_MIN8U: return "pminub"; |
| case Asse_CMPEQ8: return "pcmpeqb"; |
| case Asse_CMPEQ16: return "pcmpeqw"; |
| case Asse_CMPEQ32: return "pcmpeqd"; |
| case Asse_CMPGT8S: return "pcmpgtb"; |
| case Asse_CMPGT16S: return "pcmpgtw"; |
| case Asse_CMPGT32S: return "pcmpgtd"; |
| case Asse_SHL16: return "psllw"; |
| case Asse_SHL32: return "pslld"; |
| case Asse_SHL64: return "psllq"; |
| case Asse_SHR16: return "psrlw"; |
| case Asse_SHR32: return "psrld"; |
| case Asse_SHR64: return "psrlq"; |
| case Asse_SAR16: return "psraw"; |
| case Asse_SAR32: return "psrad"; |
| case Asse_PACKSSD: return "packssdw"; |
| case Asse_PACKSSW: return "packsswb"; |
| case Asse_PACKUSW: return "packuswb"; |
| case Asse_UNPCKHB: return "punpckhb"; |
| case Asse_UNPCKHW: return "punpckhw"; |
| case Asse_UNPCKHD: return "punpckhd"; |
| case Asse_UNPCKHQ: return "punpckhq"; |
| case Asse_UNPCKLB: return "punpcklb"; |
| case Asse_UNPCKLW: return "punpcklw"; |
| case Asse_UNPCKLD: return "punpckld"; |
| case Asse_UNPCKLQ: return "punpcklq"; |
| default: vpanic("showAMD64SseOp"); |
| } |
| } |
| |
| AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_Imm64; |
| i->Ain.Imm64.imm64 = imm64; |
| i->Ain.Imm64.dst = dst; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_Alu64R; |
| i->Ain.Alu64R.op = op; |
| i->Ain.Alu64R.src = src; |
| i->Ain.Alu64R.dst = dst; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_Alu64M; |
| i->Ain.Alu64M.op = op; |
| i->Ain.Alu64M.src = src; |
| i->Ain.Alu64M.dst = dst; |
| vassert(op != Aalu_MUL); |
| return i; |
| } |
| AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_Sh64; |
| i->Ain.Sh64.op = op; |
| i->Ain.Sh64.src = src; |
| i->Ain.Sh64.dst = dst; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_Test64; |
| i->Ain.Test64.imm32 = imm32; |
| i->Ain.Test64.dst = dst; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_Unary64; |
| i->Ain.Unary64.op = op; |
| i->Ain.Unary64.dst = dst; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_Lea64; |
| i->Ain.Lea64.am = am; |
| i->Ain.Lea64.dst = dst; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_Alu32R; |
| i->Ain.Alu32R.op = op; |
| i->Ain.Alu32R.src = src; |
| i->Ain.Alu32R.dst = dst; |
| switch (op) { |
| case Aalu_ADD: case Aalu_SUB: case Aalu_CMP: |
| case Aalu_AND: case Aalu_OR: case Aalu_XOR: break; |
| default: vassert(0); |
| } |
| return i; |
| } |
| AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_MulL; |
| i->Ain.MulL.syned = syned; |
| i->Ain.MulL.src = src; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_Div; |
| i->Ain.Div.syned = syned; |
| i->Ain.Div.sz = sz; |
| i->Ain.Div.src = src; |
| vassert(sz == 4 || sz == 8); |
| return i; |
| } |
| AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_Push; |
| i->Ain.Push.src = src; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_Call; |
| i->Ain.Call.cond = cond; |
| i->Ain.Call.target = target; |
| i->Ain.Call.regparms = regparms; |
| vassert(regparms >= 0 && regparms <= 6); |
| return i; |
| } |
| |
| AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP, |
| AMD64CondCode cond, Bool toFastEP ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_XDirect; |
| i->Ain.XDirect.dstGA = dstGA; |
| i->Ain.XDirect.amRIP = amRIP; |
| i->Ain.XDirect.cond = cond; |
| i->Ain.XDirect.toFastEP = toFastEP; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP, |
| AMD64CondCode cond ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_XIndir; |
| i->Ain.XIndir.dstGA = dstGA; |
| i->Ain.XIndir.amRIP = amRIP; |
| i->Ain.XIndir.cond = cond; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP, |
| AMD64CondCode cond, IRJumpKind jk ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_XAssisted; |
| i->Ain.XAssisted.dstGA = dstGA; |
| i->Ain.XAssisted.amRIP = amRIP; |
| i->Ain.XAssisted.cond = cond; |
| i->Ain.XAssisted.jk = jk; |
| return i; |
| } |
| |
| AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, AMD64RM* src, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_CMov64; |
| i->Ain.CMov64.cond = cond; |
| i->Ain.CMov64.src = src; |
| i->Ain.CMov64.dst = dst; |
| vassert(cond != Acc_ALWAYS); |
| return i; |
| } |
| AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_MovxLQ; |
| i->Ain.MovxLQ.syned = syned; |
| i->Ain.MovxLQ.src = src; |
| i->Ain.MovxLQ.dst = dst; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned, |
| AMD64AMode* src, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_LoadEX; |
| i->Ain.LoadEX.szSmall = szSmall; |
| i->Ain.LoadEX.syned = syned; |
| i->Ain.LoadEX.src = src; |
| i->Ain.LoadEX.dst = dst; |
| vassert(szSmall == 1 || szSmall == 2 || szSmall == 4); |
| return i; |
| } |
| AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_Store; |
| i->Ain.Store.sz = sz; |
| i->Ain.Store.src = src; |
| i->Ain.Store.dst = dst; |
| vassert(sz == 1 || sz == 2 || sz == 4); |
| return i; |
| } |
| AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_Set64; |
| i->Ain.Set64.cond = cond; |
| i->Ain.Set64.dst = dst; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_Bsfr64; |
| i->Ain.Bsfr64.isFwds = isFwds; |
| i->Ain.Bsfr64.src = src; |
| i->Ain.Bsfr64.dst = dst; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_MFence ( void ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_MFence; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_ACAS; |
| i->Ain.ACAS.addr = addr; |
| i->Ain.ACAS.sz = sz; |
| vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); |
| return i; |
| } |
| AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_DACAS; |
| i->Ain.DACAS.addr = addr; |
| i->Ain.DACAS.sz = sz; |
| vassert(sz == 8 || sz == 4); |
| return i; |
| } |
| |
| AMD64Instr* AMD64Instr_A87Free ( Int nregs ) |
| { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_A87Free; |
| i->Ain.A87Free.nregs = nregs; |
| vassert(nregs >= 1 && nregs <= 7); |
| return i; |
| } |
| AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB ) |
| { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_A87PushPop; |
| i->Ain.A87PushPop.addr = addr; |
| i->Ain.A87PushPop.isPush = isPush; |
| i->Ain.A87PushPop.szB = szB; |
| vassert(szB == 8 || szB == 4); |
| return i; |
| } |
| AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op ) |
| { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_A87FpOp; |
| i->Ain.A87FpOp.op = op; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr ) |
| { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_A87LdCW; |
| i->Ain.A87LdCW.addr = addr; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr ) |
| { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_A87StSW; |
| i->Ain.A87StSW.addr = addr; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_LdMXCSR; |
| i->Ain.LdMXCSR.addr = addr; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_SseUComIS; |
| i->Ain.SseUComIS.sz = toUChar(sz); |
| i->Ain.SseUComIS.srcL = srcL; |
| i->Ain.SseUComIS.srcR = srcR; |
| i->Ain.SseUComIS.dst = dst; |
| vassert(sz == 4 || sz == 8); |
| return i; |
| } |
| AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_SseSI2SF; |
| i->Ain.SseSI2SF.szS = toUChar(szS); |
| i->Ain.SseSI2SF.szD = toUChar(szD); |
| i->Ain.SseSI2SF.src = src; |
| i->Ain.SseSI2SF.dst = dst; |
| vassert(szS == 4 || szS == 8); |
| vassert(szD == 4 || szD == 8); |
| return i; |
| } |
| AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_SseSF2SI; |
| i->Ain.SseSF2SI.szS = toUChar(szS); |
| i->Ain.SseSF2SI.szD = toUChar(szD); |
| i->Ain.SseSF2SI.src = src; |
| i->Ain.SseSF2SI.dst = dst; |
| vassert(szS == 4 || szS == 8); |
| vassert(szD == 4 || szD == 8); |
| return i; |
| } |
| AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst ) |
| { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_SseSDSS; |
| i->Ain.SseSDSS.from64 = from64; |
| i->Ain.SseSDSS.src = src; |
| i->Ain.SseSDSS.dst = dst; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz, |
| HReg reg, AMD64AMode* addr ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_SseLdSt; |
| i->Ain.SseLdSt.isLoad = isLoad; |
| i->Ain.SseLdSt.sz = toUChar(sz); |
| i->Ain.SseLdSt.reg = reg; |
| i->Ain.SseLdSt.addr = addr; |
| vassert(sz == 4 || sz == 8 || sz == 16); |
| return i; |
| } |
| AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr ) |
| { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_SseLdzLO; |
| i->Ain.SseLdzLO.sz = sz; |
| i->Ain.SseLdzLO.reg = reg; |
| i->Ain.SseLdzLO.addr = addr; |
| vassert(sz == 4 || sz == 8); |
| return i; |
| } |
| AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_Sse32Fx4; |
| i->Ain.Sse32Fx4.op = op; |
| i->Ain.Sse32Fx4.src = src; |
| i->Ain.Sse32Fx4.dst = dst; |
| vassert(op != Asse_MOV); |
| return i; |
| } |
| AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_Sse32FLo; |
| i->Ain.Sse32FLo.op = op; |
| i->Ain.Sse32FLo.src = src; |
| i->Ain.Sse32FLo.dst = dst; |
| vassert(op != Asse_MOV); |
| return i; |
| } |
| AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_Sse64Fx2; |
| i->Ain.Sse64Fx2.op = op; |
| i->Ain.Sse64Fx2.src = src; |
| i->Ain.Sse64Fx2.dst = dst; |
| vassert(op != Asse_MOV); |
| return i; |
| } |
| AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_Sse64FLo; |
| i->Ain.Sse64FLo.op = op; |
| i->Ain.Sse64FLo.src = src; |
| i->Ain.Sse64FLo.dst = dst; |
| vassert(op != Asse_MOV); |
| return i; |
| } |
| AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_SseReRg; |
| i->Ain.SseReRg.op = op; |
| i->Ain.SseReRg.src = re; |
| i->Ain.SseReRg.dst = rg; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_SseCMov; |
| i->Ain.SseCMov.cond = cond; |
| i->Ain.SseCMov.src = src; |
| i->Ain.SseCMov.dst = dst; |
| vassert(cond != Acc_ALWAYS); |
| return i; |
| } |
| AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_SseShuf; |
| i->Ain.SseShuf.order = order; |
| i->Ain.SseShuf.src = src; |
| i->Ain.SseShuf.dst = dst; |
| vassert(order >= 0 && order <= 0xFF); |
| return i; |
| } |
| //uu AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad, |
| //uu HReg reg, AMD64AMode* addr ) { |
| //uu AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| //uu i->tag = Ain_AvxLdSt; |
| //uu i->Ain.AvxLdSt.isLoad = isLoad; |
| //uu i->Ain.AvxLdSt.reg = reg; |
| //uu i->Ain.AvxLdSt.addr = addr; |
| //uu return i; |
| //uu } |
| //uu AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp op, HReg re, HReg rg ) { |
| //uu AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| //uu i->tag = Ain_AvxReRg; |
| //uu i->Ain.AvxReRg.op = op; |
| //uu i->Ain.AvxReRg.src = re; |
| //uu i->Ain.AvxReRg.dst = rg; |
| //uu return i; |
| //uu } |
| AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter, |
| AMD64AMode* amFailAddr ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_EvCheck; |
| i->Ain.EvCheck.amCounter = amCounter; |
| i->Ain.EvCheck.amFailAddr = amFailAddr; |
| return i; |
| } |
| AMD64Instr* AMD64Instr_ProfInc ( void ) { |
| AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); |
| i->tag = Ain_ProfInc; |
| return i; |
| } |
| |
| void ppAMD64Instr ( AMD64Instr* i, Bool mode64 ) |
| { |
| vassert(mode64 == True); |
| switch (i->tag) { |
| case Ain_Imm64: |
| vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64); |
| ppHRegAMD64(i->Ain.Imm64.dst); |
| return; |
| case Ain_Alu64R: |
| vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op)); |
| ppAMD64RMI(i->Ain.Alu64R.src); |
| vex_printf(","); |
| ppHRegAMD64(i->Ain.Alu64R.dst); |
| return; |
| case Ain_Alu64M: |
| vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op)); |
| ppAMD64RI(i->Ain.Alu64M.src); |
| vex_printf(","); |
| ppAMD64AMode(i->Ain.Alu64M.dst); |
| return; |
| case Ain_Sh64: |
| vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op)); |
| if (i->Ain.Sh64.src == 0) |
| vex_printf("%%cl,"); |
| else |
| vex_printf("$%d,", (Int)i->Ain.Sh64.src); |
| ppHRegAMD64(i->Ain.Sh64.dst); |
| return; |
| case Ain_Test64: |
| vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32); |
| ppHRegAMD64(i->Ain.Test64.dst); |
| return; |
| case Ain_Unary64: |
| vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op)); |
| ppHRegAMD64(i->Ain.Unary64.dst); |
| return; |
| case Ain_Lea64: |
| vex_printf("leaq "); |
| ppAMD64AMode(i->Ain.Lea64.am); |
| vex_printf(","); |
| ppHRegAMD64(i->Ain.Lea64.dst); |
| return; |
| case Ain_Alu32R: |
| vex_printf("%sl ", showAMD64AluOp(i->Ain.Alu32R.op)); |
| ppAMD64RMI_lo32(i->Ain.Alu32R.src); |
| vex_printf(","); |
| ppHRegAMD64_lo32(i->Ain.Alu32R.dst); |
| return; |
| case Ain_MulL: |
| vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u'); |
| ppAMD64RM(i->Ain.MulL.src); |
| return; |
| case Ain_Div: |
| vex_printf("%cdiv%s ", |
| i->Ain.Div.syned ? 's' : 'u', |
| showAMD64ScalarSz(i->Ain.Div.sz)); |
| ppAMD64RM(i->Ain.Div.src); |
| return; |
| case Ain_Push: |
| vex_printf("pushq "); |
| ppAMD64RMI(i->Ain.Push.src); |
| return; |
| case Ain_Call: |
| vex_printf("call%s[%d] ", |
| i->Ain.Call.cond==Acc_ALWAYS |
| ? "" : showAMD64CondCode(i->Ain.Call.cond), |
| i->Ain.Call.regparms ); |
| vex_printf("0x%llx", i->Ain.Call.target); |
| break; |
| |
| case Ain_XDirect: |
| vex_printf("(xDirect) "); |
| vex_printf("if (%%rflags.%s) { ", |
| showAMD64CondCode(i->Ain.XDirect.cond)); |
| vex_printf("movabsq $0x%llx,%%r11; ", i->Ain.XDirect.dstGA); |
| vex_printf("movq %%r11,"); |
| ppAMD64AMode(i->Ain.XDirect.amRIP); |
| vex_printf("; "); |
| vex_printf("movabsq $disp_cp_chain_me_to_%sEP,%%r11; call *%%r11 }", |
| i->Ain.XDirect.toFastEP ? "fast" : "slow"); |
| return; |
| case Ain_XIndir: |
| vex_printf("(xIndir) "); |
| vex_printf("if (%%rflags.%s) { ", |
| showAMD64CondCode(i->Ain.XIndir.cond)); |
| vex_printf("movq "); |
| ppHRegAMD64(i->Ain.XIndir.dstGA); |
| vex_printf(","); |
| ppAMD64AMode(i->Ain.XIndir.amRIP); |
| vex_printf("; movabsq $disp_indir,%%r11; jmp *%%r11 }"); |
| return; |
| case Ain_XAssisted: |
| vex_printf("(xAssisted) "); |
| vex_printf("if (%%rflags.%s) { ", |
| showAMD64CondCode(i->Ain.XAssisted.cond)); |
| vex_printf("movq "); |
| ppHRegAMD64(i->Ain.XAssisted.dstGA); |
| vex_printf(","); |
| ppAMD64AMode(i->Ain.XAssisted.amRIP); |
| vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%rbp", |
| (Int)i->Ain.XAssisted.jk); |
| vex_printf("; movabsq $disp_assisted,%%r11; jmp *%%r11 }"); |
| return; |
| |
| case Ain_CMov64: |
| vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond)); |
| ppAMD64RM(i->Ain.CMov64.src); |
| vex_printf(","); |
| ppHRegAMD64(i->Ain.CMov64.dst); |
| return; |
| case Ain_MovxLQ: |
| vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z'); |
| ppHRegAMD64_lo32(i->Ain.MovxLQ.src); |
| vex_printf(","); |
| ppHRegAMD64(i->Ain.MovxLQ.dst); |
| return; |
| case Ain_LoadEX: |
| if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) { |
| vex_printf("movl "); |
| ppAMD64AMode(i->Ain.LoadEX.src); |
| vex_printf(","); |
| ppHRegAMD64_lo32(i->Ain.LoadEX.dst); |
| } else { |
| vex_printf("mov%c%cq ", |
| i->Ain.LoadEX.syned ? 's' : 'z', |
| i->Ain.LoadEX.szSmall==1 |
| ? 'b' |
| : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l')); |
| ppAMD64AMode(i->Ain.LoadEX.src); |
| vex_printf(","); |
| ppHRegAMD64(i->Ain.LoadEX.dst); |
| } |
| return; |
| case Ain_Store: |
| vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b' |
| : (i->Ain.Store.sz==2 ? 'w' : 'l')); |
| ppHRegAMD64(i->Ain.Store.src); |
| vex_printf(","); |
| ppAMD64AMode(i->Ain.Store.dst); |
| return; |
| case Ain_Set64: |
| vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond)); |
| ppHRegAMD64(i->Ain.Set64.dst); |
| return; |
| case Ain_Bsfr64: |
| vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r'); |
| ppHRegAMD64(i->Ain.Bsfr64.src); |
| vex_printf(","); |
| ppHRegAMD64(i->Ain.Bsfr64.dst); |
| return; |
| case Ain_MFence: |
| vex_printf("mfence" ); |
| return; |
| case Ain_ACAS: |
| vex_printf("lock cmpxchg%c ", |
| i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w' |
| : i->Ain.ACAS.sz==4 ? 'l' : 'q' ); |
| vex_printf("{%%rax->%%rbx},"); |
| ppAMD64AMode(i->Ain.ACAS.addr); |
| return; |
| case Ain_DACAS: |
| vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},", |
| (Int)(2 * i->Ain.DACAS.sz)); |
| ppAMD64AMode(i->Ain.DACAS.addr); |
| return; |
| case Ain_A87Free: |
| vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs ); |
| break; |
| case Ain_A87PushPop: |
| vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ", |
| i->Ain.A87PushPop.szB == 4 ? 's' : 'l'); |
| ppAMD64AMode(i->Ain.A87PushPop.addr); |
| break; |
| case Ain_A87FpOp: |
| vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op)); |
| break; |
| case Ain_A87LdCW: |
| vex_printf("fldcw "); |
| ppAMD64AMode(i->Ain.A87LdCW.addr); |
| break; |
| case Ain_A87StSW: |
| vex_printf("fstsw "); |
| ppAMD64AMode(i->Ain.A87StSW.addr); |
| break; |
| case Ain_LdMXCSR: |
| vex_printf("ldmxcsr "); |
| ppAMD64AMode(i->Ain.LdMXCSR.addr); |
| break; |
| case Ain_SseUComIS: |
| vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d"); |
| ppHRegAMD64(i->Ain.SseUComIS.srcL); |
| vex_printf(","); |
| ppHRegAMD64(i->Ain.SseUComIS.srcR); |
| vex_printf(" ; pushfq ; popq "); |
| ppHRegAMD64(i->Ain.SseUComIS.dst); |
| break; |
| case Ain_SseSI2SF: |
| vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d"); |
| (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64) |
| (i->Ain.SseSI2SF.src); |
| vex_printf(","); |
| ppHRegAMD64(i->Ain.SseSI2SF.dst); |
| break; |
| case Ain_SseSF2SI: |
| vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d"); |
| ppHRegAMD64(i->Ain.SseSF2SI.src); |
| vex_printf(","); |
| (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64) |
| (i->Ain.SseSF2SI.dst); |
| break; |
| case Ain_SseSDSS: |
| vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd "); |
| ppHRegAMD64(i->Ain.SseSDSS.src); |
| vex_printf(","); |
| ppHRegAMD64(i->Ain.SseSDSS.dst); |
| break; |
| case Ain_SseLdSt: |
| switch (i->Ain.SseLdSt.sz) { |
| case 4: vex_printf("movss "); break; |
| case 8: vex_printf("movsd "); break; |
| case 16: vex_printf("movups "); break; |
| default: vassert(0); |
| } |
| if (i->Ain.SseLdSt.isLoad) { |
| ppAMD64AMode(i->Ain.SseLdSt.addr); |
| vex_printf(","); |
| ppHRegAMD64(i->Ain.SseLdSt.reg); |
| } else { |
| ppHRegAMD64(i->Ain.SseLdSt.reg); |
| vex_printf(","); |
| ppAMD64AMode(i->Ain.SseLdSt.addr); |
| } |
| return; |
| case Ain_SseLdzLO: |
| vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d"); |
| ppAMD64AMode(i->Ain.SseLdzLO.addr); |
| vex_printf(","); |
| ppHRegAMD64(i->Ain.SseLdzLO.reg); |
| return; |
| case Ain_Sse32Fx4: |
| vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op)); |
| ppHRegAMD64(i->Ain.Sse32Fx4.src); |
| vex_printf(","); |
| ppHRegAMD64(i->Ain.Sse32Fx4.dst); |
| return; |
| case Ain_Sse32FLo: |
| vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op)); |
| ppHRegAMD64(i->Ain.Sse32FLo.src); |
| vex_printf(","); |
| ppHRegAMD64(i->Ain.Sse32FLo.dst); |
| return; |
| case Ain_Sse64Fx2: |
| vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op)); |
| ppHRegAMD64(i->Ain.Sse64Fx2.src); |
| vex_printf(","); |
| ppHRegAMD64(i->Ain.Sse64Fx2.dst); |
| return; |
| case Ain_Sse64FLo: |
| vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op)); |
| ppHRegAMD64(i->Ain.Sse64FLo.src); |
| vex_printf(","); |
| ppHRegAMD64(i->Ain.Sse64FLo.dst); |
| return; |
| case Ain_SseReRg: |
| vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op)); |
| ppHRegAMD64(i->Ain.SseReRg.src); |
| vex_printf(","); |
| ppHRegAMD64(i->Ain.SseReRg.dst); |
| return; |
| case Ain_SseCMov: |
| vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond)); |
| ppHRegAMD64(i->Ain.SseCMov.src); |
| vex_printf(","); |
| ppHRegAMD64(i->Ain.SseCMov.dst); |
| return; |
| case Ain_SseShuf: |
| vex_printf("pshufd $0x%x,", i->Ain.SseShuf.order); |
| ppHRegAMD64(i->Ain.SseShuf.src); |
| vex_printf(","); |
| ppHRegAMD64(i->Ain.SseShuf.dst); |
| return; |
| //uu case Ain_AvxLdSt: |
| //uu vex_printf("vmovups "); |
| //uu if (i->Ain.AvxLdSt.isLoad) { |
| //uu ppAMD64AMode(i->Ain.AvxLdSt.addr); |
| //uu vex_printf(","); |
| //uu ppHRegAMD64(i->Ain.AvxLdSt.reg); |
| //uu } else { |
| //uu ppHRegAMD64(i->Ain.AvxLdSt.reg); |
| //uu vex_printf(","); |
| //uu ppAMD64AMode(i->Ain.AvxLdSt.addr); |
| //uu } |
| //uu return; |
| //uu case Ain_AvxReRg: |
| //uu vex_printf("v%s ", showAMD64SseOp(i->Ain.SseReRg.op)); |
| //uu ppHRegAMD64(i->Ain.AvxReRg.src); |
| //uu vex_printf(","); |
| //uu ppHRegAMD64(i->Ain.AvxReRg.dst); |
| //uu return; |
| case Ain_EvCheck: |
| vex_printf("(evCheck) decl "); |
| ppAMD64AMode(i->Ain.EvCheck.amCounter); |
| vex_printf("; jns nofail; jmp *"); |
| ppAMD64AMode(i->Ain.EvCheck.amFailAddr); |
| vex_printf("; nofail:"); |
| return; |
| case Ain_ProfInc: |
| vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)"); |
| return; |
| default: |
| vpanic("ppAMD64Instr"); |
| } |
| } |
| |
| /* --------- Helpers for register allocation. --------- */ |
| |
| void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 ) |
| { |
| Bool unary; |
| vassert(mode64 == True); |
| initHRegUsage(u); |
| switch (i->tag) { |
| case Ain_Imm64: |
| addHRegUse(u, HRmWrite, i->Ain.Imm64.dst); |
| return; |
| case Ain_Alu64R: |
| addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src); |
| if (i->Ain.Alu64R.op == Aalu_MOV) { |
| addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst); |
| return; |
| } |
| if (i->Ain.Alu64R.op == Aalu_CMP) { |
| addHRegUse(u, HRmRead, i->Ain.Alu64R.dst); |
| return; |
| } |
| addHRegUse(u, HRmModify, i->Ain.Alu64R.dst); |
| return; |
| case Ain_Alu64M: |
| addRegUsage_AMD64RI(u, i->Ain.Alu64M.src); |
| addRegUsage_AMD64AMode(u, i->Ain.Alu64M.dst); |
| return; |
| case Ain_Sh64: |
| addHRegUse(u, HRmModify, i->Ain.Sh64.dst); |
| if (i->Ain.Sh64.src == 0) |
| addHRegUse(u, HRmRead, hregAMD64_RCX()); |
| return; |
| case Ain_Test64: |
| addHRegUse(u, HRmRead, i->Ain.Test64.dst); |
| return; |
| case Ain_Unary64: |
| addHRegUse(u, HRmModify, i->Ain.Unary64.dst); |
| return; |
| case Ain_Lea64: |
| addRegUsage_AMD64AMode(u, i->Ain.Lea64.am); |
| addHRegUse(u, HRmWrite, i->Ain.Lea64.dst); |
| return; |
| case Ain_Alu32R: |
| vassert(i->Ain.Alu32R.op != Aalu_MOV); |
| addRegUsage_AMD64RMI(u, i->Ain.Alu32R.src); |
| if (i->Ain.Alu32R.op == Aalu_CMP) { |
| addHRegUse(u, HRmRead, i->Ain.Alu32R.dst); |
| return; |
| } |
| addHRegUse(u, HRmModify, i->Ain.Alu32R.dst); |
| return; |
| case Ain_MulL: |
| addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead); |
| addHRegUse(u, HRmModify, hregAMD64_RAX()); |
| addHRegUse(u, HRmWrite, hregAMD64_RDX()); |
| return; |
| case Ain_Div: |
| addRegUsage_AMD64RM(u, i->Ain.Div.src, HRmRead); |
| addHRegUse(u, HRmModify, hregAMD64_RAX()); |
| addHRegUse(u, HRmModify, hregAMD64_RDX()); |
| return; |
| case Ain_Push: |
| addRegUsage_AMD64RMI(u, i->Ain.Push.src); |
| addHRegUse(u, HRmModify, hregAMD64_RSP()); |
| return; |
| case Ain_Call: |
| /* This is a bit subtle. */ |
| /* First off, claim it trashes all the caller-saved regs |
| which fall within the register allocator's jurisdiction. |
| These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11 |
| and all the xmm registers. |
| */ |
| addHRegUse(u, HRmWrite, hregAMD64_RAX()); |
| addHRegUse(u, HRmWrite, hregAMD64_RCX()); |
| addHRegUse(u, HRmWrite, hregAMD64_RDX()); |
| addHRegUse(u, HRmWrite, hregAMD64_RSI()); |
| addHRegUse(u, HRmWrite, hregAMD64_RDI()); |
| addHRegUse(u, HRmWrite, hregAMD64_R8()); |
| addHRegUse(u, HRmWrite, hregAMD64_R9()); |
| addHRegUse(u, HRmWrite, hregAMD64_R10()); |
| addHRegUse(u, HRmWrite, hregAMD64_R11()); |
| addHRegUse(u, HRmWrite, hregAMD64_XMM0()); |
| addHRegUse(u, HRmWrite, hregAMD64_XMM1()); |
| addHRegUse(u, HRmWrite, hregAMD64_XMM3()); |
| addHRegUse(u, HRmWrite, hregAMD64_XMM4()); |
| addHRegUse(u, HRmWrite, hregAMD64_XMM5()); |
| addHRegUse(u, HRmWrite, hregAMD64_XMM6()); |
| addHRegUse(u, HRmWrite, hregAMD64_XMM7()); |
| addHRegUse(u, HRmWrite, hregAMD64_XMM8()); |
| addHRegUse(u, HRmWrite, hregAMD64_XMM9()); |
| addHRegUse(u, HRmWrite, hregAMD64_XMM10()); |
| addHRegUse(u, HRmWrite, hregAMD64_XMM11()); |
| addHRegUse(u, HRmWrite, hregAMD64_XMM12()); |
| |
| /* Now we have to state any parameter-carrying registers |
| which might be read. This depends on the regparmness. */ |
| switch (i->Ain.Call.regparms) { |
| case 6: addHRegUse(u, HRmRead, hregAMD64_R9()); /*fallthru*/ |
| case 5: addHRegUse(u, HRmRead, hregAMD64_R8()); /*fallthru*/ |
| case 4: addHRegUse(u, HRmRead, hregAMD64_RCX()); /*fallthru*/ |
| case 3: addHRegUse(u, HRmRead, hregAMD64_RDX()); /*fallthru*/ |
| case 2: addHRegUse(u, HRmRead, hregAMD64_RSI()); /*fallthru*/ |
| case 1: addHRegUse(u, HRmRead, hregAMD64_RDI()); break; |
| case 0: break; |
| default: vpanic("getRegUsage_AMD64Instr:Call:regparms"); |
| } |
| /* Finally, there is the issue that the insn trashes a |
| register because the literal target address has to be |
| loaded into a register. Fortunately, r11 is stated in the |
| ABI as a scratch register, and so seems a suitable victim. */ |
| addHRegUse(u, HRmWrite, hregAMD64_R11()); |
| /* Upshot of this is that the assembler really must use r11, |
| and no other, as a destination temporary. */ |
| return; |
| /* XDirect/XIndir/XAssisted are also a bit subtle. They |
| conditionally exit the block. Hence we only need to list (1) |
| the registers that they read, and (2) the registers that they |
| write in the case where the block is not exited. (2) is |
| empty, hence only (1) is relevant here. */ |
| case Ain_XDirect: |
| /* Don't bother to mention the write to %r11, since it is not |
| available to the allocator. */ |
| addRegUsage_AMD64AMode(u, i->Ain.XDirect.amRIP); |
| return; |
| case Ain_XIndir: |
| /* Ditto re %r11 */ |
| addHRegUse(u, HRmRead, i->Ain.XIndir.dstGA); |
| addRegUsage_AMD64AMode(u, i->Ain.XIndir.amRIP); |
| return; |
| case Ain_XAssisted: |
| /* Ditto re %r11 and %rbp (the baseblock ptr) */ |
| addHRegUse(u, HRmRead, i->Ain.XAssisted.dstGA); |
| addRegUsage_AMD64AMode(u, i->Ain.XAssisted.amRIP); |
| return; |
| case Ain_CMov64: |
| addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead); |
| addHRegUse(u, HRmModify, i->Ain.CMov64.dst); |
| return; |
| case Ain_MovxLQ: |
| addHRegUse(u, HRmRead, i->Ain.MovxLQ.src); |
| addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst); |
| return; |
| case Ain_LoadEX: |
| addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src); |
| addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst); |
| return; |
| case Ain_Store: |
| addHRegUse(u, HRmRead, i->Ain.Store.src); |
| addRegUsage_AMD64AMode(u, i->Ain.Store.dst); |
| return; |
| case Ain_Set64: |
| addHRegUse(u, HRmWrite, i->Ain.Set64.dst); |
| return; |
| case Ain_Bsfr64: |
| addHRegUse(u, HRmRead, i->Ain.Bsfr64.src); |
| addHRegUse(u, HRmWrite, i->Ain.Bsfr64.dst); |
| return; |
| case Ain_MFence: |
| return; |
| case Ain_ACAS: |
| addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr); |
| addHRegUse(u, HRmRead, hregAMD64_RBX()); |
| addHRegUse(u, HRmModify, hregAMD64_RAX()); |
| return; |
| case Ain_DACAS: |
| addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr); |
| addHRegUse(u, HRmRead, hregAMD64_RCX()); |
| addHRegUse(u, HRmRead, hregAMD64_RBX()); |
| addHRegUse(u, HRmModify, hregAMD64_RDX()); |
| addHRegUse(u, HRmModify, hregAMD64_RAX()); |
| return; |
| case Ain_A87Free: |
| return; |
| case Ain_A87PushPop: |
| addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr); |
| return; |
| case Ain_A87FpOp: |
| return; |
| case Ain_A87LdCW: |
| addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr); |
| return; |
| case Ain_A87StSW: |
| addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr); |
| return; |
| case Ain_LdMXCSR: |
| addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr); |
| return; |
| case Ain_SseUComIS: |
| addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcL); |
| addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcR); |
| addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst); |
| return; |
| case Ain_SseSI2SF: |
| addHRegUse(u, HRmRead, i->Ain.SseSI2SF.src); |
| addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst); |
| return; |
| case Ain_SseSF2SI: |
| addHRegUse(u, HRmRead, i->Ain.SseSF2SI.src); |
| addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst); |
| return; |
| case Ain_SseSDSS: |
| addHRegUse(u, HRmRead, i->Ain.SseSDSS.src); |
| addHRegUse(u, HRmWrite, i->Ain.SseSDSS.dst); |
| return; |
| case Ain_SseLdSt: |
| addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr); |
| addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead, |
| i->Ain.SseLdSt.reg); |
| return; |
| case Ain_SseLdzLO: |
| addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr); |
| addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg); |
| return; |
| case Ain_Sse32Fx4: |
| vassert(i->Ain.Sse32Fx4.op != Asse_MOV); |
| unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF |
| || i->Ain.Sse32Fx4.op == Asse_RSQRTF |
| || i->Ain.Sse32Fx4.op == Asse_SQRTF ); |
| addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src); |
| addHRegUse(u, unary ? HRmWrite : HRmModify, |
| i->Ain.Sse32Fx4.dst); |
| return; |
| case Ain_Sse32FLo: |
| vassert(i->Ain.Sse32FLo.op != Asse_MOV); |
| unary = toBool( i->Ain.Sse32FLo.op == Asse_RCPF |
| || i->Ain.Sse32FLo.op == Asse_RSQRTF |
| || i->Ain.Sse32FLo.op == Asse_SQRTF ); |
| addHRegUse(u, HRmRead, i->Ain.Sse32FLo.src); |
| addHRegUse(u, unary ? HRmWrite : HRmModify, |
| i->Ain.Sse32FLo.dst); |
| return; |
| case Ain_Sse64Fx2: |
| vassert(i->Ain.Sse64Fx2.op != Asse_MOV); |
| unary = toBool( i->Ain.Sse64Fx2.op == Asse_RCPF |
| || i->Ain.Sse64Fx2.op == Asse_RSQRTF |
| || i->Ain.Sse64Fx2.op == Asse_SQRTF ); |
| addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src); |
| addHRegUse(u, unary ? HRmWrite : HRmModify, |
| i->Ain.Sse64Fx2.dst); |
| return; |
| case Ain_Sse64FLo: |
| vassert(i->Ain.Sse64FLo.op != Asse_MOV); |
| unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF |
| || i->Ain.Sse64FLo.op == Asse_RSQRTF |
| || i->Ain.Sse64FLo.op == Asse_SQRTF ); |
| addHRegUse(u, HRmRead, i->Ain.Sse64FLo.src); |
| addHRegUse(u, unary ? HRmWrite : HRmModify, |
| i->Ain.Sse64FLo.dst); |
| return; |
| case Ain_SseReRg: |
| if ( (i->Ain.SseReRg.op == Asse_XOR |
| || i->Ain.SseReRg.op == Asse_CMPEQ32) |
| && i->Ain.SseReRg.src == i->Ain.SseReRg.dst) { |
| /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd |
| r,r' as a write of a value to r, and independent of any |
| previous value in r */ |
| /* (as opposed to a rite of passage :-) */ |
| addHRegUse(u, HRmWrite, i->Ain.SseReRg.dst); |
| } else { |
| addHRegUse(u, HRmRead, i->Ain.SseReRg.src); |
| addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV |
| ? HRmWrite : HRmModify, |
| i->Ain.SseReRg.dst); |
| } |
| return; |
| case Ain_SseCMov: |
| addHRegUse(u, HRmRead, i->Ain.SseCMov.src); |
| addHRegUse(u, HRmModify, i->Ain.SseCMov.dst); |
| return; |
| case Ain_SseShuf: |
| addHRegUse(u, HRmRead, i->Ain.SseShuf.src); |
| addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst); |
| return; |
| //uu case Ain_AvxLdSt: |
| //uu addRegUsage_AMD64AMode(u, i->Ain.AvxLdSt.addr); |
| //uu addHRegUse(u, i->Ain.AvxLdSt.isLoad ? HRmWrite : HRmRead, |
| //uu i->Ain.AvxLdSt.reg); |
| //uu return; |
| //uu case Ain_AvxReRg: |
| //uu if ( (i->Ain.AvxReRg.op == Asse_XOR |
| //uu || i->Ain.AvxReRg.op == Asse_CMPEQ32) |
| //uu && i->Ain.AvxReRg.src == i->Ain.AvxReRg.dst) { |
| //uu /* See comments on the case for Ain_SseReRg. */ |
| //uu addHRegUse(u, HRmWrite, i->Ain.AvxReRg.dst); |
| //uu } else { |
| //uu addHRegUse(u, HRmRead, i->Ain.AvxReRg.src); |
| //uu addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV |
| //uu ? HRmWrite : HRmModify, |
| //uu i->Ain.AvxReRg.dst); |
| //uu } |
| //uu return; |
| case Ain_EvCheck: |
| /* We expect both amodes only to mention %rbp, so this is in |
| fact pointless, since %rbp isn't allocatable, but anyway.. */ |
| addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amCounter); |
| addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amFailAddr); |
| return; |
| case Ain_ProfInc: |
| addHRegUse(u, HRmWrite, hregAMD64_R11()); |
| return; |
| default: |
| ppAMD64Instr(i, mode64); |
| vpanic("getRegUsage_AMD64Instr"); |
| } |
| } |
| |
| /* local helper */ |
| static inline void mapReg(HRegRemap* m, HReg* r) |
| { |
| *r = lookupHRegRemap(m, *r); |
| } |
| |
| void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 ) |
| { |
| vassert(mode64 == True); |
| switch (i->tag) { |
| case Ain_Imm64: |
| mapReg(m, &i->Ain.Imm64.dst); |
| return; |
| case Ain_Alu64R: |
| mapRegs_AMD64RMI(m, i->Ain.Alu64R.src); |
| mapReg(m, &i->Ain.Alu64R.dst); |
| return; |
| case Ain_Alu64M: |
| mapRegs_AMD64RI(m, i->Ain.Alu64M.src); |
| mapRegs_AMD64AMode(m, i->Ain.Alu64M.dst); |
| return; |
| case Ain_Sh64: |
| mapReg(m, &i->Ain.Sh64.dst); |
| return; |
| case Ain_Test64: |
| mapReg(m, &i->Ain.Test64.dst); |
| return; |
| case Ain_Unary64: |
| mapReg(m, &i->Ain.Unary64.dst); |
| return; |
| case Ain_Lea64: |
| mapRegs_AMD64AMode(m, i->Ain.Lea64.am); |
| mapReg(m, &i->Ain.Lea64.dst); |
| return; |
| case Ain_Alu32R: |
| mapRegs_AMD64RMI(m, i->Ain.Alu32R.src); |
| mapReg(m, &i->Ain.Alu32R.dst); |
| return; |
| case Ain_MulL: |
| mapRegs_AMD64RM(m, i->Ain.MulL.src); |
| return; |
| case Ain_Div: |
| mapRegs_AMD64RM(m, i->Ain.Div.src); |
| return; |
| case Ain_Push: |
| mapRegs_AMD64RMI(m, i->Ain.Push.src); |
| return; |
| case Ain_Call: |
| return; |
| case Ain_XDirect: |
| mapRegs_AMD64AMode(m, i->Ain.XDirect.amRIP); |
| return; |
| case Ain_XIndir: |
| mapReg(m, &i->Ain.XIndir.dstGA); |
| mapRegs_AMD64AMode(m, i->Ain.XIndir.amRIP); |
| return; |
| case Ain_XAssisted: |
| mapReg(m, &i->Ain.XAssisted.dstGA); |
| mapRegs_AMD64AMode(m, i->Ain.XAssisted.amRIP); |
| return; |
| case Ain_CMov64: |
| mapRegs_AMD64RM(m, i->Ain.CMov64.src); |
| mapReg(m, &i->Ain.CMov64.dst); |
| return; |
| case Ain_MovxLQ: |
| mapReg(m, &i->Ain.MovxLQ.src); |
| mapReg(m, &i->Ain.MovxLQ.dst); |
| return; |
| case Ain_LoadEX: |
| mapRegs_AMD64AMode(m, i->Ain.LoadEX.src); |
| mapReg(m, &i->Ain.LoadEX.dst); |
| return; |
| case Ain_Store: |
| mapReg(m, &i->Ain.Store.src); |
| mapRegs_AMD64AMode(m, i->Ain.Store.dst); |
| return; |
| case Ain_Set64: |
| mapReg(m, &i->Ain.Set64.dst); |
| return; |
| case Ain_Bsfr64: |
| mapReg(m, &i->Ain.Bsfr64.src); |
| mapReg(m, &i->Ain.Bsfr64.dst); |
| return; |
| case Ain_MFence: |
| return; |
| case Ain_ACAS: |
| mapRegs_AMD64AMode(m, i->Ain.ACAS.addr); |
| return; |
| case Ain_DACAS: |
| mapRegs_AMD64AMode(m, i->Ain.DACAS.addr); |
| return; |
| case Ain_A87Free: |
| return; |
| case Ain_A87PushPop: |
| mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr); |
| return; |
| case Ain_A87FpOp: |
| return; |
| case Ain_A87LdCW: |
| mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr); |
| return; |
| case Ain_A87StSW: |
| mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr); |
| return; |
| case Ain_LdMXCSR: |
| mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr); |
| return; |
| case Ain_SseUComIS: |
| mapReg(m, &i->Ain.SseUComIS.srcL); |
| mapReg(m, &i->Ain.SseUComIS.srcR); |
| mapReg(m, &i->Ain.SseUComIS.dst); |
| return; |
| case Ain_SseSI2SF: |
| mapReg(m, &i->Ain.SseSI2SF.src); |
| mapReg(m, &i->Ain.SseSI2SF.dst); |
| return; |
| case Ain_SseSF2SI: |
| mapReg(m, &i->Ain.SseSF2SI.src); |
| mapReg(m, &i->Ain.SseSF2SI.dst); |
| return; |
| case Ain_SseSDSS: |
| mapReg(m, &i->Ain.SseSDSS.src); |
| mapReg(m, &i->Ain.SseSDSS.dst); |
| return; |
| case Ain_SseLdSt: |
| mapReg(m, &i->Ain.SseLdSt.reg); |
| mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr); |
| break; |
| case Ain_SseLdzLO: |
| mapReg(m, &i->Ain.SseLdzLO.reg); |
| mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr); |
| break; |
| case Ain_Sse32Fx4: |
| mapReg(m, &i->Ain.Sse32Fx4.src); |
| mapReg(m, &i->Ain.Sse32Fx4.dst); |
| return; |
| case Ain_Sse32FLo: |
| mapReg(m, &i->Ain.Sse32FLo.src); |
| mapReg(m, &i->Ain.Sse32FLo.dst); |
| return; |
| case Ain_Sse64Fx2: |
| mapReg(m, &i->Ain.Sse64Fx2.src); |
| mapReg(m, &i->Ain.Sse64Fx2.dst); |
| return; |
| case Ain_Sse64FLo: |
| mapReg(m, &i->Ain.Sse64FLo.src); |
| mapReg(m, &i->Ain.Sse64FLo.dst); |
| return; |
| case Ain_SseReRg: |
| mapReg(m, &i->Ain.SseReRg.src); |
| mapReg(m, &i->Ain.SseReRg.dst); |
| return; |
| case Ain_SseCMov: |
| mapReg(m, &i->Ain.SseCMov.src); |
| mapReg(m, &i->Ain.SseCMov.dst); |
| return; |
| case Ain_SseShuf: |
| mapReg(m, &i->Ain.SseShuf.src); |
| mapReg(m, &i->Ain.SseShuf.dst); |
| return; |
| //uu case Ain_AvxLdSt: |
| //uu mapReg(m, &i->Ain.AvxLdSt.reg); |
| //uu mapRegs_AMD64AMode(m, i->Ain.AvxLdSt.addr); |
| //uu break; |
| //uu case Ain_AvxReRg: |
| //uu mapReg(m, &i->Ain.AvxReRg.src); |
| //uu mapReg(m, &i->Ain.AvxReRg.dst); |
| //uu return; |
| case Ain_EvCheck: |
| /* We expect both amodes only to mention %rbp, so this is in |
| fact pointless, since %rbp isn't allocatable, but anyway.. */ |
| mapRegs_AMD64AMode(m, i->Ain.EvCheck.amCounter); |
| mapRegs_AMD64AMode(m, i->Ain.EvCheck.amFailAddr); |
| return; |
| case Ain_ProfInc: |
| /* hardwires r11 -- nothing to modify. */ |
| return; |
| default: |
| ppAMD64Instr(i, mode64); |
| vpanic("mapRegs_AMD64Instr"); |
| } |
| } |
| |
| /* Figure out if i represents a reg-reg move, and if so assign the |
| source and destination to *src and *dst. If in doubt say No. Used |
| by the register allocator to do move coalescing. |
| */ |
| Bool isMove_AMD64Instr ( AMD64Instr* i, HReg* src, HReg* dst ) |
| { |
| switch (i->tag) { |
| case Ain_Alu64R: |
| /* Moves between integer regs */ |
| if (i->Ain.Alu64R.op != Aalu_MOV) |
| return False; |
| if (i->Ain.Alu64R.src->tag != Armi_Reg) |
| return False; |
| *src = i->Ain.Alu64R.src->Armi.Reg.reg; |
| *dst = i->Ain.Alu64R.dst; |
| return True; |
| case Ain_SseReRg: |
| /* Moves between SSE regs */ |
| if (i->Ain.SseReRg.op != Asse_MOV) |
| return False; |
| *src = i->Ain.SseReRg.src; |
| *dst = i->Ain.SseReRg.dst; |
| return True; |
| //uu case Ain_AvxReRg: |
| //uu /* Moves between AVX regs */ |
| //uu if (i->Ain.AvxReRg.op != Asse_MOV) |
| //uu return False; |
| //uu *src = i->Ain.AvxReRg.src; |
| //uu *dst = i->Ain.AvxReRg.dst; |
| //uu return True; |
| default: |
| return False; |
| } |
| /*NOTREACHED*/ |
| } |
| |
| |
| /* Generate amd64 spill/reload instructions under the direction of the |
| register allocator. Note it's critical these don't write the |
| condition codes. */ |
| |
| void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, |
| HReg rreg, Int offsetB, Bool mode64 ) |
| { |
| AMD64AMode* am; |
| vassert(offsetB >= 0); |
| vassert(!hregIsVirtual(rreg)); |
| vassert(mode64 == True); |
| *i1 = *i2 = NULL; |
| am = AMD64AMode_IR(offsetB, hregAMD64_RBP()); |
| switch (hregClass(rreg)) { |
| case HRcInt64: |
| *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), am ); |
| return; |
| case HRcVec128: |
| *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am ); |
| return; |
| default: |
| ppHRegClass(hregClass(rreg)); |
| vpanic("genSpill_AMD64: unimplemented regclass"); |
| } |
| } |
| |
| void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, |
| HReg rreg, Int offsetB, Bool mode64 ) |
| { |
| AMD64AMode* am; |
| vassert(offsetB >= 0); |
| vassert(!hregIsVirtual(rreg)); |
| vassert(mode64 == True); |
| *i1 = *i2 = NULL; |
| am = AMD64AMode_IR(offsetB, hregAMD64_RBP()); |
| switch (hregClass(rreg)) { |
| case HRcInt64: |
| *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(am), rreg ); |
| return; |
| case HRcVec128: |
| *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am ); |
| return; |
| default: |
| ppHRegClass(hregClass(rreg)); |
| vpanic("genReload_AMD64: unimplemented regclass"); |
| } |
| } |
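| |
| /* Illustrative example (not part of the original code): for an |
| Int64-class real register spilled at offset 128, genSpill_AMD64 |
| produces, in effect, |
| movq %reg, 128(%rbp) |
| and genReload_AMD64 produces the matching |
| movq 128(%rbp), %reg |
| A Vec128 register uses the same amode with 16-byte movups stores |
| and loads.  Neither form writes the condition codes. */ |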
| |
| |
| /* --------- The amd64 assembler (bleh.) --------- */ |
| |
| /* Produce the low three bits of an integer register number. */ |
| static UChar iregBits210 ( HReg r ) |
| { |
| UInt n; |
| vassert(hregClass(r) == HRcInt64); |
| vassert(!hregIsVirtual(r)); |
| n = hregNumber(r); |
| vassert(n <= 15); |
| return toUChar(n & 7); |
| } |
| |
| /* Produce bit 3 of an integer register number. */ |
| static UChar iregBit3 ( HReg r ) |
| { |
| UInt n; |
| vassert(hregClass(r) == HRcInt64); |
| vassert(!hregIsVirtual(r)); |
| n = hregNumber(r); |
| vassert(n <= 15); |
| return toUChar((n >> 3) & 1); |
| } |
| |
| /* Produce a complete 4-bit integer register number. */ |
| static UChar iregBits3210 ( HReg r ) |
| { |
| UInt n; |
| vassert(hregClass(r) == HRcInt64); |
| vassert(!hregIsVirtual(r)); |
| n = hregNumber(r); |
| vassert(n <= 15); |
| return toUChar(n); |
| } |
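| |
| /* Worked example (illustrative only): %r12 has encoding number 12 |
| (binary 1100), so iregBits210 gives 4, iregBit3 gives 1 and |
| iregBits3210 gives 12; for %rdx (number 2) the results are 2, 0 |
| and 2.  Bit 3 ends up in the REX prefix, the low three bits in |
| the ModRM/SIB bytes. */ |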
| |
| /* Given an xmm (128bit V-class) register number, produce the |
| equivalent numbered register in 64-bit I-class.  This is a bit of |
| fakery which allows functions that work on integer register |
| numbers to be used when assembling SSE instructions too. */ |
| static HReg vreg2ireg ( HReg r ) |
| { |
| UInt n; |
| vassert(hregClass(r) == HRcVec128); |
| vassert(!hregIsVirtual(r)); |
| n = hregNumber(r); |
| vassert(n <= 15); |
| return mkHReg(n, HRcInt64, False); |
| } |
| |
| //uu /* Ditto for ymm regs. */ |
| //uu static HReg dvreg2ireg ( HReg r ) |
| //uu { |
| //uu UInt n; |
| //uu vassert(hregClass(r) == HRcVec256); |
| //uu vassert(!hregIsVirtual(r)); |
| //uu n = hregNumber(r); |
| //uu vassert(n <= 15); |
| //uu return mkHReg(n, HRcInt64, False); |
| //uu } |
| |
| static UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem ) |
| { |
| return toUChar( ((mod & 3) << 6) |
| | ((reg & 7) << 3) |
| | (regmem & 7) ); |
| } |
| |
| static UChar mkSIB ( Int shift, Int regindex, Int regbase ) |
| { |
| return toUChar( ((shift & 3) << 6) |
| | ((regindex & 7) << 3) |
| | (regbase & 7) ); |
| } |
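| |
| /* Worked example (illustrative only): mkModRegRM(3, 1, 2) packs |
| mod=11, reg=001, rm=010 into 0xCA -- the register-register form |
| for a (%rcx, %rdx) pair -- and mkSIB(2, 1, 3) packs scale=4, |
| index=%rcx, base=%rbx into 0x8B. */ |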
| |
| static UChar* emit32 ( UChar* p, UInt w32 ) |
| { |
| *p++ = toUChar((w32) & 0x000000FF); |
| *p++ = toUChar((w32 >> 8) & 0x000000FF); |
| *p++ = toUChar((w32 >> 16) & 0x000000FF); |
| *p++ = toUChar((w32 >> 24) & 0x000000FF); |
| return p; |
| } |
| |
| static UChar* emit64 ( UChar* p, ULong w64 ) |
| { |
| p = emit32(p, toUInt(w64 & 0xFFFFFFFF)); |
| p = emit32(p, toUInt((w64 >> 32) & 0xFFFFFFFF)); |
| return p; |
| } |
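| |
| /* Both emitters write little-endian; for instance (illustrative |
| only) emit32(p, 0x11223344) produces the byte sequence |
| 44 33 22 11. */ |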
| |
| /* Does a sign-extend of the lowest 8 bits give |
| the original number? */ |
| static Bool fits8bits ( UInt w32 ) |
| { |
| Int i32 = (Int)w32; |
| return toBool(i32 == ((i32 << 24) >> 24)); |
| } |
| /* Can the lower 32 bits be signedly widened to produce the whole |
| 64-bit value? In other words, are the top 33 bits either all 0 or |
| all 1 ? */ |
| static Bool fitsIn32Bits ( ULong x ) |
| { |
| Long y0 = (Long)x; |
| Long y1 = y0; |
| y1 <<= 32; |
| y1 >>=/*s*/ 32; |
| return toBool(x == y1); |
| } |
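| |
| /* Some illustrative values (not from the original source): |
| fits8bits(0x7F) and fits8bits(0xFFFFFF80) hold, being the |
| sign-extensions of +127 and -128, but fits8bits(0x80) does not. |
| Likewise fitsIn32Bits(0xFFFFFFFF80000000ULL) holds, while |
| fitsIn32Bits(0x80000000ULL) does not, since the top 33 bits of |
| the latter are not all identical. */ |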
| |
| |
| /* Forming mod-reg-rm bytes and scale-index-base bytes. |
| |
| greg, 0(ereg) | ereg is not any of: RSP RBP R12 R13 |
| = 00 greg ereg |
| |
| greg, d8(ereg) | ereg is neither of: RSP R12 |
| = 01 greg ereg, d8 |
| |
| greg, d32(ereg) | ereg is neither of: RSP R12 |
| = 10 greg ereg, d32 |
| |
| greg, d8(ereg) | ereg is either: RSP R12 |
| = 01 greg 100, 0x24, d8 |
| (lowest bit of rex distinguishes R12/RSP) |
| |
| greg, d32(ereg) | ereg is either: RSP R12 |
| = 10 greg 100, 0x24, d32 |
| (lowest bit of rex distinguishes R12/RSP) |
| |
| ----------------------------------------------- |
| |
| greg, d8(base,index,scale) |
| | index != RSP |
| = 01 greg 100, scale index base, d8 |
| |
| greg, d32(base,index,scale) |
| | index != RSP |
| = 10 greg 100, scale index base, d32 |
| */ |
| static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am ) |
| { |
| if (am->tag == Aam_IR) { |
| if (am->Aam.IR.imm == 0 |
| && am->Aam.IR.reg != hregAMD64_RSP() |
| && am->Aam.IR.reg != hregAMD64_RBP() |
| && am->Aam.IR.reg != hregAMD64_R12() |
| && am->Aam.IR.reg != hregAMD64_R13() |
| ) { |
| *p++ = mkModRegRM(0, iregBits210(greg), |
| iregBits210(am->Aam.IR.reg)); |
| return p; |
| } |
| if (fits8bits(am->Aam.IR.imm) |
| && am->Aam.IR.reg != hregAMD64_RSP() |
| && am->Aam.IR.reg != hregAMD64_R12() |
| ) { |
| *p++ = mkModRegRM(1, iregBits210(greg), |
| iregBits210(am->Aam.IR.reg)); |
| *p++ = toUChar(am->Aam.IR.imm & 0xFF); |
| return p; |
| } |
| if (am->Aam.IR.reg != hregAMD64_RSP() |
| && am->Aam.IR.reg != hregAMD64_R12() |
| ) { |
| *p++ = mkModRegRM(2, iregBits210(greg), |
| iregBits210(am->Aam.IR.reg)); |
| p = emit32(p, am->Aam.IR.imm); |
| return p; |
| } |
| if ((am->Aam.IR.reg == hregAMD64_RSP() |
| || am->Aam.IR.reg == hregAMD64_R12()) |
| && fits8bits(am->Aam.IR.imm)) { |
| *p++ = mkModRegRM(1, iregBits210(greg), 4); |
| *p++ = 0x24; |
| *p++ = toUChar(am->Aam.IR.imm & 0xFF); |
| return p; |
| } |
| if (/* (am->Aam.IR.reg == hregAMD64_RSP() |
| || wait for test case for RSP case */ |
| am->Aam.IR.reg == hregAMD64_R12()) { |
| *p++ = mkModRegRM(2, iregBits210(greg), 4); |
| *p++ = 0x24; |
| p = emit32(p, am->Aam.IR.imm); |
| return p; |
| } |
| ppAMD64AMode(am); |
| vpanic("doAMode_M: can't emit amode IR"); |
| /*NOTREACHED*/ |
| } |
| if (am->tag == Aam_IRRS) { |
| if (fits8bits(am->Aam.IRRS.imm) |
| && am->Aam.IRRS.index != hregAMD64_RSP()) { |
| *p++ = mkModRegRM(1, iregBits210(greg), 4); |
| *p++ = mkSIB(am->Aam.IRRS.shift, am->Aam.IRRS.index, |
| am->Aam.IRRS.base); |
| *p++ = toUChar(am->Aam.IRRS.imm & 0xFF); |
| return p; |
| } |
| if (am->Aam.IRRS.index != hregAMD64_RSP()) { |
| *p++ = mkModRegRM(2, iregBits210(greg), 4); |
| *p++ = mkSIB(am->Aam.IRRS.shift, am->Aam.IRRS.index, |
| am->Aam.IRRS.base); |
| p = emit32(p, am->Aam.IRRS.imm); |
| return p; |
| } |
| ppAMD64AMode(am); |
| vpanic("doAMode_M: can't emit amode IRRS"); |
| /*NOTREACHED*/ |
| } |
| vpanic("doAMode_M: unknown amode"); |
| /*NOTREACHED*/ |
| } |
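| |
| /* Worked example (illustrative only): "movq %rax, 8(%rbp)" goes |
| through the second Aam_IR case above, since %rbp is allowed there |
| and 8 fits in 8 bits: doAMode_M emits mkModRegRM(1,0,5) = 0x45 |
| followed by the displacement byte 0x08, giving 48 89 45 08 once |
| the REX.W and opcode bytes are included. */ |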
| |
| |
| /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */ |
| static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg ) |
| { |
| *p++ = mkModRegRM(3, iregBits210(greg), iregBits210(ereg)); |
| return p; |
| } |
| |
| |
| /* Clear the W bit on a REX byte, thereby changing the operand size |
| back to whatever that instruction's default operand size is. */ |
| static inline UChar clearWBit ( UChar rex ) |
| { |
| return toUChar(rex & ~(1<<3)); |
| } |
| |
| |
| /* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */ |
| static UChar rexAMode_M ( HReg greg, AMD64AMode* am ) |
| { |
| if (am->tag == Aam_IR) { |
| UChar W = 1; /* we want 64-bit mode */ |
| UChar R = iregBit3(greg); |
| UChar X = 0; /* not relevant */ |
| UChar B = iregBit3(am->Aam.IR.reg); |
| return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0))); |
| } |
| if (am->tag == Aam_IRRS) { |
| UChar W = 1; /* we want 64-bit mode */ |
| UChar R = iregBit3(greg); |
| UChar X = iregBit3(am->Aam.IRRS.index); |
| UChar B = iregBit3(am->Aam.IRRS.base); |
| return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0))); |
| } |
| vassert(0); |
| return 0; /*NOTREACHED*/ |
| } |
| |
| /* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */ |
| static UChar rexAMode_R ( HReg greg, HReg ereg ) |
| { |
| UChar W = 1; /* we want 64-bit mode */ |
| UChar R = iregBit3(greg); |
| UChar X = 0; /* not relevant */ |
| UChar B = iregBit3(ereg); |
| return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0))); |
| } |
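| |
| /* Worked example (illustrative only): rexAMode_R(%rcx, %r12) gives |
| W=1, R=0, X=0, B=1, i.e. 0x49; clearWBit turns that into 0x41 |
| when a 32-bit operand size is wanted instead. */ |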
| |
| |
| //uu /* May 2012: this VEX prefix stuff is currently unused, but has |
| //uu been verified correct (I reckon).  Certainly it has been known to |
| //uu produce correct VEX prefixes during testing. */ |
| //uu |
| //uu /* Assemble a 2 or 3 byte VEX prefix from parts. rexR, rexX, rexB and |
| //uu notVvvvv need to be not-ed before packing. mmmmm, rexW, L and pp go |
| //uu in verbatim. There's no range checking on the bits. */ |
| //uu static UInt packVexPrefix ( UInt rexR, UInt rexX, UInt rexB, |
| //uu UInt mmmmm, UInt rexW, UInt notVvvv, |
| //uu UInt L, UInt pp ) |
| //uu { |
| //uu UChar byte0 = 0; |
| //uu UChar byte1 = 0; |
| //uu UChar byte2 = 0; |
| //uu if (rexX == 0 && rexB == 0 && mmmmm == 1 && rexW == 0) { |
| //uu /* 2 byte encoding is possible. */ |
| //uu byte0 = 0xC5; |
| //uu byte1 = ((rexR ^ 1) << 7) | ((notVvvv ^ 0xF) << 3) |
| //uu | (L << 2) | pp; |
| //uu } else { |
| //uu /* 3 byte encoding is needed. */ |
| //uu byte0 = 0xC4; |
| //uu byte1 = ((rexR ^ 1) << 7) | ((rexX ^ 1) << 6) |
| //uu | ((rexB ^ 1) << 5) | mmmmm; |
| //uu byte2 = (rexW << 7) | ((notVvvv ^ 0xF) << 3) | (L << 2) | pp; |
| //uu } |
| //uu return (((UInt)byte2) << 16) | (((UInt)byte1) << 8) | ((UInt)byte0); |
| //uu } |
| //uu |
| //uu /* Make up a VEX prefix for a (greg,amode) pair. First byte in bits |
| //uu 7:0 of result, second in 15:8, third (for a 3 byte prefix) in |
| //uu 23:16. Has m-mmmm set to indicate a prefix of 0F, pp set to |
| //uu indicate no SIMD prefix, W=0 (ignore), L=1 (size=256), and |
| //uu vvvv=1111 (unused 3rd reg). */ |
| //uu static UInt vexAMode_M ( HReg greg, AMD64AMode* am ) |
| //uu { |
| //uu UChar L = 1; /* size = 256 */ |
| //uu UChar pp = 0; /* no SIMD prefix */ |
| //uu UChar mmmmm = 1; /* 0F */ |
| //uu UChar notVvvv = 0; /* unused */ |
| //uu UChar rexW = 0; |
| //uu UChar rexR = 0; |
| //uu UChar rexX = 0; |
| //uu UChar rexB = 0; |
| //uu /* Same logic as in rexAMode_M. */ |
| //uu if (am->tag == Aam_IR) { |
| //uu rexR = iregBit3(greg); |
| //uu rexX = 0; /* not relevant */ |
| //uu rexB = iregBit3(am->Aam.IR.reg); |
| //uu } |
| //uu else if (am->tag == Aam_IRRS) { |
| //uu rexR = iregBit3(greg); |
| //uu rexX = iregBit3(am->Aam.IRRS.index); |
| //uu rexB = iregBit3(am->Aam.IRRS.base); |
| //uu } else { |
| //uu vassert(0); |
| //uu } |
| //uu return packVexPrefix( rexR, rexX, rexB, mmmmm, rexW, notVvvv, L, pp ); |
| //uu } |
| //uu |
| //uu static UChar* emitVexPrefix ( UChar* p, UInt vex ) |
| //uu { |
| //uu switch (vex & 0xFF) { |
| //uu case 0xC5: |
| //uu *p++ = 0xC5; |
| //uu *p++ = (vex >> 8) & 0xFF; |
| //uu vassert(0 == (vex >> 16)); |
| //uu break; |
| //uu case 0xC4: |
| //uu *p++ = 0xC4; |
| //uu *p++ = (vex >> 8) & 0xFF; |
| //uu *p++ = (vex >> 16) & 0xFF; |
| //uu vassert(0 == (vex >> 24)); |
| //uu break; |
| //uu default: |
| //uu vassert(0); |
| //uu } |
| //uu return p; |
| //uu } |
| |
| |
| /* Emit ffree %st(N) */ |
| static UChar* do_ffree_st ( UChar* p, Int n ) |
| { |
| vassert(n >= 0 && n <= 7); |
| *p++ = 0xDD; |
| *p++ = toUChar(0xC0 + n); |
| return p; |
| } |
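| |
| /* For instance (illustrative only), do_ffree_st(p, 3) emits the |
| two bytes DD C3, i.e. "ffree %st(3)". */ |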
| |
| /* Emit an instruction into buf and return the number of bytes used. |
| Note that buf is not the insn's final place, and therefore it is |
| imperative to emit position-independent code. If the emitted |
| instruction was a profiler inc, set *is_profInc to True, else |
| leave it unchanged. */ |
| |
| Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc, |
| UChar* buf, Int nbuf, AMD64Instr* i, |
| Bool mode64, |
| void* disp_cp_chain_me_to_slowEP, |
| void* disp_cp_chain_me_to_fastEP, |
| void* disp_cp_xindir, |
| void* disp_cp_xassisted ) |
| { |
| UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc; |
| UInt xtra; |
| UInt reg; |
| UChar rex; |
| UChar* p = &buf[0]; |
| UChar* ptmp; |
| Int j; |
| vassert(nbuf >= 32); |
| vassert(mode64 == True); |
| |
| /* Wrap an integer as an int register, for use assembling |
| GrpN insns, in which the greg field is used as a sub-opcode |
| and does not really contain a register. */ |
| # define fake(_n) mkHReg((_n), HRcInt64, False) |
| |
| /* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */ |
| |
| switch (i->tag) { |
| |
| case Ain_Imm64: |
| if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) { |
| /* Use the short form (load into 32 bit reg, + default |
| widening rule) for constants under 1 million. We could |
| use this form for the range 0 to 0x7FFFFFFF inclusive, but |
| limit it to a smaller range for verifiability purposes. */ |
| if (1 & iregBit3(i->Ain.Imm64.dst)) |
| *p++ = 0x41; |
| *p++ = 0xB8 + iregBits210(i->Ain.Imm64.dst); |
| p = emit32(p, (UInt)i->Ain.Imm64.imm64); |
| } else { |
| *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Imm64.dst))); |
| *p++ = toUChar(0xB8 + iregBits210(i->Ain.Imm64.dst)); |
| p = emit64(p, i->Ain.Imm64.imm64); |
| } |
| goto done; |
| |
| case Ain_Alu64R: |
| /* Deal specially with MOV */ |
| if (i->Ain.Alu64R.op == Aalu_MOV) { |
| switch (i->Ain.Alu64R.src->tag) { |
| case Armi_Imm: |
| if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFFFF)) { |
| /* Actually we could use this form for constants in |
| the range 0 through 0x7FFFFFFF inclusive, but |
| limit it to a small range for verifiability |
| purposes. */ |
| /* Generate "movl $imm32, 32-bit-register" and let |
| the default zero-extend rule cause the upper half |
| of the dst to be zeroed out too. This saves 1 |
| and sometimes 2 bytes compared to the more |
| obvious encoding in the 'else' branch. */ |
| if (1 & iregBit3(i->Ain.Alu64R.dst)) |
| *p++ = 0x41; |
| *p++ = 0xB8 + iregBits210(i->Ain.Alu64R.dst); |
| p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); |
| } else { |
| *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Alu64R.dst))); |
| *p++ = 0xC7; |
| *p++ = toUChar(0xC0 + iregBits210(i->Ain.Alu64R.dst)); |
| p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); |
| } |
| goto done; |
| case Armi_Reg: |
| *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg, |
| i->Ain.Alu64R.dst ); |
| *p++ = 0x89; |
| p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg, |
| i->Ain.Alu64R.dst); |
| goto done; |
| case Armi_Mem: |
| *p++ = rexAMode_M(i->Ain.Alu64R.dst, |
| i->Ain.Alu64R.src->Armi.Mem.am); |
| *p++ = 0x8B; |
| p = doAMode_M(p, i->Ain.Alu64R.dst, |
| i->Ain.Alu64R.src->Armi.Mem.am); |
| goto done; |
| default: |
| goto bad; |
| } |
| } |
| /* MUL */ |
| if (i->Ain.Alu64R.op == Aalu_MUL) { |
| switch (i->Ain.Alu64R.src->tag) { |
| case Armi_Reg: |
| *p++ = rexAMode_R( i->Ain.Alu64R.dst, |
| i->Ain.Alu64R.src->Armi.Reg.reg); |
| *p++ = 0x0F; |
| *p++ = 0xAF; |
| p = doAMode_R(p, i->Ain.Alu64R.dst, |
| i->Ain.Alu64R.src->Armi.Reg.reg); |
| goto done; |
| case Armi_Mem: |
| *p++ = rexAMode_M(i->Ain.Alu64R.dst, |
| i->Ain.Alu64R.src->Armi.Mem.am); |
| *p++ = 0x0F; |
| *p++ = 0xAF; |
| p = doAMode_M(p, i->Ain.Alu64R.dst, |
| i->Ain.Alu64R.src->Armi.Mem.am); |
| goto done; |
| case Armi_Imm: |
| if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) { |
| *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst); |
| *p++ = 0x6B; |
| p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst); |
| *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32); |
| } else { |
| *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst); |
| *p++ = 0x69; |
| p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst); |
| p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); |
| } |
| goto done; |
| default: |
| goto bad; |
| } |
| } |
| /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */ |
| opc = opc_rr = subopc_imm = opc_imma = 0; |
| switch (i->Ain.Alu64R.op) { |
| case Aalu_ADC: opc = 0x13; opc_rr = 0x11; |
| subopc_imm = 2; opc_imma = 0x15; break; |
| case Aalu_ADD: opc = 0x03; opc_rr = 0x01; |
| subopc_imm = 0; opc_imma = 0x05; break; |
| case Aalu_SUB: opc = 0x2B; opc_rr = 0x29; |
| subopc_imm = 5; opc_imma = 0x2D; break; |
| case Aalu_SBB: opc = 0x1B; opc_rr = 0x19; |
| subopc_imm = 3; opc_imma = 0x1D; break; |
| case Aalu_AND: opc = 0x23; opc_rr = 0x21; |
| subopc_imm = 4; opc_imma = 0x25; break; |
| case Aalu_XOR: opc = 0x33; opc_rr = 0x31; |
| subopc_imm = 6; opc_imma = 0x35; break; |
| case Aalu_OR: opc = 0x0B; opc_rr = 0x09; |
| subopc_imm = 1; opc_imma = 0x0D; break; |
| case Aalu_CMP: opc = 0x3B; opc_rr = 0x39; |
| subopc_imm = 7; opc_imma = 0x3D; break; |
| default: goto bad; |
| } |
| switch (i->Ain.Alu64R.src->tag) { |
| case Armi_Imm: |
| if (i->Ain.Alu64R.dst == hregAMD64_RAX() |
| && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) { |
| goto bad; /* FIXME: awaiting test case */ |
| *p++ = toUChar(opc_imma); |
| p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); |
| } else |
| if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) { |
| *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst ); |
| *p++ = 0x83; |
| p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst); |
| *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32); |
| } else { |
| *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst); |
| *p++ = 0x81; |
| p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst); |
| p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32); |
| } |
| goto done; |
| case Armi_Reg: |
| *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg, |
| i->Ain.Alu64R.dst); |
| *p++ = toUChar(opc_rr); |
| p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg, |
| i->Ain.Alu64R.dst); |
| goto done; |
| case Armi_Mem: |
| *p++ = rexAMode_M( i->Ain.Alu64R.dst, |
| i->Ain.Alu64R.src->Armi.Mem.am); |
| *p++ = toUChar(opc); |
| p = doAMode_M(p, i->Ain.Alu64R.dst, |
| i->Ain.Alu64R.src->Armi.Mem.am); |
| goto done; |
| default: |
| goto bad; |
| } |
| break; |
| |
| case Ain_Alu64M: |
| /* Deal specially with MOV */ |
| if (i->Ain.Alu64M.op == Aalu_MOV) { |
| switch (i->Ain.Alu64M.src->tag) { |
| case Ari_Reg: |
| *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg, |
| i->Ain.Alu64M.dst); |
| *p++ = 0x89; |
| p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg, |
| i->Ain.Alu64M.dst); |
| goto done; |
| case Ari_Imm: |
| *p++ = rexAMode_M(fake(0), i->Ain.Alu64M.dst); |
| *p++ = 0xC7; |
| p = doAMode_M(p, fake(0), i->Ain.Alu64M.dst); |
| p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32); |
| goto done; |
| default: |
| goto bad; |
| } |
| } |
| break; |
| |
| case Ain_Sh64: |
| opc_cl = opc_imm = subopc = 0; |
| switch (i->Ain.Sh64.op) { |
| case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break; |
| case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break; |
| case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break; |
| default: goto bad; |
| } |
| if (i->Ain.Sh64.src == 0) { |
| *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst); |
| *p++ = toUChar(opc_cl); |
| p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst); |
| goto done; |
| } else { |
| *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst); |
| *p++ = toUChar(opc_imm); |
| p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst); |
| *p++ = (UChar)(i->Ain.Sh64.src); |
| goto done; |
| } |
| break; |
| |
| case Ain_Test64: |
| /* testq sign-extend($imm32), %reg */ |
| *p++ = rexAMode_R(fake(0), i->Ain.Test64.dst); |
| *p++ = 0xF7; |
| p = doAMode_R(p, fake(0), i->Ain.Test64.dst); |
| p = emit32(p, i->Ain.Test64.imm32); |
| goto done; |
| |
| case Ain_Unary64: |
| if (i->Ain.Unary64.op == Aun_NOT) { |
| *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst); |
| *p++ = 0xF7; |
| p = doAMode_R(p, fake(2), i->Ain.Unary64.dst); |
| goto done; |
| } |
| if (i->Ain.Unary64.op == Aun_NEG) { |
| *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst); |
| *p++ = 0xF7; |
| p = doAMode_R(p, fake(3), i->Ain.Unary64.dst); |
| goto done; |
| } |
| break; |
| |
| case Ain_Lea64: |
| *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am); |
| *p++ = 0x8D; |
| p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am); |
| goto done; |
| |
| case Ain_Alu32R: |
| /* ADD/SUB/AND/OR/XOR/CMP */ |
| opc = opc_rr = subopc_imm = opc_imma = 0; |
| switch (i->Ain.Alu32R.op) { |
| case Aalu_ADD: opc = 0x03; opc_rr = 0x01; |
| subopc_imm = 0; opc_imma = 0x05; break; |
| case Aalu_SUB: opc = 0x2B; opc_rr = 0x29; |
| subopc_imm = 5; opc_imma = 0x2D; break; |
| case Aalu_AND: opc = 0x23; opc_rr = 0x21; |
| subopc_imm = 4; opc_imma = 0x25; break; |
| case Aalu_XOR: opc = 0x33; opc_rr = 0x31; |
| subopc_imm = 6; opc_imma = 0x35; break; |
| case Aalu_OR: opc = 0x0B; opc_rr = 0x09; |
| subopc_imm = 1; opc_imma = 0x0D; break; |
| case Aalu_CMP: opc = 0x3B; opc_rr = 0x39; |
| subopc_imm = 7; opc_imma = 0x3D; break; |
| default: goto bad; |
| } |
| switch (i->Ain.Alu32R.src->tag) { |
| case Armi_Imm: |
| if (i->Ain.Alu32R.dst == hregAMD64_RAX() |
| && !fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) { |
| goto bad; /* FIXME: awaiting test case */ |
| *p++ = toUChar(opc_imma); |
| p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32); |
| } else |
| if (fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) { |
| rex = clearWBit( rexAMode_R( fake(0), i->Ain.Alu32R.dst ) ); |
| if (rex != 0x40) *p++ = rex; |
| *p++ = 0x83; |
| p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu32R.dst); |
| *p++ = toUChar(0xFF & i->Ain.Alu32R.src->Armi.Imm.imm32); |
| } else { |
| rex = clearWBit( rexAMode_R( fake(0), i->Ain.Alu32R.dst) ); |
| if (rex != 0x40) *p++ = rex; |
| *p++ = 0x81; |
| p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu32R.dst); |
| p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32); |
| } |
| goto done; |
| case Armi_Reg: |
| rex = clearWBit( |
| rexAMode_R( i->Ain.Alu32R.src->Armi.Reg.reg, |
| i->Ain.Alu32R.dst) ); |
| if (rex != 0x40) *p++ = rex; |
| *p++ = toUChar(opc_rr); |
| p = doAMode_R(p, i->Ain.Alu32R.src->Armi.Reg.reg, |
| i->Ain.Alu32R.dst); |
| goto done; |
| case Armi_Mem: |
| rex = clearWBit( |
| rexAMode_M( i->Ain.Alu32R.dst, |
| i->Ain.Alu32R.src->Armi.Mem.am) ); |
| if (rex != 0x40) *p++ = rex; |
| *p++ = toUChar(opc); |
| p = doAMode_M(p, i->Ain.Alu32R.dst, |
| i->Ain.Alu32R.src->Armi.Mem.am); |
| goto done; |
| default: |
| goto bad; |
| } |
| break; |
| |
| case Ain_MulL: |
| subopc = i->Ain.MulL.syned ? 5 : 4; |
| switch (i->Ain.MulL.src->tag) { |
| case Arm_Mem: |
| *p++ = rexAMode_M( fake(0), |
| i->Ain.MulL.src->Arm.Mem.am); |
| *p++ = 0xF7; |
| p = doAMode_M(p, fake(subopc), |
| i->Ain.MulL.src->Arm.Mem.am); |
| goto done; |
| case Arm_Reg: |
| *p++ = rexAMode_R(fake(0), |
| i->Ain.MulL.src->Arm.Reg.reg); |
| *p++ = 0xF7; |
| p = doAMode_R(p, fake(subopc), |
| i->Ain.MulL.src->Arm.Reg.reg); |
| goto done; |
| default: |
| goto bad; |
| } |
| break; |
| |
| case Ain_Div: |
| subopc = i->Ain.Div.syned ? 7 : 6; |
| if (i->Ain.Div.sz == 4) { |
| switch (i->Ain.Div.src->tag) { |
| case Arm_Mem: |
| goto bad; |
| /*FIXME*/ |
| *p++ = 0xF7; |
| p = doAMode_M(p, fake(subopc), |
| i->Ain.Div.src->Arm.Mem.am); |
| goto done; |
| case Arm_Reg: |
| *p++ = clearWBit( |
| rexAMode_R( fake(0), i->Ain.Div.src->Arm.Reg.reg)); |
| *p++ = 0xF7; |
| p = doAMode_R(p, fake(subopc), |
| i->Ain.Div.src->Arm.Reg.reg); |
| goto done; |
| default: |
| goto bad; |
| } |
| } |
| if (i->Ain.Div.sz == 8) { |
| switch (i->Ain.Div.src->tag) { |
| case Arm_Mem: |
| *p++ = rexAMode_M( fake(0), |
| i->Ain.Div.src->Arm.Mem.am); |
| *p++ = 0xF7; |
| p = doAMode_M(p, fake(subopc), |
| i->Ain.Div.src->Arm.Mem.am); |
| goto done; |
| case Arm_Reg: |
| *p++ = rexAMode_R( fake(0), |
| i->Ain.Div.src->Arm.Reg.reg); |
| *p++ = 0xF7; |
| p = doAMode_R(p, fake(subopc), |
| i->Ain.Div.src->Arm.Reg.reg); |
| goto done; |
| default: |
| goto bad; |
| } |
| } |
| break; |
| |
| case Ain_Push: |
| switch (i->Ain.Push.src->tag) { |
| case Armi_Mem: |
| *p++ = clearWBit( |
| rexAMode_M(fake(0), i->Ain.Push.src->Armi.Mem.am)); |
| *p++ = 0xFF; |
| p = doAMode_M(p, fake(6), i->Ain.Push.src->Armi.Mem.am); |
| goto done; |
| case Armi_Imm: |
| *p++ = 0x68; |
| p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32); |
| goto done; |
| case Armi_Reg: |
| *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.Push.src->Armi.Reg.reg))); |
| *p++ = toUChar(0x50 + iregBits210(i->Ain.Push.src->Armi.Reg.reg)); |
| goto done; |
| default: |
| goto bad; |
| } |
| |
| case Ain_Call: { |
| /* As per detailed comment for Ain_Call in |
| getRegUsage_AMD64Instr above, %r11 is used as an address |
| temporary. */ |
| /* jump over the following two insns if the condition does not |
| hold */ |
| Bool shortImm = fitsIn32Bits(i->Ain.Call.target); |
| if (i->Ain.Call.cond != Acc_ALWAYS) { |
| *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1))); |
| *p++ = shortImm ? 10 : 13; |
| /* 10 or 13 bytes in the next two insns */ |
| } |
| if (shortImm) { |
| /* 7 bytes: movl sign-extend(imm32), %r11 */ |
| *p++ = 0x49; |
| *p++ = 0xC7; |
| *p++ = 0xC3; |
| p = emit32(p, (UInt)i->Ain.Call.target); |
| } else { |
| /* 10 bytes: movabsq $target, %r11 */ |
| *p++ = 0x49; |
| *p++ = 0xBB; |
| p = emit64(p, i->Ain.Call.target); |
| } |
| /* 3 bytes: call *%r11 */ |
| *p++ = 0x41; |
| *p++ = 0xFF; |
| *p++ = 0xD3; |
| goto done; |
| } |
| |
| case Ain_XDirect: { |
| /* NB: what goes on here has to be very closely coordinated with the |
| chainXDirect_AMD64 and unchainXDirect_AMD64 below. */ |
| /* We're generating chain-me requests here, so we need to be |
| sure this is actually allowed -- no-redir translations can't |
| use chain-me's. Hence: */ |
| vassert(disp_cp_chain_me_to_slowEP != NULL); |
| vassert(disp_cp_chain_me_to_fastEP != NULL); |
| |
| HReg r11 = hregAMD64_R11(); |
| |
| /* Use ptmp for backpatching conditional jumps. */ |
| ptmp = NULL; |
| |
| /* First off, if this is conditional, create a conditional |
| jump over the rest of it. */ |
| if (i->Ain.XDirect.cond != Acc_ALWAYS) { |
| /* jmp fwds if !condition */ |
| *p++ = toUChar(0x70 + (0xF & (i->Ain.XDirect.cond ^ 1))); |
| ptmp = p; /* fill in this bit later */ |
| *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ |
| } |
| |
| /* Update the guest RIP. */ |
| if (fitsIn32Bits(i->Ain.XDirect.dstGA)) { |
| /* use a shorter encoding */ |
| /* movl sign-extend(dstGA), %r11 */ |
| *p++ = 0x49; |
| *p++ = 0xC7; |
| *p++ = 0xC3; |
| p = emit32(p, (UInt)i->Ain.XDirect.dstGA); |
| } else { |
| /* movabsq $dstGA, %r11 */ |
| *p++ = 0x49; |
| *p++ = 0xBB; |
| p = emit64(p, i->Ain.XDirect.dstGA); |
| } |
| |
| /* movq %r11, amRIP */ |
| *p++ = rexAMode_M(r11, i->Ain.XDirect.amRIP); |
| *p++ = 0x89; |
| p = doAMode_M(p, r11, i->Ain.XDirect.amRIP); |
| |
| /* --- FIRST PATCHABLE BYTE follows --- */ |
| /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling |
| to) backs up the return address, so as to find the address of |
| the first patchable byte. So: don't change the length of the |
| two instructions below. */ |
| /* movabsq $disp_cp_chain_me_to_{slow,fast}EP,%r11; */ |
| *p++ = 0x49; |
| *p++ = 0xBB; |
| void* disp_cp_chain_me |
| = i->Ain.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP |
| : disp_cp_chain_me_to_slowEP; |
| p = emit64(p, Ptr_to_ULong(disp_cp_chain_me)); |
| /* call *%r11 */ |
| *p++ = 0x41; |
| *p++ = 0xFF; |
| *p++ = 0xD3; |
| /* --- END of PATCHABLE BYTES --- */ |
| |
| /* Fix up the conditional jump, if there was one. */ |
| if (i->Ain.XDirect.cond != Acc_ALWAYS) { |
| Int delta = p - ptmp; |
| vassert(delta > 0 && delta < 40); |
| *ptmp = toUChar(delta-1); |
| } |
| goto done; |
| } |
| |
| case Ain_XIndir: { |
| /* We're generating transfers that could lead indirectly to a |
| chain-me, so we need to be sure this is actually allowed -- |
| no-redir translations are not allowed to reach normal |
| translations without going through the scheduler. That means |
| no XDirects or XIndirs out from no-redir translations. |
| Hence: */ |
| vassert(disp_cp_xindir != NULL); |
| |
| /* Use ptmp for backpatching conditional jumps. */ |
| ptmp = NULL; |
| |
| /* First off, if this is conditional, create a conditional |
| jump over the rest of it. */ |
| if (i->Ain.XIndir.cond != Acc_ALWAYS) { |
| /* jmp fwds if !condition */ |
| *p++ = toUChar(0x70 + (0xF & (i->Ain.XIndir.cond ^ 1))); |
| ptmp = p; /* fill in this bit later */ |
| *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ |
| } |
| |
| /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */ |
| *p++ = rexAMode_M(i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP); |
| *p++ = 0x89; |
| p = doAMode_M(p, i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP); |
| |
| /* get $disp_cp_xindir into %r11 */ |
| if (fitsIn32Bits(Ptr_to_ULong(disp_cp_xindir))) { |
| /* use a shorter encoding */ |
| /* movl sign-extend(disp_cp_xindir), %r11 */ |
| *p++ = 0x49; |
| *p++ = 0xC7; |
| *p++ = 0xC3; |
| p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xindir)); |
| } else { |
| /* movabsq $disp_cp_xindir, %r11 */ |
| *p++ = 0x49; |
| *p++ = 0xBB; |
| p = emit64(p, Ptr_to_ULong(disp_cp_xindir)); |
| } |
| |
| /* jmp *%r11 */ |
| *p++ = 0x41; |
| *p++ = 0xFF; |
| *p++ = 0xE3; |
| |
| /* Fix up the conditional jump, if there was one. */ |
| if (i->Ain.XIndir.cond != Acc_ALWAYS) { |
| Int delta = p - ptmp; |
| vassert(delta > 0 && delta < 40); |
| *ptmp = toUChar(delta-1); |
| } |
| goto done; |
| } |
| |
| case Ain_XAssisted: { |
| /* Use ptmp for backpatching conditional jumps. */ |
| ptmp = NULL; |
| |
| /* First off, if this is conditional, create a conditional |
| jump over the rest of it. */ |
| if (i->Ain.XAssisted.cond != Acc_ALWAYS) { |
| /* jmp fwds if !condition */ |
| *p++ = toUChar(0x70 + (0xF & (i->Ain.XAssisted.cond ^ 1))); |
| ptmp = p; /* fill in this bit later */ |
| *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ |
| } |
| |
| /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */ |
| *p++ = rexAMode_M(i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP); |
| *p++ = 0x89; |
| p = doAMode_M(p, i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP); |
| /* movl $magic_number, %ebp. Since these numbers are all small positive |
| integers, we can get away with "movl $N, %ebp" rather than |
| the longer "movq $N, %rbp". */ |
| UInt trcval = 0; |
| switch (i->Ain.XAssisted.jk) { |
| case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break; |
| case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break; |
| case Ijk_Sys_int32: trcval = VEX_TRC_JMP_SYS_INT32; break; |
| case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break; |
| case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break; |
| case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break; |
| case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break; |
| case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break; |
| case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; |
| case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; |
| case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; |
| case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break; |
| /* We don't expect to see the following being assisted. */ |
| case Ijk_Ret: |
| case Ijk_Call: |
| /* fallthrough */ |
| default: |
| ppIRJumpKind(i->Ain.XAssisted.jk); |
| vpanic("emit_AMD64Instr.Ain_XAssisted: unexpected jump kind"); |
| } |
| vassert(trcval != 0); |
| *p++ = 0xBD; |
| p = emit32(p, trcval); |
| /* movabsq $disp_assisted, %r11 */ |
| *p++ = 0x49; |
| *p++ = 0xBB; |
| p = emit64(p, Ptr_to_ULong(disp_cp_xassisted)); |
| /* jmp *%r11 */ |
| *p++ = 0x41; |
| *p++ = 0xFF; |
| *p++ = 0xE3; |
| |
| /* Fix up the conditional jump, if there was one. */ |
| if (i->Ain.XAssisted.cond != Acc_ALWAYS) { |
| Int delta = p - ptmp; |
| vassert(delta > 0 && delta < 40); |
| *ptmp = toUChar(delta-1); |
| } |
| goto done; |
| } |
| |
| case Ain_CMov64: |
| vassert(i->Ain.CMov64.cond != Acc_ALWAYS); |
| if (i->Ain.CMov64.src->tag == Arm_Reg) { |
| *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg); |
| *p++ = 0x0F; |
| *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond)); |
| p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg); |
| goto done; |
| } |
| if (i->Ain.CMov64.src->tag == Arm_Mem) { |
| *p++ = rexAMode_M(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am); |
| *p++ = 0x0F; |
| *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond)); |
| p = doAMode_M(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am); |
| goto done; |
| } |
| break; |
| |
| case Ain_MovxLQ: |
| /* No, _don't_ ask me why the sense of the args has to be |
| different in the S vs Z case. I don't know. */ |
| if (i->Ain.MovxLQ.syned) { |
| /* Need REX.W = 1 here, but rexAMode_R does that for us. */ |
| *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src); |
| *p++ = 0x63; |
| p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src); |
| } else { |
| /* Produce a 32-bit reg-reg move, since the implicit |
| zero-extend does what we want. */ |
| *p++ = clearWBit ( |
| rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst)); |
| *p++ = 0x89; |
| p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst); |
| } |
| goto done; |
| |
| case Ain_LoadEX: |
| if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) { |
| /* movzbq */ |
| *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src); |
| *p++ = 0x0F; |
| *p++ = 0xB6; |
| p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src); |
| goto done; |
| } |
| if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) { |
| /* movzwq */ |
| *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src); |
| *p++ = 0x0F; |
| *p++ = 0xB7; |
| p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src); |
| goto done; |
| } |
| if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) { |
| /* movzlq */ |
| /* This isn't really an existing AMD64 instruction per se. |
| Rather, we have to do a 32-bit load. Because a 32-bit |
| write implicitly clears the upper 32 bits of the target |
| register, we get what we want. */ |
| *p++ = clearWBit( |
| rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src)); |
| *p++ = 0x8B; |
| p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src); |
| goto done; |
| } |
| break; |
| |
| case Ain_Set64: |
| /* Make the destination register be 1 or 0, depending on whether |
| the relevant condition holds. Complication: the top 56 bits |
| of the destination should be forced to zero, but doing 'xorq |
| %r,%r' kills the flag(s) we are about to read. Sigh. So |
| start off by moving $0 into the dest. */ |
| reg = iregBits3210(i->Ain.Set64.dst); |
| vassert(reg < 16); |
| |
| /* movq $0, %dst */ |
| *p++ = toUChar(reg >= 8 ? 0x49 : 0x48); |
| *p++ = 0xC7; |
| *p++ = toUChar(0xC0 + (reg & 7)); |
| p = emit32(p, 0); |
| |
| /* setb lo8(%dst) */ |
| /* note, 8-bit register rex trickiness.  Be careful here. */ |
| *p++ = toUChar(reg >= 8 ? 0x41 : 0x40); |
| *p++ = 0x0F; |
| *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond)); |
| *p++ = toUChar(0xC0 + (reg & 7)); |
| goto done; |
| |
| case Ain_Bsfr64: |
| *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src); |
| *p++ = 0x0F; |
| if (i->Ain.Bsfr64.isFwds) { |
| *p++ = 0xBC; |
| } else { |
| *p++ = 0xBD; |
| } |
| p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src); |
| goto done; |
| |
| case Ain_MFence: |
| /* mfence */ |
| *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0; |
| goto done; |
| |
| case Ain_ACAS: |
| /* lock */ |
| *p++ = 0xF0; |
| if (i->Ain.ACAS.sz == 2) *p++ = 0x66; |
| /* cmpxchg{b,w,l,q} %rbx,mem. Expected-value in %rax, new value |
| in %rbx. The new-value register is hardwired to be %rbx |
| since dealing with byte integer registers is too much hassle, |
| so we force the register operand to %rbx (could equally be |
| %rcx or %rdx). */ |
| rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr ); |
| if (i->Ain.ACAS.sz != 8) |
| rex = clearWBit(rex); |
| |
| *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */ |
| *p++ = 0x0F; |
| if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1; |
| p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr); |
| goto done; |
| |
| case Ain_DACAS: |
| /* lock */ |
| *p++ = 0xF0; |
| /* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new |
| value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so |
| aren't encoded in the insn. */ |
| rex = rexAMode_M( fake(1), i->Ain.DACAS.addr ); |
| if (i->Ain.DACAS.sz != 8) |
| rex = clearWBit(rex); |
| *p++ = rex; |
| *p++ = 0x0F; |
| *p++ = 0xC7; |
| p = doAMode_M(p, fake(1), i->Ain.DACAS.addr); |
| goto done; |
| |
| case Ain_A87Free: |
| vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7); |
| for (j = 0; j < i->Ain.A87Free.nregs; j++) { |
| p = do_ffree_st(p, 7-j); |
| } |
| goto done; |
| |
| case Ain_A87PushPop: |
| vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4); |
| if (i->Ain.A87PushPop.isPush) { |
| /* Load from memory into %st(0): flds/fldl amode */ |
| *p++ = clearWBit( |
| rexAMode_M(fake(0), i->Ain.A87PushPop.addr) ); |
| *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD; |
| p = doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr); |
| } else { |
| /* Dump %st(0) to memory: fstps/fstpl amode */ |
| *p++ = clearWBit( |
| rexAMode_M(fake(3), i->Ain.A87PushPop.addr) ); |
| *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD; |
| p = doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr); |
| goto done; |
| } |
| goto done; |
| |
| case Ain_A87FpOp: |
| switch (i->Ain.A87FpOp.op) { |
| case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break; |
| case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break; |
| case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break; |
| case Afp_TAN: *p++ = 0xD9; *p++ = 0xF2; break; |
| case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break; |
| case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break; |
| case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break; |
| case Afp_ATAN: *p++ = 0xD9; *p++ = 0xF3; break; |
| case Afp_YL2X: *p++ = 0xD9; *p++ = 0xF1; break; |
| case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break; |
| case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break; |
| case Afp_PREM1: *p++ = 0xD9; *p++ = 0xF5; break; |
| default: goto bad; |
| } |
| goto done; |
| |
| case Ain_A87LdCW: |
| *p++ = clearWBit( |
| rexAMode_M(fake(5), i->Ain.A87LdCW.addr) ); |
| *p++ = 0xD9; |
| p = doAMode_M(p, fake(5)/*subopcode*/, i->Ain.A87LdCW.addr); |
| goto done; |
| |
| case Ain_A87StSW: |
| *p++ = clearWBit( |
| rexAMode_M(fake(7), i->Ain.A87StSW.addr) ); |
| *p++ = 0xDD; |
| p = doAMode_M(p, fake(7)/*subopcode*/, i->Ain.A87StSW.addr); |
| goto done; |
| |
| case Ain_Store: |
| if (i->Ain.Store.sz == 2) { |
| /* This just goes to show the craziness of the instruction |
| set encoding. We have to insert two prefix bytes, but be |
| careful to avoid a conflict in what the size should be, by |
| ensuring that REX.W = 0. */ |
| *p++ = 0x66; /* override to 16-bits */ |
| *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) ); |
| *p++ = 0x89; |
| p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst); |
| goto done; |
| } |
| if (i->Ain.Store.sz == 4) { |
| *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) ); |
| *p++ = 0x89; |
| p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst); |
| goto done; |
| } |
| if (i->Ain.Store.sz == 1) { |
| /* This is one place where it would be wrong to skip emitting |
| a rex byte of 0x40, since the mere presence of rex changes |
| the meaning of the byte register access. Be careful. */ |
| *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) ); |
| *p++ = 0x88; |
| p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst); |
| goto done; |
| } |
| break; |
| |
| case Ain_LdMXCSR: |
| *p++ = clearWBit(rexAMode_M( fake(0), i->Ain.LdMXCSR.addr)); |
| *p++ = 0x0F; |
| *p++ = 0xAE; |
| p = doAMode_M(p, fake(2)/*subopcode*/, i->Ain.LdMXCSR.addr); |
| goto done; |
| |
| case Ain_SseUComIS: |
| /* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */ |
| /* ucomi[sd] %srcL, %srcR */ |
| if (i->Ain.SseUComIS.sz == 8) { |
| *p++ = 0x66; |
| } else { |
| /* Only the sz == 8 (ucomisd) form is handled so far; ucomiss |
| (sz == 4) would simply omit the 0x66 prefix, but bail out |
| until a test case shows up. */ |
| vassert(i->Ain.SseUComIS.sz == 4); |
| goto bad; |
| } |
| *p++ = clearWBit ( |
| rexAMode_R( vreg2ireg(i->Ain.SseUComIS.srcL), |
| vreg2ireg(i->Ain.SseUComIS.srcR) )); |
| *p++ = 0x0F; |
| *p++ = 0x2E; |
| p = doAMode_R(p, vreg2ireg(i->Ain.SseUComIS.srcL), |
| vreg2ireg(i->Ain.SseUComIS.srcR) ); |
| /* pushfq */ |
| *p++ = 0x9C; |
| /* popq %dst */ |
| *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.SseUComIS.dst))); |
| *p++ = toUChar(0x58 + iregBits210(i->Ain.SseUComIS.dst)); |
| goto done; |
| |
| case Ain_SseSI2SF: |
| /* cvtsi2s[sd] %src, %dst */ |
| rex = rexAMode_R( vreg2ireg(i->Ain.SseSI2SF.dst), |
| i->Ain.SseSI2SF.src ); |
| *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2); |
| *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex); |
| *p++ = 0x0F; |
| *p++ = 0x2A; |
| p = doAMode_R( p, vreg2ireg(i->Ain.SseSI2SF.dst), |
| i->Ain.SseSI2SF.src ); |
| goto done; |
| |
| case Ain_SseSF2SI: |
| /* cvts[sd]2si %src, %dst */ |
| rex = rexAMode_R( i->Ain.SseSF2SI.dst, |
| vreg2ireg(i->Ain.SseSF2SI.src) ); |
| *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2); |
| *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex); |
| *p++ = 0x0F; |
| *p++ = 0x2D; |
| p = doAMode_R( p, i->Ain.SseSF2SI.dst, |
| vreg2ireg(i->Ain.SseSF2SI.src) ); |
| goto done; |
| |
| case Ain_SseSDSS: |
| /* cvtsd2ss/cvtss2sd %src, %dst */ |
| *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3); |
| *p++ = clearWBit( |
| rexAMode_R( vreg2ireg(i->Ain.SseSDSS.dst), |
| vreg2ireg(i->Ain.SseSDSS.src) )); |
| *p++ = 0x0F; |
| *p++ = 0x5A; |
| p = doAMode_R( p, vreg2ireg(i->Ain.SseSDSS.dst), |
| vreg2ireg(i->Ain.SseSDSS.src) ); |
| goto done; |
| |
| case Ain_SseLdSt: |
| if (i->Ain.SseLdSt.sz == 8) { |
| *p++ = 0xF2; |
| } else |
| if (i->Ain.SseLdSt.sz == 4) { |
| *p++ = 0xF3; |
| } else |
| if (i->Ain.SseLdSt.sz != 16) { |
| vassert(0); |
| } |
| *p++ = clearWBit( |
| rexAMode_M( vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr)); |
| *p++ = 0x0F; |
| *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11); |
| p = doAMode_M(p, vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr); |
| goto done; |
| |
| case Ain_SseLdzLO: |
| vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8); |
| /* movs[sd] amode, %xmm-dst */ |
| *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 0xF3 : 0xF2); |
| *p++ = clearWBit( |
| rexAMode_M(vreg2ireg(i->Ain.SseLdzLO.reg), |
| i->Ain.SseLdzLO.addr)); |
| *p++ = 0x0F; |
| *p++ = 0x10; |
| p = doAMode_M(p, vreg2ireg(i->Ain.SseLdzLO.reg), |
| i->Ain.SseLdzLO.addr); |
| goto done; |
| |
| case Ain_Sse32Fx4: |
| xtra = 0; |
| *p++ = clearWBit( |
| rexAMode_R( vreg2ireg(i->Ain.Sse32Fx4.dst), |
| vreg2ireg(i->Ain.Sse32Fx4.src) )); |
| *p++ = 0x0F; |
| switch (i->Ain.Sse32Fx4.op) { |
| case Asse_ADDF: *p++ = 0x58; break; |
| case Asse_DIVF: *p++ = 0x5E; break; |
| case Asse_MAXF: *p++ = 0x5F; break; |
| case Asse_MINF: *p++ = 0x5D; break; |
| case Asse_MULF: *p++ = 0x59; break; |
| case Asse_RCPF: *p++ = 0x53; break; |
| case Asse_RSQRTF: *p++ = 0x52; break; |
| case Asse_SQRTF: *p++ = 0x51; break; |
| case Asse_SUBF: *p++ = 0x5C; break; |
| case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; |
| case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; |
| case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; |
| case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; |
| default: goto bad; |
| } |
| p = doAMode_R(p, vreg2ireg(i->Ain.Sse32Fx4.dst), |
| vreg2ireg(i->Ain.Sse32Fx4.src) ); |
| if (xtra & 0x100) |
| *p++ = toUChar(xtra & 0xFF); |
| goto done; |
| |
| case Ain_Sse64Fx2: |
| xtra = 0; |
| *p++ = 0x66; |
| *p++ = clearWBit( |
| rexAMode_R( vreg2ireg(i->Ain.Sse64Fx2.dst), |
| vreg2ireg(i->Ain.Sse64Fx2.src) )); |
| *p++ = 0x0F; |
| switch (i->Ain.Sse64Fx2.op) { |
| case Asse_ADDF: *p++ = 0x58; break; |
| case Asse_DIVF: *p++ = 0x5E; break; |
| case Asse_MAXF: *p++ = 0x5F; break; |
| case Asse_MINF: *p++ = 0x5D; break; |
| case Asse_MULF: *p++ = 0x59; break; |
| case Asse_SQRTF: *p++ = 0x51; break; |
| case Asse_SUBF: *p++ = 0x5C; break; |
| case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; |
| case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; |
| case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; |
| case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; |
| default: goto bad; |
| } |
| p = doAMode_R(p, vreg2ireg(i->Ain.Sse64Fx2.dst), |
| vreg2ireg(i->Ain.Sse64Fx2.src) ); |
| if (xtra & 0x100) |
| *p++ = toUChar(xtra & 0xFF); |
| goto done; |
| |
| case Ain_Sse32FLo: |
| xtra = 0; |
| *p++ = 0xF3; |
| *p++ = clearWBit( |
| rexAMode_R( vreg2ireg(i->Ain.Sse32FLo.dst), |
| vreg2ireg(i->Ain.Sse32FLo.src) )); |
| *p++ = 0x0F; |
| switch (i->Ain.Sse32FLo.op) { |
| case Asse_ADDF: *p++ = 0x58; break; |
| case Asse_DIVF: *p++ = 0x5E; break; |
| case Asse_MAXF: *p++ = 0x5F; break; |
| case Asse_MINF: *p++ = 0x5D; break; |
| case Asse_MULF: *p++ = 0x59; break; |
| case Asse_RCPF: *p++ = 0x53; break; |
| case Asse_RSQRTF: *p++ = 0x52; break; |
| case Asse_SQRTF: *p++ = 0x51; break; |
| case Asse_SUBF: *p++ = 0x5C; break; |
| case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; |
| case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; |
| case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; |
| case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; |
| default: goto bad; |
| } |
| p = doAMode_R(p, vreg2ireg(i->Ain.Sse32FLo.dst), |
| vreg2ireg(i->Ain.Sse32FLo.src) ); |
| if (xtra & 0x100) |
| *p++ = toUChar(xtra & 0xFF); |
| goto done; |
| |
| case Ain_Sse64FLo: |
| xtra = 0; |
| *p++ = 0xF2; |
| *p++ = clearWBit( |
| rexAMode_R( vreg2ireg(i->Ain.Sse64FLo.dst), |
| vreg2ireg(i->Ain.Sse64FLo.src) )); |
| *p++ = 0x0F; |
| switch (i->Ain.Sse64FLo.op) { |
| case Asse_ADDF: *p++ = 0x58; break; |
| case Asse_DIVF: *p++ = 0x5E; break; |
| case Asse_MAXF: *p++ = 0x5F; break; |
| case Asse_MINF: *p++ = 0x5D; break; |
| case Asse_MULF: *p++ = 0x59; break; |
| case Asse_SQRTF: *p++ = 0x51; break; |
| case Asse_SUBF: *p++ = 0x5C; break; |
| case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; |
| case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break; |
| case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break; |
| case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break; |
| default: goto bad; |
| } |
| p = doAMode_R(p, vreg2ireg(i->Ain.Sse64FLo.dst), |
| vreg2ireg(i->Ain.Sse64FLo.src) ); |
| if (xtra & 0x100) |
| *p++ = toUChar(xtra & 0xFF); |
| goto done; |
| |
| case Ain_SseReRg: |
| # define XX(_n) *p++ = (_n) |
| |
| rex = clearWBit( |
| rexAMode_R( vreg2ireg(i->Ain.SseReRg.dst), |
| vreg2ireg(i->Ain.SseReRg.src) )); |
| |
| switch (i->Ain.SseReRg.op) { |
| case Asse_MOV: /*movups*/ XX(rex); XX(0x0F); XX(0x10); break; |
| case Asse_OR: XX(rex); XX(0x0F); XX(0x56); break; |
| case Asse_XOR: XX(rex); XX(0x0F); XX(0x57); break; |
| case Asse_AND: XX(rex); XX(0x0F); XX(0x54); break; |
| case Asse_ANDN: XX(rex); XX(0x0F); XX(0x55); break; |
| case Asse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break; |
| case Asse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); break; |
| case Asse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); break; |
| case Asse_ADD8: XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break; |
| case Asse_ADD16: XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break; |
| case Asse_ADD32: XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break; |
| case Asse_ADD64: XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break; |
| case Asse_QADD8S: XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break; |
| case Asse_QADD16S: XX(0x66); XX(rex); XX(0x0F); XX(0xED); break; |
| case Asse_QADD8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break; |
| case Asse_QADD16U: XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break; |
| case Asse_AVG8U: XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break; |
| case Asse_AVG16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break; |
| case Asse_CMPEQ8: XX(0x66); XX(rex); XX(0x0F); XX(0x74); break; |
| case Asse_CMPEQ16: XX(0x66); XX(rex); XX(0x0F); XX(0x75); break; |
| case Asse_CMPEQ32: XX(0x66); XX(rex); XX(0x0F); XX(0x76); break; |
| case Asse_CMPGT8S: XX(0x66); XX(rex); XX(0x0F); XX(0x64); break; |
| case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break; |
| case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break; |
| case Asse_MAX16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break; |
| case Asse_MAX8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break; |
| case Asse_MIN16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break; |
| case Asse_MIN8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break; |
| case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break; |
| case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break; |
| case Asse_MUL16: XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break; |
| case Asse_SHL16: XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break; |
| case Asse_SHL32: XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break; |
| case Asse_SHL64: XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break; |
| case Asse_SAR16: XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break; |
| case Asse_SAR32: XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break; |
| case Asse_SHR16: XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break; |
| case Asse_SHR32: XX(0x66); XX(rex); XX(0x0F); XX(0xD2); break; |
| case Asse_SHR64: XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break; |
| case Asse_SUB8: XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break; |
| case Asse_SUB16: XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break; |
| case Asse_SUB32: XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break; |
| case Asse_SUB64: XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break; |
| case Asse_QSUB8S: XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break; |
| case Asse_QSUB16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break; |
| case Asse_QSUB8U: XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break; |
| case Asse_QSUB16U: XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break; |
| case Asse_UNPCKHB: XX(0x66); XX(rex); XX(0x0F); XX(0x68); break; |
| case Asse_UNPCKHW: XX(0x66); XX(rex); XX(0x0F); XX(0x69); break; |
| case Asse_UNPCKHD: XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break; |
| case Asse_UNPCKHQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break; |
| case Asse_UNPCKLB: XX(0x66); XX(rex); XX(0x0F); XX(0x60); break; |
| case Asse_UNPCKLW: XX(0x66); XX(rex); XX(0x0F); XX(0x61); break; |
| case Asse_UNPCKLD: XX(0x66); XX(rex); XX(0x0F); XX(0x62); break; |
| case Asse_UNPCKLQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break; |
| default: goto bad; |
| } |
| p = doAMode_R(p, vreg2ireg(i->Ain.SseReRg.dst), |
| vreg2ireg(i->Ain.SseReRg.src) ); |
| # undef XX |
| goto done; |
| |
| case Ain_SseCMov: |
| /* jmp fwds if !condition */ |
| *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1)); |
| *p++ = 0; /* # of bytes in the next bit, which we don't know yet */ |
| ptmp = p; |
| |
| /* movaps %src, %dst */ |
| *p++ = clearWBit( |
| rexAMode_R( vreg2ireg(i->Ain.SseCMov.dst), |
| vreg2ireg(i->Ain.SseCMov.src) )); |
| *p++ = 0x0F; |
| *p++ = 0x28; |
| p = doAMode_R(p, vreg2ireg(i->Ain.SseCMov.dst), |
| vreg2ireg(i->Ain.SseCMov.src) ); |
| |
| /* Fill in the jump offset. */ |
| *(ptmp-1) = toUChar(p - ptmp); |
| goto done; |
| |
| case Ain_SseShuf: |
| *p++ = 0x66; |
| *p++ = clearWBit( |
| rexAMode_R( vreg2ireg(i->Ain.SseShuf.dst), |
| vreg2ireg(i->Ain.SseShuf.src) )); |
| *p++ = 0x0F; |
| *p++ = 0x70; |
| p = doAMode_R(p, vreg2ireg(i->Ain.SseShuf.dst), |
| vreg2ireg(i->Ain.SseShuf.src) ); |
| *p++ = (UChar)(i->Ain.SseShuf.order); |
| goto done; |
| |
| //uu case Ain_AvxLdSt: { |
| //uu UInt vex = vexAMode_M( dvreg2ireg(i->Ain.AvxLdSt.reg), |
| //uu i->Ain.AvxLdSt.addr ); |
| //uu p = emitVexPrefix(p, vex); |
| //uu *p++ = toUChar(i->Ain.AvxLdSt.isLoad ? 0x10 : 0x11); |
| //uu p = doAMode_M(p, dvreg2ireg(i->Ain.AvxLdSt.reg), i->Ain.AvxLdSt.addr); |
| //uu goto done; |
| //uu } |
| |
| case Ain_EvCheck: { |
| /* We generate: |
| (3 bytes) decl 8(%rbp) 8 == offsetof(host_EvC_COUNTER) |
| (2 bytes) jns nofail expected taken |
| (3 bytes) jmp* 0(%rbp) 0 == offsetof(host_EvC_FAILADDR) |
| nofail: |
| */ |
| /* This is heavily asserted re instruction lengths.  It needs to |
| be.  If we are given unexpected forms of .amCounter or |
| .amFailAddr -- basically, anything that's not of the form |
| uimm7(%rbp) -- those assertions are likely to fail. */ |
| /* Note also that after the decl we must be very careful not to |
| read the carry flag, else we get a partial flags stall. |
| js/jns avoids that, though. */ |
| UChar* p0 = p; |
| /* --- decl 8(%rbp) --- */ |
| /* Need to compute the REX byte for the decl in order to prove |
| that we don't need it, since this is a 32-bit decl and all |
| registers involved in the amode are < r8. "fake(1)" because |
| there's no register in this encoding; instead the register |
| field is used as a sub opcode. The encoding for "decl r/m32" |
| is FF /1, hence the fake(1). */ |
| rex = clearWBit(rexAMode_M(fake(1), i->Ain.EvCheck.amCounter)); |
| if (rex != 0x40) goto bad; /* We don't expect to need the REX byte. */ |
| *p++ = 0xFF; |
| p = doAMode_M(p, fake(1), i->Ain.EvCheck.amCounter); |
| vassert(p - p0 == 3); |
| /* --- jns nofail --- */ |
| *p++ = 0x79; |
| *p++ = 0x03; /* need to check this 0x03 after the next insn */ |
| vassert(p - p0 == 5); |
| /* --- jmp* 0(%rbp) --- */ |
| /* Once again, verify we don't need REX. The encoding is FF /4. |
| We don't need REX.W since by default FF /4 in 64-bit mode |
| implies a 64 bit load. */ |
| rex = clearWBit(rexAMode_M(fake(4), i->Ain.EvCheck.amFailAddr)); |
| if (rex != 0x40) goto bad; |
| *p++ = 0xFF; |
| p = doAMode_M(p, fake(4), i->Ain.EvCheck.amFailAddr); |
| vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */ |
| /* And crosscheck .. */ |
| vassert(evCheckSzB_AMD64() == 8); |
| goto done; |
| } |
| |
| case Ain_ProfInc: { |
| /* We generate movabsq $0, %r11 |
| incq (%r11) |
| in the expectation that a later call to LibVEX_patchProfCtr |
| will be used to fill in the immediate field once the right |
| value is known. |
| 49 BB 00 00 00 00 00 00 00 00 |
| 49 FF 03 |
| */ |
| *p++ = 0x49; *p++ = 0xBB; |
| *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; |
| *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; |
| *p++ = 0x49; *p++ = 0xFF; *p++ = 0x03; |
| /* Tell the caller .. */ |
| vassert(!(*is_profInc)); |
| *is_profInc = True; |
| goto done; |
| } |
| |
| default: |
| goto bad; |
| } |
| |
| bad: |
| ppAMD64Instr(i, mode64); |
| vpanic("emit_AMD64Instr"); |
| /*NOTREACHED*/ |
| |
| done: |
| vassert(p - &buf[0] <= 32); |
| return p - &buf[0]; |
| |
| # undef fake |
| } |
| |
| |
| /* How big is an event check? See case for Ain_EvCheck in |
| emit_AMD64Instr just above. That crosschecks what this returns, so |
| we can tell if we're inconsistent. */ |
| Int evCheckSzB_AMD64 ( void ) |
| { |
| return 8; |
| } |
| |
| |
| /* NB: what goes on here has to be very closely coordinated with the |
| emitInstr case for XDirect, above. */ |
| VexInvalRange chainXDirect_AMD64 ( void* place_to_chain, |
| void* disp_cp_chain_me_EXPECTED, |
| void* place_to_jump_to ) |
| { |
| /* What we're expecting to see is: |
| movabsq $disp_cp_chain_me_EXPECTED, %r11 |
| call *%r11 |
| viz |
| 49 BB <8 bytes value == disp_cp_chain_me_EXPECTED> |
| 41 FF D3 |
| */ |
| UChar* p = (UChar*)place_to_chain; |
| vassert(p[0] == 0x49); |
| vassert(p[1] == 0xBB); |
| vassert(*(ULong*)(&p[2]) == Ptr_to_ULong(disp_cp_chain_me_EXPECTED)); |
| vassert(p[10] == 0x41); |
| vassert(p[11] == 0xFF); |
| vassert(p[12] == 0xD3); |
| /* And what we want to change it to is either: |
| (general case): |
| movabsq $place_to_jump_to, %r11 |
| jmpq *%r11 |
| viz |
| 49 BB <8 bytes value == place_to_jump_to> |
| 41 FF E3 |
| So it's the same length (convenient, huh) and we don't |
| need to change all the bits. |
| ---OR--- |
| in the case where the displacement falls within 32 bits |
| jmpq disp32 where disp32 is relative to the next insn |
| ud2; ud2; ud2; ud2 |
| viz |
| E9 <4 bytes == disp32> |
| 0F 0B 0F 0B 0F 0B 0F 0B |
| |
| In both cases the replacement has the same length as the original. |
| To remain sane & verifiable, |
| (1) limit the displacement for the short form to |
| (say) +/- one billion, so as to avoid wraparound |
| off-by-ones |
| (2) even if the short form is applicable, once every (say) |
| 1024 times use the long form anyway, so as to maintain |
| verifiability |
| */ |
| /* This is the delta we need to put into a JMP d32 insn. It's |
| relative to the start of the next insn, hence the -5. */ |
| Long delta = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)5; |
| Bool shortOK = delta >= -1000*1000*1000 && delta < 1000*1000*1000; |
| |
| static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */ |
| if (shortOK) { |
| shortCTR++; /* not thread-safe, but a racy update only perturbs the short-vs-long choice */ |
| if (0 == (shortCTR & 0x3FF)) { |
| shortOK = False; |
| if (0) |
| vex_printf("QQQ chainXDirect_AMD64: shortCTR = %u, " |
| "using long jmp\n", shortCTR); |
| } |
| } |
| |
| /* And make the modifications. */ |
| if (shortOK) { |
| p[0] = 0xE9; |
| p[1] = (delta >> 0) & 0xFF; |
| p[2] = (delta >> 8) & 0xFF; |
| p[3] = (delta >> 16) & 0xFF; |
| p[4] = (delta >> 24) & 0xFF; |
| p[5] = 0x0F; p[6] = 0x0B; |
| p[7] = 0x0F; p[8] = 0x0B; |
| p[9] = 0x0F; p[10] = 0x0B; |
| p[11] = 0x0F; p[12] = 0x0B; |
| /* sanity check on the delta -- its top 32 bits must be all 0 or all 1, which the range check above guarantees */ |
| delta >>= 32; |
| vassert(delta == 0LL || delta == -1LL); |
| } else { |
| /* Minimal modifications from the starting sequence. */ |
| *(ULong*)(&p[2]) = Ptr_to_ULong(place_to_jump_to); |
| p[12] = 0xE3; |
| } |
| VexInvalRange vir = {0, 0}; |
| return vir; |
| } |
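| |
| |
| /* A minimal sketch, not part of VEX (the helper name is hypothetical): |
|    the short-form test used in chainXDirect_AMD64 above.  The rel32 in |
|    "E9 <disp32>" is measured from the end of the 5-byte jmp, hence the |
|    -5, and is only accepted well inside the architectural +/-2GB range |
|    so that wraparound corner cases never arise. */ |
| static Bool sketch_shortFormReachable ( void* from, void* to ) |
| { |
|    Long delta = (Long)((UChar*)to - (UChar*)from) - 5LL; |
|    return delta >= -1000LL*1000*1000 && delta < 1000LL*1000*1000; |
| } |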
| |
| |
| /* NB: what goes on here has to be very closely coordinated with the |
| emitInstr case for XDirect, above. */ |
| VexInvalRange unchainXDirect_AMD64 ( void* place_to_unchain, |
| void* place_to_jump_to_EXPECTED, |
| void* disp_cp_chain_me ) |
| { |
| /* What we're expecting to see is either: |
| (general case) |
| movabsq $place_to_jump_to_EXPECTED, %r11 |
| jmpq *%r11 |
| viz |
| 49 BB <8 bytes value == place_to_jump_to_EXPECTED> |
| 41 FF E3 |
| ---OR--- |
| in the case where the displacement falls within 32 bits |
| jmpq disp32 |
| ud2; ud2; ud2; ud2 |
| viz |
| E9 <4 bytes == disp32> |
| 0F 0B 0F 0B 0F 0B 0F 0B |
| */ |
| UChar* p = (UChar*)place_to_unchain; |
| Bool valid = False; |
| if (p[0] == 0x49 && p[1] == 0xBB |
| && *(ULong*)(&p[2]) == Ptr_to_ULong(place_to_jump_to_EXPECTED) |
| && p[10] == 0x41 && p[11] == 0xFF && p[12] == 0xE3) { |
| /* it's the long form */ |
| valid = True; |
| } |
| else |
| if (p[0] == 0xE9 |
| && p[5] == 0x0F && p[6] == 0x0B |
| && p[7] == 0x0F && p[8] == 0x0B |
| && p[9] == 0x0F && p[10] == 0x0B |
| && p[11] == 0x0F && p[12] == 0x0B) { |
| /* It's the short form. Check the offset is right. */ |
| Int s32 = *(Int*)(&p[1]); |
| Long s64 = (Long)s32; |
| if ((UChar*)p + 5 + s64 == (UChar*)place_to_jump_to_EXPECTED) { |
| valid = True; |
| if (0) |
| vex_printf("QQQ unchainXDirect_AMD64: found short form\n"); |
| } |
| } |
| vassert(valid); |
| /* And what we want to change it to is: |
| movabsq $disp_cp_chain_me, %r11 |
| call *%r11 |
| viz |
| 49 BB <8 bytes value == disp_cp_chain_me> |
| 41 FF D3 |
| So it's the same length (convenient, huh). |
| */ |
| p[0] = 0x49; |
| p[1] = 0xBB; |
| *(ULong*)(&p[2]) = Ptr_to_ULong(disp_cp_chain_me); |
| p[10] = 0x41; |
| p[11] = 0xFF; |
| p[12] = 0xD3; |
| VexInvalRange vir = {0, 0}; |
| return vir; |
| } |
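| |
| |
| /* A minimal sketch, not part of VEX (the helper name is hypothetical): |
|    recover the current jump target from a patch site that is in either |
|    of the two forms validated by unchainXDirect_AMD64 above.  It merely |
|    restates the byte layouts described there. */ |
| static ULong sketch_currentChainTarget ( void* place ) |
| { |
|    UChar* p = (UChar*)place; |
|    if (p[0] == 0x49 && p[1] == 0xBB |
|        && p[10] == 0x41 && p[11] == 0xFF && p[12] == 0xE3) { |
|       /* long form: the target is the movabsq immediate */ |
|       return *(ULong*)(&p[2]); |
|    } |
|    if (p[0] == 0xE9) { |
|       /* short form: rel32 measured from the end of the 5-byte jmp */ |
|       Long disp = (Long)( *(Int*)(&p[1]) ); |
|       return Ptr_to_ULong(p + 5 + disp); |
|    } |
|    return 0; /* not a recognised chained form */ |
| } |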
| |
| |
| /* Patch the counter address into a profile inc point, as previously |
| created by the Ain_ProfInc case for emit_AMD64Instr. */ |
| VexInvalRange patchProfInc_AMD64 ( void* place_to_patch, |
| ULong* location_of_counter ) |
| { |
| vassert(sizeof(ULong*) == 8); |
| UChar* p = (UChar*)place_to_patch; |
| vassert(p[0] == 0x49); |
| vassert(p[1] == 0xBB); |
| vassert(p[2] == 0x00); |
| vassert(p[3] == 0x00); |
| vassert(p[4] == 0x00); |
| vassert(p[5] == 0x00); |
| vassert(p[6] == 0x00); |
| vassert(p[7] == 0x00); |
| vassert(p[8] == 0x00); |
| vassert(p[9] == 0x00); |
| vassert(p[10] == 0x49); |
| vassert(p[11] == 0xFF); |
| vassert(p[12] == 0x03); |
| ULong imm64 = (ULong)Ptr_to_ULong(location_of_counter); |
| p[2] = imm64 & 0xFF; imm64 >>= 8; |
| p[3] = imm64 & 0xFF; imm64 >>= 8; |
| p[4] = imm64 & 0xFF; imm64 >>= 8; |
| p[5] = imm64 & 0xFF; imm64 >>= 8; |
| p[6] = imm64 & 0xFF; imm64 >>= 8; |
| p[7] = imm64 & 0xFF; imm64 >>= 8; |
| p[8] = imm64 & 0xFF; imm64 >>= 8; |
| p[9] = imm64 & 0xFF; imm64 >>= 8; |
| VexInvalRange vir = {0, 0}; |
| return vir; |
| } |
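| |
| |
| /* A minimal usage sketch, not part of VEX; the names "ctr" and |
|    "sketch_patchOneProfInc" are illustrative.  After emit_AMD64Instr has |
|    laid down the Ain_ProfInc skeleton with a zero immediate, the caller |
|    supplies the real counter address and the eight immediate bytes are |
|    rewritten in place, little-endian, as above. */ |
| static ULong ctr = 0; |
| static void sketch_patchOneProfInc ( void* profinc_site ) |
| { |
|    VexInvalRange vir = patchProfInc_AMD64(profinc_site, &ctr); |
|    (void)vir; /* on amd64 the caller need not invalidate the icache */ |
| } |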
| |
| |
| /*---------------------------------------------------------------*/ |
| /*--- end host_amd64_defs.c ---*/ |
| /*---------------------------------------------------------------*/ |