| |
| /*---------------------------------------------------------------*/ |
| /*--- ---*/ |
| /*--- This file (guest-amd64/ghelpers.c) is ---*/ |
| /*--- Copyright (c) 2004 OpenWorks LLP. All rights reserved. ---*/ |
| /*--- ---*/ |
| /*---------------------------------------------------------------*/ |
| |
| /* |
| This file is part of LibVEX, a library for dynamic binary |
| instrumentation and translation. |
| |
| Copyright (C) 2004 OpenWorks, LLP. |
| |
| This program is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; Version 2 dated June 1991 of the |
| license. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or liability |
| for damages. See the GNU General Public License for more details. |
| |
| Neither the names of the U.S. Department of Energy nor the |
| University of California nor the names of its contributors may be |
| used to endorse or promote products derived from this software |
| without prior written permission. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 |
| USA. |
| */ |
| |
| #include "libvex_basictypes.h" |
| #include "libvex_emwarn.h" |
| #include "libvex_guest_amd64.h" |
| #include "libvex_ir.h" |
| #include "libvex.h" |
| |
| #include "main/vex_util.h" |
| #include "guest-amd64/gdefs.h" |
| #include "guest-generic/g_generic_x87.h" |
| |
| |
/* This file contains helper functions for amd64 guest code.
   Calls to these functions are generated by the back end.  Those
   calls appear in host machine code, and since this file is itself
   compiled to host machine code, everything matches up.

   Change the signatures of these helper functions only with great
   care.  If you change a signature here, you must also change the
   arguments passed to it in the IR calls constructed by
   guest-amd64/toIR.c.

   The convention used is that all functions called from generated
   code are named amd64g_<something>, and any function whose name
   lacks that prefix is not called from generated code.  Note that
   some LibVEX_* functions can however be called by VEX's client,
   but that is not the same as calling them from VEX-generated code.
*/
| |
| |
| /* Set to 1 to get detailed profiling info about use of the flag |
| machinery. */ |
| #define PROFILE_RFLAGS 0 |
| |
| |
| /*---------------------------------------------------------------*/ |
| /*--- %rflags run-time helpers. ---*/ |
| /*---------------------------------------------------------------*/ |
| |
| /* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags |
| after imulq/mulq. */ |
| |
| static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo ) |
| { |
| ULong u0, v0, w0; |
| Long u1, v1, w1, w2, t; |
| u0 = u & 0xFFFFFFFFULL; |
| u1 = u >> 32; |
| v0 = v & 0xFFFFFFFFULL; |
| v1 = v >> 32; |
| w0 = u0 * v0; |
| t = u1 * v0 + (w0 >> 32); |
| w1 = t & 0xFFFFFFFFULL; |
| w2 = t >> 32; |
| w1 = u0 * v1 + w1; |
| *rHi = u1 * v1 + w2 + (w1 >> 32); |
| *rLo = u * v; |
| } |
| |
| static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo ) |
| { |
| ULong u0, v0, w0; |
   ULong u1, v1, w1, w2, t;
| u0 = u & 0xFFFFFFFFULL; |
| u1 = u >> 32; |
| v0 = v & 0xFFFFFFFFULL; |
| v1 = v >> 32; |
| w0 = u0 * v0; |
| t = u1 * v0 + (w0 >> 32); |
| w1 = t & 0xFFFFFFFFULL; |
| w2 = t >> 32; |
| w1 = u0 * v1 + w1; |
| *rHi = u1 * v1 + w2 + (w1 >> 32); |
| *rLo = u * v; |
| } |
| |
| |
| static const UChar parity_table[256] = { |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| }; |
| |
/* Generalised left shifter: positive shift counts shift left,
   negative counts shift right. */
| static inline Long lshift ( Long x, Int n ) |
| { |
| if (n >= 0) |
| return x << n; |
| else |
| return x >> (-n); |
| } |
| |
| /* identity on ULong */ |
| static inline ULong idULong ( ULong x ) |
| { |
| return x; |
| } |
| |
| |
| #define PREAMBLE(__data_bits) \ |
| /* const */ ULong DATA_MASK \ |
| = __data_bits==8 \ |
| ? 0xFFULL \ |
| : (__data_bits==16 \ |
| ? 0xFFFFULL \ |
| : (__data_bits==32 \ |
| ? 0xFFFFFFFFULL \ |
| : 0xFFFFFFFFFFFFFFFFULL)); \ |
| /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1); \ |
| /* const */ ULong CC_DEP1 = cc_dep1_formal; \ |
| /* const */ ULong CC_DEP2 = cc_dep2_formal; \ |
| /* const */ ULong CC_NDEP = cc_ndep_formal; \ |
| /* Four bogus assignments, which hopefully gcc can */ \ |
| /* optimise away, and which stop it complaining about */ \ |
| /* unused variables. */ \ |
| SIGN_MASK = SIGN_MASK; \ |
| DATA_MASK = DATA_MASK; \ |
| CC_DEP2 = CC_DEP2; \ |
| CC_NDEP = CC_NDEP; |
| |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| Long argL, argR, res; \ |
| argL = CC_DEP1; \ |
| argR = CC_DEP2; \ |
| res = argL + argR; \ |
| cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \ |
| pf = parity_table[(UChar)res]; \ |
| af = (res ^ argL ^ argR) & 0x10; \ |
| zf = ((DATA_UTYPE)res == 0) << 6; \ |
| sf = lshift(res, 8 - DATA_BITS) & 0x80; \ |
| of = lshift((argL ^ argR ^ -1) & (argL ^ res), \ |
| 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| Long argL, argR, res; \ |
| argL = CC_DEP1; \ |
| argR = CC_DEP2; \ |
| res = argL - argR; \ |
| cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \ |
| pf = parity_table[(UChar)res]; \ |
| af = (res ^ argL ^ argR) & 0x10; \ |
| zf = ((DATA_UTYPE)res == 0) << 6; \ |
| sf = lshift(res, 8 - DATA_BITS) & 0x80; \ |
| of = lshift((argL ^ argR) & (argL ^ res), \ |
| 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| Long argL, argR, oldC, res; \ |
| oldC = CC_NDEP & AMD64G_CC_MASK_C; \ |
| argL = CC_DEP1; \ |
| argR = CC_DEP2 ^ oldC; \ |
| res = (argL + argR) + oldC; \ |
| if (oldC) \ |
| cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \ |
| else \ |
| cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \ |
| pf = parity_table[(UChar)res]; \ |
| af = (res ^ argL ^ argR) & 0x10; \ |
| zf = ((DATA_UTYPE)res == 0) << 6; \ |
| sf = lshift(res, 8 - DATA_BITS) & 0x80; \ |
| of = lshift((argL ^ argR ^ -1) & (argL ^ res), \ |
| 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| Long argL, argR, oldC, res; \ |
| oldC = CC_NDEP & AMD64G_CC_MASK_C; \ |
| argL = CC_DEP1; \ |
| argR = CC_DEP2 ^ oldC; \ |
| res = (argL - argR) - oldC; \ |
| if (oldC) \ |
| cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR; \ |
| else \ |
| cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \ |
| pf = parity_table[(UChar)res]; \ |
| af = (res ^ argL ^ argR) & 0x10; \ |
| zf = ((DATA_UTYPE)res == 0) << 6; \ |
| sf = lshift(res, 8 - DATA_BITS) & 0x80; \ |
| of = lshift((argL ^ argR) & (argL ^ res), \ |
| 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| cf = 0; \ |
| pf = parity_table[(UChar)CC_DEP1]; \ |
| af = 0; \ |
| zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ |
| sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ |
| of = 0; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| Long argL, argR, res; \ |
| res = CC_DEP1; \ |
| argL = res - 1; \ |
| argR = 1; \ |
| cf = CC_NDEP & AMD64G_CC_MASK_C; \ |
| pf = parity_table[(UChar)res]; \ |
| af = (res ^ argL ^ argR) & 0x10; \ |
| zf = ((DATA_UTYPE)res == 0) << 6; \ |
| sf = lshift(res, 8 - DATA_BITS) & 0x80; \ |
| of = ((res & DATA_MASK) == SIGN_MASK) << 11; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| Long argL, argR, res; \ |
| res = CC_DEP1; \ |
| argL = res + 1; \ |
| argR = 1; \ |
| cf = CC_NDEP & AMD64G_CC_MASK_C; \ |
| pf = parity_table[(UChar)res]; \ |
| af = (res ^ argL ^ argR) & 0x10; \ |
| zf = ((DATA_UTYPE)res == 0) << 6; \ |
| sf = lshift(res, 8 - DATA_BITS) & 0x80; \ |
| of = ((res & DATA_MASK) \ |
| == ((ULong)SIGN_MASK - 1)) << 11; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C; \ |
| pf = parity_table[(UChar)CC_DEP1]; \ |
| af = 0; /* undefined */ \ |
| zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ |
| sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ |
| /* of is defined if shift count == 1 */ \ |
| of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \ |
| & AMD64G_CC_MASK_O; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| cf = CC_DEP2 & 1; \ |
| pf = parity_table[(UChar)CC_DEP1]; \ |
| af = 0; /* undefined */ \ |
| zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ |
| sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ |
| /* of is defined if shift count == 1 */ \ |
| of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \ |
| & AMD64G_CC_MASK_O; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| /* ROL: cf' = lsb(result). of' = msb(result) ^ lsb(result). */ |
| /* DEP1 = result, NDEP = old flags */ |
| #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long fl \ |
| = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \ |
| | (AMD64G_CC_MASK_C & CC_DEP1) \ |
| | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \ |
| 11-(DATA_BITS-1)) \ |
| ^ lshift(CC_DEP1, 11))); \ |
| return fl; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| /* ROR: cf' = msb(result). of' = msb(result) ^ msb-1(result). */ |
| /* DEP1 = result, NDEP = old flags */ |
| #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long fl \ |
| = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \ |
| | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \ |
| | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \ |
| 11-(DATA_BITS-1)) \ |
| ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \ |
| return fl; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU, \ |
| DATA_U2TYPE, NARROWto2U) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| DATA_UTYPE hi; \ |
| DATA_UTYPE lo \ |
| = NARROWtoU( ((DATA_UTYPE)CC_DEP1) \ |
| * ((DATA_UTYPE)CC_DEP2) ); \ |
| DATA_U2TYPE rr \ |
| = NARROWto2U( \ |
| ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1)) \ |
| * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) ); \ |
| hi = NARROWtoU(rr >>/*u*/ DATA_BITS); \ |
| cf = (hi != 0); \ |
| pf = parity_table[(UChar)lo]; \ |
| af = 0; /* undefined */ \ |
| zf = (lo == 0) << 6; \ |
| sf = lshift(lo, 8 - DATA_BITS) & 0x80; \ |
| of = cf << 11; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS, \ |
| DATA_S2TYPE, NARROWto2S) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| DATA_STYPE hi; \ |
| DATA_STYPE lo \ |
| = NARROWtoS( ((DATA_STYPE)CC_DEP1) \ |
| * ((DATA_STYPE)CC_DEP2) ); \ |
| DATA_S2TYPE rr \ |
| = NARROWto2S( \ |
| ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \ |
| * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) ); \ |
| hi = NARROWtoS(rr >>/*s*/ DATA_BITS); \ |
| cf = (hi != (lo >>/*s*/ (DATA_BITS-1))); \ |
| pf = parity_table[(UChar)lo]; \ |
| af = 0; /* undefined */ \ |
| zf = (lo == 0) << 6; \ |
| sf = lshift(lo, 8 - DATA_BITS) & 0x80; \ |
| of = cf << 11; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_UMULQ \ |
| { \ |
| PREAMBLE(64); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| ULong lo, hi; \ |
| mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo ); \ |
| cf = (hi != 0); \ |
| pf = parity_table[(UChar)lo]; \ |
| af = 0; /* undefined */ \ |
| zf = (lo == 0) << 6; \ |
| sf = lshift(lo, 8 - 64) & 0x80; \ |
| of = cf << 11; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_SMULQ \ |
| { \ |
| PREAMBLE(64); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| Long lo, hi; \ |
| mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo ); \ |
| cf = (hi != (lo >>/*s*/ (64-1))); \ |
| pf = parity_table[(UChar)lo]; \ |
| af = 0; /* undefined */ \ |
| zf = (lo == 0) << 6; \ |
| sf = lshift(lo, 8 - 64) & 0x80; \ |
| of = cf << 11; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| |
| #if PROFILE_RFLAGS |
| |
| static Bool initted = False; |
| |
| /* C flag, fast route */ |
| static UInt tabc_fast[AMD64G_CC_OP_NUMBER]; |
| /* C flag, slow route */ |
| static UInt tabc_slow[AMD64G_CC_OP_NUMBER]; |
| /* table for calculate_cond */ |
| static UInt tab_cond[AMD64G_CC_OP_NUMBER][16]; |
| /* total entry counts for calc_all, calc_c, calc_cond. */ |
| static UInt n_calc_all = 0; |
| static UInt n_calc_c = 0; |
| static UInt n_calc_cond = 0; |
| |
| #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond))) |
| |
| |
| static void showCounts ( void ) |
| { |
| Int op, co; |
| Char ch; |
| vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n", |
| n_calc_all, n_calc_cond, n_calc_c); |
| |
| vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE" |
| " S NS P NP L NL LE NLE\n"); |
| vex_printf(" -----------------------------------------------------" |
| "----------------------------------------\n"); |
| for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) { |
| |
| ch = ' '; |
| if (op > 0 && (op-1) % 4 == 0) |
| ch = 'B'; |
| if (op > 0 && (op-1) % 4 == 1) |
| ch = 'W'; |
| if (op > 0 && (op-1) % 4 == 2) |
| ch = 'L'; |
| if (op > 0 && (op-1) % 4 == 3) |
| ch = 'Q'; |
| |
| vex_printf("%2d%c: ", op, ch); |
| vex_printf("%6u ", tabc_slow[op]); |
| vex_printf("%6u ", tabc_fast[op]); |
| for (co = 0; co < 16; co++) { |
| Int n = tab_cond[op][co]; |
| if (n >= 1000) { |
| vex_printf(" %3dK", n / 1000); |
| } else |
| if (n >= 0) { |
| vex_printf(" %3d ", n ); |
| } else { |
| vex_printf(" "); |
| } |
| } |
| vex_printf("\n"); |
| } |
| vex_printf("\n"); |
| } |
| |
| static void initCounts ( void ) |
| { |
| Int op, co; |
| initted = True; |
| for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) { |
| tabc_fast[op] = tabc_slow[op] = 0; |
| for (co = 0; co < 16; co++) |
| tab_cond[op][co] = 0; |
| } |
| } |
| |
| #endif /* PROFILE_RFLAGS */ |
| |
| |
/* Calculate all 6 flags from the supplied thunk parameters.
   Worker function, not called directly from generated code; the
   clean helpers below wrap it. */
| static |
| ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op, |
| ULong cc_dep1_formal, |
| ULong cc_dep2_formal, |
| ULong cc_ndep_formal ) |
| { |
| switch (cc_op) { |
| case AMD64G_CC_OP_COPY: |
| return cc_dep1_formal |
| & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z |
| | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P); |
| |
| case AMD64G_CC_OP_ADDB: ACTIONS_ADD( 8, UChar ); |
| case AMD64G_CC_OP_ADDW: ACTIONS_ADD( 16, UShort ); |
| case AMD64G_CC_OP_ADDL: ACTIONS_ADD( 32, UInt ); |
| case AMD64G_CC_OP_ADDQ: ACTIONS_ADD( 64, ULong ); |
| |
| case AMD64G_CC_OP_ADCB: ACTIONS_ADC( 8, UChar ); |
| case AMD64G_CC_OP_ADCW: ACTIONS_ADC( 16, UShort ); |
| case AMD64G_CC_OP_ADCL: ACTIONS_ADC( 32, UInt ); |
| case AMD64G_CC_OP_ADCQ: ACTIONS_ADC( 64, ULong ); |
| |
| case AMD64G_CC_OP_SUBB: ACTIONS_SUB( 8, UChar ); |
| case AMD64G_CC_OP_SUBW: ACTIONS_SUB( 16, UShort ); |
| case AMD64G_CC_OP_SUBL: ACTIONS_SUB( 32, UInt ); |
| case AMD64G_CC_OP_SUBQ: ACTIONS_SUB( 64, ULong ); |
| |
| case AMD64G_CC_OP_SBBB: ACTIONS_SBB( 8, UChar ); |
| case AMD64G_CC_OP_SBBW: ACTIONS_SBB( 16, UShort ); |
| case AMD64G_CC_OP_SBBL: ACTIONS_SBB( 32, UInt ); |
| case AMD64G_CC_OP_SBBQ: ACTIONS_SBB( 64, ULong ); |
| |
| case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC( 8, UChar ); |
| case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort ); |
| case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt ); |
| case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong ); |
| |
| case AMD64G_CC_OP_INCB: ACTIONS_INC( 8, UChar ); |
| case AMD64G_CC_OP_INCW: ACTIONS_INC( 16, UShort ); |
| case AMD64G_CC_OP_INCL: ACTIONS_INC( 32, UInt ); |
| case AMD64G_CC_OP_INCQ: ACTIONS_INC( 64, ULong ); |
| |
| case AMD64G_CC_OP_DECB: ACTIONS_DEC( 8, UChar ); |
| case AMD64G_CC_OP_DECW: ACTIONS_DEC( 16, UShort ); |
| case AMD64G_CC_OP_DECL: ACTIONS_DEC( 32, UInt ); |
| case AMD64G_CC_OP_DECQ: ACTIONS_DEC( 64, ULong ); |
| |
| case AMD64G_CC_OP_SHLB: ACTIONS_SHL( 8, UChar ); |
| case AMD64G_CC_OP_SHLW: ACTIONS_SHL( 16, UShort ); |
| case AMD64G_CC_OP_SHLL: ACTIONS_SHL( 32, UInt ); |
| case AMD64G_CC_OP_SHLQ: ACTIONS_SHL( 64, ULong ); |
| |
| case AMD64G_CC_OP_SHRB: ACTIONS_SHR( 8, UChar ); |
| case AMD64G_CC_OP_SHRW: ACTIONS_SHR( 16, UShort ); |
| case AMD64G_CC_OP_SHRL: ACTIONS_SHR( 32, UInt ); |
| case AMD64G_CC_OP_SHRQ: ACTIONS_SHR( 64, ULong ); |
| |
| case AMD64G_CC_OP_ROLB: ACTIONS_ROL( 8, UChar ); |
| case AMD64G_CC_OP_ROLW: ACTIONS_ROL( 16, UShort ); |
| case AMD64G_CC_OP_ROLL: ACTIONS_ROL( 32, UInt ); |
| case AMD64G_CC_OP_ROLQ: ACTIONS_ROL( 64, ULong ); |
| |
| case AMD64G_CC_OP_RORB: ACTIONS_ROR( 8, UChar ); |
| case AMD64G_CC_OP_RORW: ACTIONS_ROR( 16, UShort ); |
| case AMD64G_CC_OP_RORL: ACTIONS_ROR( 32, UInt ); |
| case AMD64G_CC_OP_RORQ: ACTIONS_ROR( 64, ULong ); |
| |
| case AMD64G_CC_OP_UMULB: ACTIONS_UMUL( 8, UChar, toUChar, |
| UShort, toUShort ); |
| case AMD64G_CC_OP_UMULW: ACTIONS_UMUL( 16, UShort, toUShort, |
| UInt, toUInt ); |
| case AMD64G_CC_OP_UMULL: ACTIONS_UMUL( 32, UInt, toUInt, |
| ULong, idULong ); |
| |
| case AMD64G_CC_OP_UMULQ: ACTIONS_UMULQ; |
| |
| case AMD64G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, toUChar, |
| Short, toUShort ); |
| case AMD64G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, toUShort, |
| Int, toUInt ); |
| case AMD64G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, toUInt, |
| Long, idULong ); |
| |
| case AMD64G_CC_OP_SMULQ: ACTIONS_SMULQ; |
| |
| default: |
| /* shouldn't really make these calls from generated code */ |
| vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)" |
| "( %llu, 0x%llx, 0x%llx, 0x%llx )\n", |
| cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal ); |
| vpanic("amd64g_calculate_rflags_all_WRK(AMD64)"); |
| } |
| } |
| |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
/* Calculate all 6 flags from the supplied thunk parameters. */
| ULong amd64g_calculate_rflags_all ( ULong cc_op, |
| ULong cc_dep1, |
| ULong cc_dep2, |
| ULong cc_ndep ) |
| { |
| # if PROFILE_RFLAGS |
| if (!initted) initCounts(); |
| n_calc_all++; |
| if (SHOW_COUNTS_NOW) showCounts(); |
| # endif |
| return |
| amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep ); |
| } |
| |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| /* Calculate just the carry flag from the supplied thunk parameters. */ |
| ULong amd64g_calculate_rflags_c ( ULong cc_op, |
| ULong cc_dep1, |
| ULong cc_dep2, |
| ULong cc_ndep ) |
| { |
| # if PROFILE_RFLAGS |
| if (!initted) initCounts(); |
| n_calc_c++; |
| tabc_fast[cc_op]++; |
| if (SHOW_COUNTS_NOW) showCounts(); |
| # endif |
| |
| /* Fast-case some common ones. */ |
| switch (cc_op) { |
| case AMD64G_CC_OP_COPY: |
| return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1; |
| case AMD64G_CC_OP_LOGICQ: |
| case AMD64G_CC_OP_LOGICL: |
| case AMD64G_CC_OP_LOGICW: |
| case AMD64G_CC_OP_LOGICB: |
| return 0; |
| // case AMD64G_CC_OP_SUBL: |
| // return ((UInt)cc_dep1) < ((UInt)cc_dep2) |
| // ? AMD64G_CC_MASK_C : 0; |
| // case AMD64G_CC_OP_SUBW: |
| // return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF)) |
| // ? AMD64G_CC_MASK_C : 0; |
| // case AMD64G_CC_OP_SUBB: |
| // return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF)) |
| // ? AMD64G_CC_MASK_C : 0; |
| // case AMD64G_CC_OP_INCL: |
| // case AMD64G_CC_OP_DECL: |
| // return cc_ndep & AMD64G_CC_MASK_C; |
| default: |
| break; |
| } |
| |
| # if PROFILE_RFLAGS |
| tabc_fast[cc_op]--; |
| tabc_slow[cc_op]++; |
| # endif |
| |
| return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep) |
| & AMD64G_CC_MASK_C; |
| } |
| |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| /* returns 1 or 0 */ |
| ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond, |
| ULong cc_op, |
| ULong cc_dep1, |
| ULong cc_dep2, |
| ULong cc_ndep ) |
| { |
| ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1, |
| cc_dep2, cc_ndep); |
| ULong of,sf,zf,cf,pf; |
| ULong inv = cond & 1; |
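   /* Conditions come in complementary pairs (O/NO, B/NB, Z/NZ, ...)
      and the low bit of 'cond' selects the negated member of the
      pair, so each case below computes the positive sense of the
      condition and then xors it with 'inv'. */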
| |
| # if PROFILE_RFLAGS |
| if (!initted) initCounts(); |
| tab_cond[cc_op][cond]++; |
| n_calc_cond++; |
| if (SHOW_COUNTS_NOW) showCounts(); |
| # endif |
| |
| switch (cond) { |
| case AMD64CondNO: |
| case AMD64CondO: /* OF == 1 */ |
| of = rflags >> AMD64G_CC_SHIFT_O; |
| return 1 & (inv ^ of); |
| |
| case AMD64CondNZ: |
| case AMD64CondZ: /* ZF == 1 */ |
| zf = rflags >> AMD64G_CC_SHIFT_Z; |
| return 1 & (inv ^ zf); |
| |
| case AMD64CondNB: |
| case AMD64CondB: /* CF == 1 */ |
| cf = rflags >> AMD64G_CC_SHIFT_C; |
| return 1 & (inv ^ cf); |
| |
| case AMD64CondNBE: |
| case AMD64CondBE: /* (CF or ZF) == 1 */ |
| cf = rflags >> AMD64G_CC_SHIFT_C; |
| zf = rflags >> AMD64G_CC_SHIFT_Z; |
| return 1 & (inv ^ (cf | zf)); |
| |
| case AMD64CondNS: |
| case AMD64CondS: /* SF == 1 */ |
| sf = rflags >> AMD64G_CC_SHIFT_S; |
| return 1 & (inv ^ sf); |
| |
| case AMD64CondNP: |
| case AMD64CondP: /* PF == 1 */ |
| pf = rflags >> AMD64G_CC_SHIFT_P; |
| return 1 & (inv ^ pf); |
| |
| case AMD64CondNL: |
| case AMD64CondL: /* (SF xor OF) == 1 */ |
| sf = rflags >> AMD64G_CC_SHIFT_S; |
| of = rflags >> AMD64G_CC_SHIFT_O; |
| return 1 & (inv ^ (sf ^ of)); |
| |
| case AMD64CondNLE: |
| case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */ |
| sf = rflags >> AMD64G_CC_SHIFT_S; |
| of = rflags >> AMD64G_CC_SHIFT_O; |
| zf = rflags >> AMD64G_CC_SHIFT_Z; |
| return 1 & (inv ^ ((sf ^ of) | zf)); |
| |
| default: |
| /* shouldn't really make these calls from generated code */ |
| vex_printf("amd64g_calculate_condition" |
| "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n", |
| cond, cc_op, cc_dep1, cc_dep2, cc_ndep ); |
| vpanic("amd64g_calculate_condition"); |
| } |
| } |
| |
| |
| /* VISIBLE TO LIBVEX CLIENT */ |
| ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/VexGuestAMD64State* vex_state ) |
| { |
| ULong rflags = amd64g_calculate_rflags_all_WRK( |
| vex_state->guest_CC_OP, |
| vex_state->guest_CC_DEP1, |
| vex_state->guest_CC_DEP2, |
| vex_state->guest_CC_NDEP |
| ); |
| Long dflag = vex_state->guest_DFLAG; |
| vassert(dflag == 1 || dflag == -1); |
| if (dflag == -1) |
| rflags |= (1<<10); |
| if (vex_state->guest_IDFLAG == 1) |
| rflags |= (1<<21); |
| return rflags; |
| } |
| |
| |
| /*---------------------------------------------------------------*/ |
| /*--- %rflags translation-time function specialisers. ---*/ |
| /*--- These help iropt specialise calls the above run-time ---*/ |
| /*--- %rflags functions. ---*/ |
| /*---------------------------------------------------------------*/ |
| |
| /* Used by the optimiser to try specialisations. Returns an |
| equivalent expression, or NULL if none. */ |
| |
| static Bool isU64 ( IRExpr* e, ULong n ) |
| { |
| return toBool( e->tag == Iex_Const |
| && e->Iex.Const.con->tag == Ico_U64 |
| && e->Iex.Const.con->Ico.U64 == n ); |
| } |
| |
| IRExpr* guest_amd64_spechelper ( HChar* function_name, |
| IRExpr** args ) |
| { |
| # define unop(_op,_a1) IRExpr_Unop((_op),(_a1)) |
| # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2)) |
| # define mkU64(_n) IRExpr_Const(IRConst_U64(_n)) |
| # define mkU8(_n) IRExpr_Const(IRConst_U8(_n)) |
| |
| Int i, arity = 0; |
| for (i = 0; args[i]; i++) |
| arity++; |
| # if 0 |
| vex_printf("spec request:\n"); |
| vex_printf(" %s ", function_name); |
| for (i = 0; i < arity; i++) { |
| vex_printf(" "); |
| ppIRExpr(args[i]); |
| } |
| vex_printf("\n"); |
| # endif |
| |
| /* --------- specialising "amd64g_calculate_condition" --------- */ |
| |
| if (vex_streq(function_name, "amd64g_calculate_condition")) { |
| /* specialise calls to above "calculate condition" function */ |
| IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2; |
| vassert(arity == 5); |
| cond = args[0]; |
| cc_op = args[1]; |
| cc_dep1 = args[2]; |
| cc_dep2 = args[3]; |
| |
| /*---------------- ADDQ ----------------*/ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) { |
| /* long long add, then Z --> test (dst+src == 0) */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpEQ64, |
| binop(Iop_Add64, cc_dep1, cc_dep2), |
| mkU64(0))); |
| } |
| |
| /*---------------- SUBL ----------------*/ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) { |
| /* long sub/cmp, then Z --> test dst==src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpEQ32, |
| unop(Iop_64to32,cc_dep1), |
| unop(Iop_64to32,cc_dep2))); |
| } |
| |
| //.. if (isU32(cc_op, AMD64G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) { |
| //.. /* long sub/cmp, then NZ --> test dst!=src */ |
| //.. return unop(Iop_1Uto32, |
| //.. binop(Iop_CmpNE32, cc_dep1, cc_dep2)); |
| //.. } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) { |
| /* long sub/cmp, then L (signed less than) |
| --> test dst <s src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLT64S, |
| binop(Iop_Shl64,cc_dep1,mkU8(32)), |
| binop(Iop_Shl64,cc_dep2,mkU8(32)))); |
| |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) { |
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLE64S, |
| binop(Iop_Shl64,cc_dep1,mkU8(32)), |
| binop(Iop_Shl64,cc_dep2,mkU8(32)))); |
| |
| } |
| |
| |
| //.. if (isU32(cc_op, AMD64G_CC_OP_SUBL) && isU32(cond, X86CondBE)) { |
| //.. /* long sub/cmp, then BE (unsigned less than or equal) |
| //.. --> test dst <=u src */ |
| //.. return unop(Iop_1Uto32, |
| //.. binop(Iop_CmpLE32U, cc_dep1, cc_dep2)); |
| //.. } |
| //.. |
| //.. if (isU32(cc_op, AMD64G_CC_OP_SUBL) && isU32(cond, X86CondB)) { |
| //.. /* long sub/cmp, then B (unsigned less than) |
| //.. --> test dst <u src */ |
| //.. return unop(Iop_1Uto32, |
| //.. binop(Iop_CmpLT32U, cc_dep1, cc_dep2)); |
| //.. } |
| |
| /*---------------- SUBW ----------------*/ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) { |
| /* word sub/cmp, then Z --> test dst==src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpEQ16, |
| unop(Iop_64to16,cc_dep1), |
| unop(Iop_64to16,cc_dep2))); |
| } |
| |
| /*---------------- SUBB ----------------*/ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) { |
| /* byte sub/cmp, then Z --> test dst==src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpEQ8, |
| unop(Iop_64to8,cc_dep1), |
| unop(Iop_64to8,cc_dep2))); |
| } |
| |
| // if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) { |
| // /* byte sub/cmp, then NZ --> test dst!=src */ |
| // return unop(Iop_32Uto64, |
| // unop(Iop_1Uto32, |
| // binop(Iop_CmpNE8, |
| // unop(Iop_32to8,unop(Iop_64to32,cc_dep1)), |
| // unop(Iop_32to8,unop(Iop_64to32,cc_dep2))))); |
| // } |
| |
| //.. if (isU32(cc_op, AMD64G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) { |
| //.. /* long sub/cmp, then NBE (unsigned greater than) |
| //.. --> test src <=u dst */ |
| //.. /* Note, args are opposite way round from the usual */ |
| //.. return unop(Iop_1Uto32, |
| //.. binop(Iop_CmpLT32U, |
| //.. binop(Iop_And32,cc_dep2,mkU32(0xFF)), |
| //.. binop(Iop_And32,cc_dep1,mkU32(0xFF)))); |
| //.. } |
| |
| /*---------------- LOGICL ----------------*/ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) { |
| /* long and/or/xor, then Z --> test dst==0 */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpEQ64, |
| binop(Iop_Shl64,cc_dep1,mkU8(32)), |
| mkU64(0))); |
| } |
| |
| //.. if (isU32(cc_op, AMD64G_CC_OP_LOGICL) && isU32(cond, X86CondS)) { |
| //.. /* long and/or/xor, then S --> test dst <s 0 */ |
| //.. return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0))); |
| //.. } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) { |
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to
            the result and makes OF be zero.  LE computes
            (SF ^ OF) | ZF, but OF is zero, so this reduces to
            SF | ZF -- which will be 1 iff the result is <=signed 0.
            Hence ...
         */
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLE64S, |
| binop(Iop_Shl64,cc_dep1,mkU8(32)), |
| mkU64(0))); |
| } |
| |
| //.. if (isU32(cc_op, AMD64G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) { |
| //.. /* long and/or/xor, then BE |
| //.. LOGIC sets ZF according to the result and makes CF be zero. |
| //.. BE computes (CF | ZF), but CF is zero, so this reduces ZF |
| //.. -- which will be 1 iff the result is zero. Hence ... |
| //.. */ |
| //.. return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0))); |
| //.. } |
| //.. |
| //.. /*---------------- LOGICW ----------------*/ |
| //.. |
| //.. if (isU32(cc_op, AMD64G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) { |
| //.. /* byte and/or/xor, then Z --> test dst==0 */ |
| //.. return unop(Iop_1Uto32, |
| //.. binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)), |
| //.. mkU32(0))); |
| //.. } |
| //.. |
| //.. /*---------------- LOGICB ----------------*/ |
| //.. |
| //.. if (isU32(cc_op, AMD64G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) { |
| //.. /* byte and/or/xor, then Z --> test dst==0 */ |
| //.. return unop(Iop_1Uto32, |
| //.. binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)), |
| //.. mkU32(0))); |
| //.. } |
| //.. |
| //.. /*---------------- DECL ----------------*/ |
| //.. |
| //.. if (isU32(cc_op, AMD64G_CC_OP_DECL) && isU32(cond, X86CondZ)) { |
| //.. /* dec L, then Z --> test dst == 0 */ |
| //.. return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0))); |
| //.. } |
| //.. |
| //.. if (isU32(cc_op, AMD64G_CC_OP_DECL) && isU32(cond, X86CondS)) { |
| //.. /* dec L, then S --> compare DST <s 0 */ |
| //.. return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0))); |
| //.. } |
| //.. |
| //.. /*---------------- SHRL ----------------*/ |
| //.. |
| //.. if (isU32(cc_op, AMD64G_CC_OP_SHRL) && isU32(cond, X86CondZ)) { |
| //.. /* SHRL, then Z --> test dep1 == 0 */ |
| //.. return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0))); |
| //.. } |
| |
| /*---------------- COPY ----------------*/ |
| /* This can happen, as a result of amd64 FP compares: "comisd ... ; |
| jbe" for example. */ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_COPY) && |
| (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) { |
| /* COPY, then BE --> extract C and Z from dep1, and test (C |
| or Z == 1). */ |
| /* COPY, then NBE --> extract C and Z from dep1, and test (C |
| or Z == 0). */ |
| ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0; |
| return |
| unop( |
| Iop_1Uto64, |
| binop( |
| Iop_CmpEQ64, |
| binop( |
| Iop_And64, |
| binop( |
| Iop_Or64, |
| binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)), |
| binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)) |
| ), |
| mkU64(1) |
| ), |
| mkU64(nnn) |
| ) |
| ); |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) { |
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
| return |
| unop( |
| Iop_1Uto64, |
| binop( |
| Iop_CmpNE64, |
| binop( |
| Iop_And64, |
| binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)), |
| mkU64(1) |
| ), |
| mkU64(0) |
| ) |
| ); |
| } |
| |
| return NULL; |
| } |
| |
| /* --------- specialising "amd64g_calculate_rflags_c" --------- */ |
| |
| if (vex_streq(function_name, "amd64g_calculate_rflags_c")) { |
| /* specialise calls to above "calculate_rflags_c" function */ |
| IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep; |
| vassert(arity == 4); |
| cc_op = args[0]; |
| cc_dep1 = args[1]; |
| cc_dep2 = args[2]; |
| cc_ndep = args[3]; |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBL)) { |
| /* C after sub denotes unsigned less than */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLT64U, |
| binop(Iop_Shl64,cc_dep1,mkU8(32)), |
| binop(Iop_Shl64,cc_dep2,mkU8(32)))); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_SUBB)) { |
| /* C after sub denotes unsigned less than */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLT64U, |
| binop(Iop_And64,cc_dep1,mkU64(0xFF)), |
| binop(Iop_And64,cc_dep2,mkU64(0xFF)))); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) |
| || isU64(cc_op, AMD64G_CC_OP_LOGICL) |
| || isU64(cc_op, AMD64G_CC_OP_LOGICW) |
| || isU64(cc_op, AMD64G_CC_OP_LOGICB)) { |
| /* cflag after logic is zero */ |
| return mkU64(0); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL) |
| || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) { |
| /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */ |
| return cc_ndep; |
| } |
| //.. if (isU64(cc_op, AMD64G_CC_OP_COPY)) { |
| //.. /* cflag after COPY is stored in DEP1. */ |
| //.. return |
| //.. binop( |
| //.. Iop_And64, |
| //.. binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)), |
| //.. mkU64(1) |
| //.. ); |
| //.. } |
| //.. # if 0 |
| //.. if (cc_op->tag == Iex_Const) { |
| //.. vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n"); |
| //.. } |
| //.. # endif |
| |
| return NULL; |
| } |
| |
| //.. /* --------- specialising "x86g_calculate_rflags_all" --------- */ |
| //.. |
| //.. if (vex_streq(function_name, "x86g_calculate_rflags_all")) { |
| //.. /* specialise calls to above "calculate_rflags_all" function */ |
| //.. IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep; |
| //.. vassert(arity == 4); |
| //.. cc_op = args[0]; |
| //.. cc_dep1 = args[1]; |
| //.. cc_dep2 = args[2]; |
| //.. cc_ndep = args[3]; |
| //.. |
| //.. if (isU32(cc_op, AMD64G_CC_OP_COPY)) { |
| //.. /* eflags after COPY are stored in DEP1. */ |
| //.. return |
| //.. binop( |
| //.. Iop_And32, |
| //.. cc_dep1, |
| //.. mkU32(AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z |
| //.. | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P) |
| //.. ); |
| //.. } |
| //.. return NULL; |
| //.. } |
| |
| # undef unop |
| # undef binop |
| # undef mkU64 |
| # undef mkU8 |
| |
| return NULL; |
| } |
| |
| |
| /*---------------------------------------------------------------*/ |
| /*--- Supporting functions for x87 FPU activities. ---*/ |
| /*---------------------------------------------------------------*/ |
| |
/* Initialise the x87 FPU state as per 'finit'.  Not (currently)
   called from generated code; only from
   LibVEX_GuestAMD64_initialise below. */
| static |
| void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst ) |
| { |
| Int i; |
| gst->guest_FTOP = 0; |
| for (i = 0; i < 8; i++) { |
| gst->guest_FPTAG[i] = 0; /* empty */ |
| gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */ |
| } |
| gst->guest_FPROUND = (ULong)Irrm_NEAREST; |
| gst->guest_FC3210 = 0; |
| } |
| |
| |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER (reads guest memory) */ |
| ULong amd64g_loadF80le ( ULong addrU ) |
| { |
| ULong f64; |
| convert_f80le_to_f64le ( (UChar*)ULong_to_Ptr(addrU), (UChar*)&f64 ); |
| return f64; |
| } |
| |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER (writes guest memory) */ |
| void amd64g_storeF80le ( ULong addrU, ULong f64 ) |
| { |
| convert_f64le_to_f80le( (UChar*)&f64, (UChar*)ULong_to_Ptr(addrU) ); |
| } |
| |
| |
| /* CALLED FROM GENERATED CODE */ |
| /* CLEAN HELPER */ |
/* mxcsr[15:0] contains an SSE native format MXCSR value.
| Extract from it the required SSEROUND value and any resulting |
| emulation warning, and return (warn << 32) | sseround value. |
| */ |
| ULong amd64g_check_ldmxcsr ( ULong mxcsr ) |
| { |
| /* Decide on a rounding mode. mxcsr[14:13] holds it. */ |
| /* NOTE, encoded exactly as per enum IRRoundingMode. */ |
| ULong rmode = (mxcsr >> 13) & 3; |
| |
| /* Detect any required emulation warnings. */ |
| VexEmWarn ew = EmWarn_NONE; |
| |
| if ((mxcsr & 0x1F80) != 0x1F80) { |
| /* unmasked exceptions! */ |
| ew = EmWarn_X86_sseExns; |
| } |
| else |
| if (mxcsr & (1<<15)) { |
| /* FZ is set */ |
| ew = EmWarn_X86_fz; |
| } |
| else |
| if (mxcsr & (1<<6)) { |
| /* DAZ is set */ |
| ew = EmWarn_X86_daz; |
| } |
| |
| return (((ULong)ew) << 32) | ((ULong)rmode); |
| } |
| |
| |
| /* CALLED FROM GENERATED CODE */ |
| /* CLEAN HELPER */ |
| /* Given sseround as an IRRoundingMode value, create a suitable SSE |
| native format MXCSR value. */ |
| ULong amd64g_create_mxcsr ( ULong sseround ) |
| { |
| sseround &= 3; |
| return 0x1F80 | (sseround << 13); |
| } |
| |
| |
| /* CLEAN HELPER */ |
/* fpucw[15:0] contains an x87 native format FPU control word.
| Extract from it the required FPROUND value and any resulting |
| emulation warning, and return (warn << 32) | fpround value. |
| */ |
| ULong amd64g_check_fldcw ( ULong fpucw ) |
| { |
| /* Decide on a rounding mode. fpucw[11:10] holds it. */ |
| /* NOTE, encoded exactly as per enum IRRoundingMode. */ |
| ULong rmode = (fpucw >> 10) & 3; |
| |
| /* Detect any required emulation warnings. */ |
| VexEmWarn ew = EmWarn_NONE; |
| |
| if ((fpucw & 0x3F) != 0x3F) { |
| /* unmasked exceptions! */ |
| ew = EmWarn_X86_x87exns; |
| } |
| else |
| if (((fpucw >> 8) & 3) != 3) { |
| /* unsupported precision */ |
| ew = EmWarn_X86_x87precision; |
| } |
| |
| return (((ULong)ew) << 32) | ((ULong)rmode); |
| } |
| |
| |
| /* CLEAN HELPER */ |
| /* Given fpround as an IRRoundingMode value, create a suitable x87 |
| native format FPU control word. */ |
| ULong amd64g_create_fpucw ( ULong fpround ) |
| { |
| fpround &= 3; |
| return 0x037F | (fpround << 10); |
| } |
| |
| |
| /* This is used to implement 'fldenv'. |
| Reads 28 bytes at x87_state[0 .. 27]. */ |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER */ |
| VexEmWarn amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state, |
| /*IN*/HWord x87_state) |
| { |
| Int stno, preg; |
| UInt tag; |
| UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); |
| Fpu_State* x87 = (Fpu_State*)x87_state; |
| UInt ftop = (x87->env[FP_ENV_STAT] >> 11) & 7; |
| UInt tagw = x87->env[FP_ENV_TAG]; |
| UInt fpucw = x87->env[FP_ENV_CTRL]; |
| ULong c3210 = x87->env[FP_ENV_STAT] & 0x4700; |
| VexEmWarn ew; |
| ULong fpround; |
| ULong pair; |
| |
| /* Copy tags */ |
| for (stno = 0; stno < 8; stno++) { |
| preg = (stno + ftop) & 7; |
| tag = (tagw >> (2*preg)) & 3; |
| if (tag == 3) { |
| /* register is empty */ |
| vexTags[preg] = 0; |
| } else { |
| /* register is non-empty */ |
| vexTags[preg] = 1; |
| } |
| } |
| |
| /* stack pointer */ |
| vex_state->guest_FTOP = ftop; |
| |
| /* status word */ |
| vex_state->guest_FC3210 = c3210; |
| |
| /* handle the control word, setting FPROUND and detecting any |
| emulation warnings. */ |
| pair = amd64g_check_fldcw ( (ULong)fpucw ); |
| fpround = pair & 0xFFFFFFFFULL; |
| ew = (VexEmWarn)(pair >> 32); |
| |
| vex_state->guest_FPROUND = fpround & 3; |
| |
| /* emulation warnings --> caller */ |
| return ew; |
| } |
| |
| |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER */ |
/* Create an x87 FPU env from the guest state, as closely as we can
   approximate it.  Writes 28 bytes at x87_state[0..27]. */
| void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state, |
| /*OUT*/HWord x87_state ) |
| { |
| Int i, stno, preg; |
| UInt tagw; |
| UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); |
| Fpu_State* x87 = (Fpu_State*)x87_state; |
| UInt ftop = vex_state->guest_FTOP; |
| ULong c3210 = vex_state->guest_FC3210; |
| |
| for (i = 0; i < 14; i++) |
| x87->env[i] = 0; |
| |
| x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF; |
| x87->env[FP_ENV_STAT] |
| = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) )); |
| x87->env[FP_ENV_CTRL] |
| = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) )); |
| |
| /* Compute the x87 tag word. */ |
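   /* In the x87 tag word each register has a 2-bit tag: 00 = valid,
      01 = zero, 10 = special, 11 = empty.  VEX's one-byte-per-register
      tags only distinguish empty from non-empty, so every non-empty
      register is tagged 00 (valid) here. */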
| tagw = 0; |
| for (stno = 0; stno < 8; stno++) { |
| preg = (stno + ftop) & 7; |
| if (vexTags[preg] == 0) { |
| /* register is empty */ |
| tagw |= (3 << (2*preg)); |
| } else { |
| /* register is full. */ |
| tagw |= (0 << (2*preg)); |
| } |
| } |
| x87->env[FP_ENV_TAG] = toUShort(tagw); |
| |
   /* We don't dump the x87 registers, though. */
| } |
| |
| |
| /*---------------------------------------------------------------*/ |
| /*--- Misc integer helpers, including rotates and CPUID. ---*/ |
| /*---------------------------------------------------------------*/ |
| |
| /* Claim to be the following CPU: |
| vendor_id : AuthenticAMD |
| cpu family : 15 |
| model : 12 |
| model name : AMD Athlon(tm) 64 Processor 3200+ |
| stepping : 0 |
| cpu MHz : 2202.917 |
| cache size : 512 KB |
| fpu : yes |
| fpu_exception : yes |
| cpuid level : 1 |
| wp : yes |
| flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr |
| pge mca cmov pat pse36 clflush mmx fxsr sse sse2 |
| pni syscall nx mmxext lm 3dnowext 3dnow |
| bogomips : 4308.99 |
| TLB size : 1088 4K pages |
| clflush size : 64 |
| cache_alignment : 64 |
| address sizes : 40 bits physical, 48 bits virtual |
| power management: ts fid vid ttp |
| */ |
| void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* st ) |
| { |
| # define SET_ABCD(_a,_b,_c,_d) \ |
| do { st->guest_RAX = (ULong)(_a); \ |
| st->guest_RBX = (ULong)(_b); \ |
| st->guest_RCX = (ULong)(_c); \ |
| st->guest_RDX = (ULong)(_d); \ |
| } while (0) |
| |
| switch (0xFFFFFFFF & st->guest_RAX) { |
| case 0x0: |
| SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65); |
| break; |
| case 0x1: |
| SET_ABCD(0x00000fc0, 0x00000800, 0x00000000, 0x078bfbff); |
| break; |
| case 0x80000000: |
| SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65); |
| break; |
| case 0x80000001: |
| SET_ABCD(0x00000fc0, 0x0000010a, 0x00000000, 0xe1d3fbff); |
| break; |
| case 0x80000002: |
| SET_ABCD(0x20444d41, 0x6c687441, 0x74286e6f, 0x3620296d); |
| break; |
| case 0x80000003: |
| SET_ABCD(0x72502034, 0x7365636f, 0x20726f73, 0x30303233); |
| break; |
| case 0x80000004: |
| SET_ABCD(0x0000002b, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x80000005: |
| SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140); |
| break; |
| case 0x80000006: |
| SET_ABCD(0x00000000, 0x42004200, 0x02008140, 0x00000000); |
| break; |
| case 0x80000007: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f); |
| break; |
| case 0x80000008: |
| SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| default: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| } |
| # undef SET_ABCD |
| } |
| |
| |
| /*---------------------------------------------------------------*/ |
| /*--- Helpers for MMX/SSE/SSE2. ---*/ |
| /*---------------------------------------------------------------*/ |
| |
| static inline UChar abdU8 ( UChar xx, UChar yy ) { |
| return toUChar(xx>yy ? xx-yy : yy-xx); |
| } |
| |
| static inline ULong mk32x2 ( UInt w1, UInt w0 ) { |
| return (((ULong)w1) << 32) | ((ULong)w0); |
| } |
| |
| static inline UShort sel16x4_3 ( ULong w64 ) { |
| UInt hi32 = toUInt(w64 >> 32); |
| return toUShort(hi32 >> 16); |
| } |
| static inline UShort sel16x4_2 ( ULong w64 ) { |
| UInt hi32 = toUInt(w64 >> 32); |
| return toUShort(hi32); |
| } |
| static inline UShort sel16x4_1 ( ULong w64 ) { |
| UInt lo32 = toUInt(w64); |
| return toUShort(lo32 >> 16); |
| } |
| static inline UShort sel16x4_0 ( ULong w64 ) { |
| UInt lo32 = toUInt(w64); |
| return toUShort(lo32); |
| } |
| |
| static inline UChar sel8x8_7 ( ULong w64 ) { |
| UInt hi32 = toUInt(w64 >> 32); |
| return toUChar(hi32 >> 24); |
| } |
| static inline UChar sel8x8_6 ( ULong w64 ) { |
| UInt hi32 = toUInt(w64 >> 32); |
| return toUChar(hi32 >> 16); |
| } |
| static inline UChar sel8x8_5 ( ULong w64 ) { |
| UInt hi32 = toUInt(w64 >> 32); |
| return toUChar(hi32 >> 8); |
| } |
| static inline UChar sel8x8_4 ( ULong w64 ) { |
| UInt hi32 = toUInt(w64 >> 32); |
| return toUChar(hi32 >> 0); |
| } |
| static inline UChar sel8x8_3 ( ULong w64 ) { |
| UInt lo32 = toUInt(w64); |
| return toUChar(lo32 >> 24); |
| } |
| static inline UChar sel8x8_2 ( ULong w64 ) { |
| UInt lo32 = toUInt(w64); |
| return toUChar(lo32 >> 16); |
| } |
| static inline UChar sel8x8_1 ( ULong w64 ) { |
| UInt lo32 = toUInt(w64); |
| return toUChar(lo32 >> 8); |
| } |
| static inline UChar sel8x8_0 ( ULong w64 ) { |
| UInt lo32 = toUInt(w64); |
| return toUChar(lo32 >> 0); |
| } |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
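/* PMADDWD: multiply corresponding signed 16-bit lanes of xx and yy,
   then add each adjacent pair of 32-bit products, giving two signed
   32-bit sums. */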
| ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy ) |
| { |
| return |
| mk32x2( |
| (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy))) |
| + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))), |
| (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy))) |
| + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy))) |
| ); |
| } |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
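/* PMOVMSKB: gather the most significant bit of each of the 8 bytes
   of xx into an 8-bit result; byte 7's sign bit becomes bit 7, down
   to byte 0's sign bit as bit 0. */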
| ULong amd64g_calculate_mmx_pmovmskb ( ULong xx ) |
| { |
| ULong r = 0; |
| if (xx & (1ULL << (64-1))) r |= (1<<7); |
| if (xx & (1ULL << (56-1))) r |= (1<<6); |
| if (xx & (1ULL << (48-1))) r |= (1<<5); |
| if (xx & (1ULL << (40-1))) r |= (1<<4); |
| if (xx & (1ULL << (32-1))) r |= (1<<3); |
| if (xx & (1ULL << (24-1))) r |= (1<<2); |
| if (xx & (1ULL << (16-1))) r |= (1<<1); |
| if (xx & (1ULL << ( 8-1))) r |= (1<<0); |
| return r; |
| } |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy ) |
| { |
| UInt t = 0; |
| t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) ); |
| t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) ); |
| t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) ); |
| t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) ); |
| t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) ); |
| t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) ); |
| t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) ); |
| t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) ); |
| t &= 0xFFFF; |
| return (ULong)t; |
| } |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ) |
| { |
| ULong rHi8 = amd64g_calculate_mmx_pmovmskb ( w64hi ); |
| ULong rLo8 = amd64g_calculate_mmx_pmovmskb ( w64lo ); |
| return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF); |
| } |
| |
| |
| /*---------------------------------------------------------------*/ |
| /*--- Helpers for dealing with, and describing, ---*/ |
| /*--- guest state as a whole. ---*/ |
| /*---------------------------------------------------------------*/ |
| |
| /* Initialise the entire amd64 guest state. */ |
| /* VISIBLE TO LIBVEX CLIENT */ |
| void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state ) |
| { |
| |
| vex_state->guest_RAX = 0; |
| vex_state->guest_RCX = 0; |
| vex_state->guest_RDX = 0; |
| vex_state->guest_RBX = 0; |
| vex_state->guest_RSP = 0; |
| vex_state->guest_RBP = 0; |
| vex_state->guest_RSI = 0; |
| vex_state->guest_RDI = 0; |
| vex_state->guest_R8 = 0; |
| vex_state->guest_R9 = 0; |
| vex_state->guest_R10 = 0; |
| vex_state->guest_R11 = 0; |
| vex_state->guest_R12 = 0; |
| vex_state->guest_R13 = 0; |
| vex_state->guest_R14 = 0; |
| vex_state->guest_R15 = 0; |
| |
| vex_state->guest_CC_OP = AMD64G_CC_OP_COPY; |
| vex_state->guest_CC_DEP1 = 0; |
| vex_state->guest_CC_DEP2 = 0; |
| vex_state->guest_CC_NDEP = 0; |
| |
| vex_state->guest_DFLAG = 1; /* forwards */ |
| vex_state->guest_IDFLAG = 0; |
| |
| /* HACK: represent the offset associated with %fs==0. This |
| assumes that %fs is only ever zero. */ |
| vex_state->guest_FS_ZERO = 0; |
| |
| vex_state->guest_RIP = 0; |
| |
| /* Initialise the simulated FPU */ |
| amd64g_dirtyhelper_FINIT( vex_state ); |
| |
| /* Initialise the SSE state. */ |
| # define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0; |
| |
| vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST; |
| SSEZERO(vex_state->guest_XMM0); |
| SSEZERO(vex_state->guest_XMM1); |
| SSEZERO(vex_state->guest_XMM2); |
| SSEZERO(vex_state->guest_XMM3); |
| SSEZERO(vex_state->guest_XMM4); |
| SSEZERO(vex_state->guest_XMM5); |
| SSEZERO(vex_state->guest_XMM6); |
| SSEZERO(vex_state->guest_XMM7); |
| SSEZERO(vex_state->guest_XMM8); |
| SSEZERO(vex_state->guest_XMM9); |
| SSEZERO(vex_state->guest_XMM10); |
| SSEZERO(vex_state->guest_XMM11); |
| SSEZERO(vex_state->guest_XMM12); |
| SSEZERO(vex_state->guest_XMM13); |
| SSEZERO(vex_state->guest_XMM14); |
| SSEZERO(vex_state->guest_XMM15); |
| |
| # undef SSEZERO |
| |
| vex_state->guest_EMWARN = EmWarn_NONE; |
| |
| /* These should not ever be either read or written, but we |
| initialise them anyway. */ |
| vex_state->guest_TISTART = 0; |
| vex_state->guest_TILEN = 0; |
| } |
| |
| |
/* Figure out if any part of the guest state contained in minoff
   .. maxoff requires precise memory exceptions.  If in doubt return
   True (but this generates significantly slower code).

   By default we enforce precise exns for guest %RSP, %RBP and %RIP
   only.  These are the minimum needed to extract correct stack
   backtraces from amd64 code.
*/
| Bool guest_amd64_state_requires_precise_mem_exns ( Int minoff, |
| Int maxoff) |
| { |
| Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP); |
| Int rbp_max = rbp_min + 8 - 1; |
| Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP); |
| Int rsp_max = rsp_min + 8 - 1; |
| Int rip_min = offsetof(VexGuestAMD64State, guest_RIP); |
| Int rip_max = rip_min + 8 - 1; |
| |
| if (maxoff < rbp_min || minoff > rbp_max) { |
| /* no overlap with rbp */ |
| } else { |
| return True; |
| } |
| |
| if (maxoff < rsp_min || minoff > rsp_max) { |
| /* no overlap with rsp */ |
| } else { |
| return True; |
| } |
| |
| if (maxoff < rip_min || minoff > rip_max) { |
      /* no overlap with rip */
| } else { |
| return True; |
| } |
| |
| return False; |
| } |
| |
| |
| #define ALWAYSDEFD(field) \ |
| { offsetof(VexGuestAMD64State, field), \ |
| (sizeof ((VexGuestAMD64State*)0)->field) } |
| |
| VexGuestLayout |
| amd64guest_layout |
| = { |
| /* Total size of the guest state, in bytes. */ |
| .total_sizeB = sizeof(VexGuestAMD64State), |
| |
| /* Describe the stack pointer. */ |
| .offset_SP = offsetof(VexGuestAMD64State,guest_RSP), |
| .sizeof_SP = 8, |
| |
| /* Describe the instruction pointer. */ |
| .offset_IP = offsetof(VexGuestAMD64State,guest_RIP), |
| .sizeof_IP = 8, |
| |
| /* Describe any sections to be regarded by Memcheck as |
| 'always-defined'. */ |
| .n_alwaysDefd = 12, |
| |
| /* flags thunk: OP and NDEP are always defd, whereas DEP1 |
| and DEP2 have to be tracked. See detailed comment in |
| gdefs.h on meaning of thunk fields. */ |
| .alwaysDefd |
| = { /* 0 */ ALWAYSDEFD(guest_CC_OP), |
| /* 1 */ ALWAYSDEFD(guest_CC_NDEP), |
| /* 2 */ ALWAYSDEFD(guest_DFLAG), |
| /* 3 */ ALWAYSDEFD(guest_IDFLAG), |
| /* 4 */ ALWAYSDEFD(guest_RIP), |
| /* 5 */ ALWAYSDEFD(guest_FS_ZERO), |
| /* 6 */ ALWAYSDEFD(guest_FTOP), |
| /* 7 */ ALWAYSDEFD(guest_FPTAG), |
| /* 8 */ ALWAYSDEFD(guest_FPROUND), |
| /* 9 */ ALWAYSDEFD(guest_FC3210), |
| // /* */ ALWAYSDEFD(guest_CS), |
| // /* */ ALWAYSDEFD(guest_DS), |
| // /* */ ALWAYSDEFD(guest_ES), |
| // /* */ ALWAYSDEFD(guest_FS), |
| // /* */ ALWAYSDEFD(guest_GS), |
| // /* */ ALWAYSDEFD(guest_SS), |
| // /* */ ALWAYSDEFD(guest_LDT), |
| // /* */ ALWAYSDEFD(guest_GDT), |
| /* 10 */ ALWAYSDEFD(guest_EMWARN), |
| /* 11 */ ALWAYSDEFD(guest_SSEROUND) |
| } |
| }; |
| |
| |
| /*---------------------------------------------------------------*/ |
| /*--- end guest-amd64/ghelpers.c ---*/ |
| /*---------------------------------------------------------------*/ |