| |
| /*---------------------------------------------------------------*/ |
| /*--- begin guest_amd64_helpers.c ---*/ |
| /*---------------------------------------------------------------*/ |
| |
| /* |
| This file is part of Valgrind, a dynamic binary instrumentation |
| framework. |
| |
| Copyright (C) 2004-2012 OpenWorks LLP |
| info@open-works.net |
| |
| This program is free software; you can redistribute it and/or |
| modify it under the terms of the GNU General Public License as |
| published by the Free Software Foundation; either version 2 of the |
| License, or (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
| 02110-1301, USA. |
| |
| The GNU General Public License is contained in the file COPYING. |
| |
| Neither the names of the U.S. Department of Energy nor the |
| University of California nor the names of its contributors may be |
| used to endorse or promote products derived from this software |
| without prior written permission. |
| */ |
| |
| #include "libvex_basictypes.h" |
| #include "libvex_emnote.h" |
| #include "libvex_guest_amd64.h" |
| #include "libvex_ir.h" |
| #include "libvex.h" |
| |
| #include "main_util.h" |
| #include "main_globals.h" |
| #include "guest_generic_bb_to_IR.h" |
| #include "guest_amd64_defs.h" |
| #include "guest_generic_x87.h" |
| |
| |
| /* This file contains helper functions for amd64 guest code. |
| Calls to these functions are generated by the back end.  Those |
| calls are of course in the host machine code, and this file is |
| itself compiled to host machine code, so it all makes sense. |
| |
| Change the signatures of these helper functions only with great |
| care.  If you change a signature here, you'll also have to change |
| the parameters passed to it in the IR calls constructed by |
| guest-amd64/toIR.c. |
| |
| The convention used is that all functions called from generated |
| code are named amd64g_<something>, and any function whose name lacks |
| that prefix is not called from generated code.  Note that some |
| LibVEX_* functions can however be called by VEX's client, but that |
| is not the same as calling them from VEX-generated code. |
| */ |
| |
| |
| /* Set to 1 to get detailed profiling info about use of the flag |
| machinery. */ |
| #define PROFILE_RFLAGS 0 |
| |
| |
| /*---------------------------------------------------------------*/ |
| /*--- %rflags run-time helpers. ---*/ |
| /*---------------------------------------------------------------*/ |
| |
| /* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags |
| after imulq/mulq. */ |
| |
| static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo ) |
| { |
| ULong u0, v0, w0; |
| Long u1, v1, w1, w2, t; |
| u0 = u & 0xFFFFFFFFULL; |
| u1 = u >> 32; |
| v0 = v & 0xFFFFFFFFULL; |
| v1 = v >> 32; |
| w0 = u0 * v0; |
| t = u1 * v0 + (w0 >> 32); |
| w1 = t & 0xFFFFFFFFULL; |
| w2 = t >> 32; |
| w1 = u0 * v1 + w1; |
| *rHi = u1 * v1 + w2 + (w1 >> 32); |
| *rLo = (Long)((ULong)u * (ULong)v); /* low half; unsigned multiply avoids signed-overflow UB */ |
| } |
| |
| static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo ) |
| { |
| ULong u0, v0, w0; |
| ULong u1, v1, w1, w2, t; |
| u0 = u & 0xFFFFFFFFULL; |
| u1 = u >> 32; |
| v0 = v & 0xFFFFFFFFULL; |
| v1 = v >> 32; |
| w0 = u0 * v0; |
| t = u1 * v0 + (w0 >> 32); |
| w1 = t & 0xFFFFFFFFULL; |
| w2 = t >> 32; |
| w1 = u0 * v1 + w1; |
| *rHi = u1 * v1 + w2 + (w1 >> 32); |
| *rLo = u * v; |
| } |
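| |
| /* A minimal, disabled sanity-check sketch for the widening |
| multiplies above.  It assumes a compiler providing unsigned |
| __int128 (a GCC/Clang extension, not something VEX requires), |
| which is why it is kept under #if 0. */ |
| #if 0 |
| static void selftest_mullU64 ( void ) |
| { |
| ULong hi, lo; |
| ULong u = 0xDEADBEEFCAFEBABEULL; |
| ULong v = 0x0123456789ABCDEFULL; |
| unsigned __int128 ref; |
| mullU64( u, v, &hi, &lo ); |
| /* Reference result via the compiler's 128-bit arithmetic. */ |
| ref = (unsigned __int128)u * (unsigned __int128)v; |
| vassert(lo == (ULong)ref); |
| vassert(hi == (ULong)(ref >> 64)); |
| } |
| #endif |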
| |
| |
| static const UChar parity_table[256] = { |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| }; |
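| |
| /* How the table above could be regenerated: x86 PF is set iff the |
| low 8 bits of a result contain an even number of 1 bits.  A sketch |
| of a generator (not part of VEX; kept disabled -- the static table |
| avoids recomputing parity on every flag evaluation): */ |
| #if 0 |
| static void regenerate_parity_table ( UChar* tab /* 256 entries */ ) |
| { |
| Int i, j, nSet; |
| for (i = 0; i < 256; i++) { |
| nSet = 0; |
| for (j = 0; j < 8; j++) |
| nSet += (i >> j) & 1; |
| tab[i] = (nSet & 1) ? 0 : AMD64G_CC_MASK_P; |
| } |
| } |
| #endif |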
| |
| /* generalised left-shifter: shifts left by n when n >= 0, otherwise right by -n */ |
| static inline Long lshift ( Long x, Int n ) |
| { |
| if (n >= 0) |
| return x << n; |
| else |
| return x >> (-n); |
| } |
| |
| /* identity on ULong */ |
| static inline ULong idULong ( ULong x ) |
| { |
| return x; |
| } |
| |
| |
| #define PREAMBLE(__data_bits) \ |
| /* const */ ULong DATA_MASK \ |
| = __data_bits==8 \ |
| ? 0xFFULL \ |
| : (__data_bits==16 \ |
| ? 0xFFFFULL \ |
| : (__data_bits==32 \ |
| ? 0xFFFFFFFFULL \ |
| : 0xFFFFFFFFFFFFFFFFULL)); \ |
| /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1); \ |
| /* const */ ULong CC_DEP1 = cc_dep1_formal; \ |
| /* const */ ULong CC_DEP2 = cc_dep2_formal; \ |
| /* const */ ULong CC_NDEP = cc_ndep_formal; \ |
| /* Four bogus assignments, which hopefully gcc can */ \ |
| /* optimise away, and which stop it complaining about */ \ |
| /* unused variables. */ \ |
| SIGN_MASK = SIGN_MASK; \ |
| DATA_MASK = DATA_MASK; \ |
| CC_DEP2 = CC_DEP2; \ |
| CC_NDEP = CC_NDEP; |
| |
| |
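| /* Each ACTIONS_* block below computes the six flags directly at |
| their %rflags bit positions -- C=0, P=2, A=4, Z=6, S=7, O=11 -- and |
| simply ORs them together.  Hence the "<< 6" for Z, the "& 0x80" |
| after shifting the result's sign bit to position 7 for S, and the |
| "12 - DATA_BITS" shift, which moves the overflow predicate's sign |
| bit (at position DATA_BITS-1) to position 11 for O. */ |
| |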
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| Long argL, argR, res; \ |
| argL = CC_DEP1; \ |
| argR = CC_DEP2; \ |
| res = argL + argR; \ |
| cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \ |
| pf = parity_table[(UChar)res]; \ |
| af = (res ^ argL ^ argR) & 0x10; \ |
| zf = ((DATA_UTYPE)res == 0) << 6; \ |
| sf = lshift(res, 8 - DATA_BITS) & 0x80; \ |
| of = lshift((argL ^ argR ^ -1) & (argL ^ res), \ |
| 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| Long argL, argR, res; \ |
| argL = CC_DEP1; \ |
| argR = CC_DEP2; \ |
| res = argL - argR; \ |
| cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \ |
| pf = parity_table[(UChar)res]; \ |
| af = (res ^ argL ^ argR) & 0x10; \ |
| zf = ((DATA_UTYPE)res == 0) << 6; \ |
| sf = lshift(res, 8 - DATA_BITS) & 0x80; \ |
| of = lshift((argL ^ argR) & (argL ^ res), \ |
| 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| Long argL, argR, oldC, res; \ |
| oldC = CC_NDEP & AMD64G_CC_MASK_C; \ |
| argL = CC_DEP1; \ |
| argR = CC_DEP2 ^ oldC; \ |
| res = (argL + argR) + oldC; \ |
| if (oldC) \ |
| cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \ |
| else \ |
| cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \ |
| pf = parity_table[(UChar)res]; \ |
| af = (res ^ argL ^ argR) & 0x10; \ |
| zf = ((DATA_UTYPE)res == 0) << 6; \ |
| sf = lshift(res, 8 - DATA_BITS) & 0x80; \ |
| of = lshift((argL ^ argR ^ -1) & (argL ^ res), \ |
| 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| Long argL, argR, oldC, res; \ |
| oldC = CC_NDEP & AMD64G_CC_MASK_C; \ |
| argL = CC_DEP1; \ |
| argR = CC_DEP2 ^ oldC; \ |
| res = (argL - argR) - oldC; \ |
| if (oldC) \ |
| cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR; \ |
| else \ |
| cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \ |
| pf = parity_table[(UChar)res]; \ |
| af = (res ^ argL ^ argR) & 0x10; \ |
| zf = ((DATA_UTYPE)res == 0) << 6; \ |
| sf = lshift(res, 8 - DATA_BITS) & 0x80; \ |
| of = lshift((argL ^ argR) & (argL ^ res), \ |
| 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| cf = 0; \ |
| pf = parity_table[(UChar)CC_DEP1]; \ |
| af = 0; \ |
| zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ |
| sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ |
| of = 0; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| Long argL, argR, res; \ |
| res = CC_DEP1; \ |
| argL = res - 1; \ |
| argR = 1; \ |
| cf = CC_NDEP & AMD64G_CC_MASK_C; \ |
| pf = parity_table[(UChar)res]; \ |
| af = (res ^ argL ^ argR) & 0x10; \ |
| zf = ((DATA_UTYPE)res == 0) << 6; \ |
| sf = lshift(res, 8 - DATA_BITS) & 0x80; \ |
| of = ((res & DATA_MASK) == SIGN_MASK) << 11; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| Long argL, argR, res; \ |
| res = CC_DEP1; \ |
| argL = res + 1; \ |
| argR = 1; \ |
| cf = CC_NDEP & AMD64G_CC_MASK_C; \ |
| pf = parity_table[(UChar)res]; \ |
| af = (res ^ argL ^ argR) & 0x10; \ |
| zf = ((DATA_UTYPE)res == 0) << 6; \ |
| sf = lshift(res, 8 - DATA_BITS) & 0x80; \ |
| of = ((res & DATA_MASK) \ |
| == ((ULong)SIGN_MASK - 1)) << 11; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C; \ |
| pf = parity_table[(UChar)CC_DEP1]; \ |
| af = 0; /* undefined */ \ |
| zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ |
| sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ |
| /* of is defined if shift count == 1 */ \ |
| of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \ |
| & AMD64G_CC_MASK_O; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| cf = CC_DEP2 & 1; \ |
| pf = parity_table[(UChar)CC_DEP1]; \ |
| af = 0; /* undefined */ \ |
| zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ |
| sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ |
| /* of is defined if shift count == 1 */ \ |
| of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \ |
| & AMD64G_CC_MASK_O; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| /* ROL: cf' = lsb(result). of' = msb(result) ^ lsb(result). */ |
| /* DEP1 = result, NDEP = old flags */ |
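| /* Note that, unlike the ACTIONS_* cases above, rotates update only |
| C and O; the remaining flags are taken unchanged from the old |
| flags value supplied in NDEP, which is why NDEP is merged back in |
| below. */ |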
| #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long fl \ |
| = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \ |
| | (AMD64G_CC_MASK_C & CC_DEP1) \ |
| | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \ |
| 11-(DATA_BITS-1)) \ |
| ^ lshift(CC_DEP1, 11))); \ |
| return fl; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| /* ROR: cf' = msb(result). of' = msb(result) ^ msb-1(result). */ |
| /* DEP1 = result, NDEP = old flags */ |
| #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long fl \ |
| = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \ |
| | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \ |
| | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \ |
| 11-(DATA_BITS-1)) \ |
| ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \ |
| return fl; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU, \ |
| DATA_U2TYPE, NARROWto2U) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| DATA_UTYPE hi; \ |
| DATA_UTYPE lo \ |
| = NARROWtoU( ((DATA_UTYPE)CC_DEP1) \ |
| * ((DATA_UTYPE)CC_DEP2) ); \ |
| DATA_U2TYPE rr \ |
| = NARROWto2U( \ |
| ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1)) \ |
| * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) ); \ |
| hi = NARROWtoU(rr >>/*u*/ DATA_BITS); \ |
| cf = (hi != 0); \ |
| pf = parity_table[(UChar)lo]; \ |
| af = 0; /* undefined */ \ |
| zf = (lo == 0) << 6; \ |
| sf = lshift(lo, 8 - DATA_BITS) & 0x80; \ |
| of = cf << 11; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
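| /* For signed widening multiplies, C and O are set iff the full |
| result does not fit in DATA_BITS bits, i.e. iff the high half is |
| not simply the sign-extension of the low half -- hence the |
| comparison of 'hi' against 'lo' arithmetically shifted right by |
| (DATA_BITS-1) below. */ |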
| #define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS, \ |
| DATA_S2TYPE, NARROWto2S) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| DATA_STYPE hi; \ |
| DATA_STYPE lo \ |
| = NARROWtoS( ((DATA_STYPE)CC_DEP1) \ |
| * ((DATA_STYPE)CC_DEP2) ); \ |
| DATA_S2TYPE rr \ |
| = NARROWto2S( \ |
| ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \ |
| * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) ); \ |
| hi = NARROWtoS(rr >>/*s*/ DATA_BITS); \ |
| cf = (hi != (lo >>/*s*/ (DATA_BITS-1))); \ |
| pf = parity_table[(UChar)lo]; \ |
| af = 0; /* undefined */ \ |
| zf = (lo == 0) << 6; \ |
| sf = lshift(lo, 8 - DATA_BITS) & 0x80; \ |
| of = cf << 11; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_UMULQ \ |
| { \ |
| PREAMBLE(64); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| ULong lo, hi; \ |
| mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo ); \ |
| cf = (hi != 0); \ |
| pf = parity_table[(UChar)lo]; \ |
| af = 0; /* undefined */ \ |
| zf = (lo == 0) << 6; \ |
| sf = lshift(lo, 8 - 64) & 0x80; \ |
| of = cf << 11; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_SMULQ \ |
| { \ |
| PREAMBLE(64); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| Long lo, hi; \ |
| mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo ); \ |
| cf = (hi != (lo >>/*s*/ (64-1))); \ |
| pf = parity_table[(UChar)lo]; \ |
| af = 0; /* undefined */ \ |
| zf = (lo == 0) << 6; \ |
| sf = lshift(lo, 8 - 64) & 0x80; \ |
| of = cf << 11; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_ANDN(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| cf = 0; \ |
| pf = 0; \ |
| af = 0; \ |
| zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ |
| sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ |
| of = 0; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_BLSI(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| cf = ((DATA_UTYPE)CC_DEP2 != 0); \ |
| pf = 0; \ |
| af = 0; \ |
| zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ |
| sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ |
| of = 0; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_BLSMSK(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| cf = ((DATA_UTYPE)CC_DEP2 == 0); \ |
| pf = 0; \ |
| af = 0; \ |
| zf = 0; \ |
| sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ |
| of = 0; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| #define ACTIONS_BLSR(DATA_BITS,DATA_UTYPE) \ |
| { \ |
| PREAMBLE(DATA_BITS); \ |
| { Long cf, pf, af, zf, sf, of; \ |
| cf = ((DATA_UTYPE)CC_DEP2 == 0); \ |
| pf = 0; \ |
| af = 0; \ |
| zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ |
| sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ |
| of = 0; \ |
| return cf | pf | af | zf | sf | of; \ |
| } \ |
| } |
| |
| /*-------------------------------------------------------------*/ |
| |
| |
| #if PROFILE_RFLAGS |
| |
| static Bool initted = False; |
| |
| /* C flag, fast route */ |
| static UInt tabc_fast[AMD64G_CC_OP_NUMBER]; |
| /* C flag, slow route */ |
| static UInt tabc_slow[AMD64G_CC_OP_NUMBER]; |
| /* table for calculate_cond */ |
| static UInt tab_cond[AMD64G_CC_OP_NUMBER][16]; |
| /* total entry counts for calc_all, calc_c, calc_cond. */ |
| static UInt n_calc_all = 0; |
| static UInt n_calc_c = 0; |
| static UInt n_calc_cond = 0; |
| |
| #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond))) |
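| /* With the 0x3FFFFF mask, this fires once every 2^22 (about 4.2M) |
| combined calls to calc_all/calc_c/calc_cond, which keeps the |
| profiling dumps infrequent. */ |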
| |
| |
| static void showCounts ( void ) |
| { |
| Int op, co; |
| HChar ch; |
| vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n", |
| n_calc_all, n_calc_cond, n_calc_c); |
| |
| vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE" |
| " S NS P NP L NL LE NLE\n"); |
| vex_printf(" -----------------------------------------------------" |
| "----------------------------------------\n"); |
| for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) { |
| |
| ch = ' '; |
| if (op > 0 && (op-1) % 4 == 0) |
| ch = 'B'; |
| if (op > 0 && (op-1) % 4 == 1) |
| ch = 'W'; |
| if (op > 0 && (op-1) % 4 == 2) |
| ch = 'L'; |
| if (op > 0 && (op-1) % 4 == 3) |
| ch = 'Q'; |
| |
| vex_printf("%2d%c: ", op, ch); |
| vex_printf("%6u ", tabc_slow[op]); |
| vex_printf("%6u ", tabc_fast[op]); |
| for (co = 0; co < 16; co++) { |
| Int n = tab_cond[op][co]; |
| if (n >= 1000) { |
| vex_printf(" %3dK", n / 1000); |
| } else |
| if (n >= 0) { |
| vex_printf(" %3d ", n ); |
| } else { |
| vex_printf(" "); |
| } |
| } |
| vex_printf("\n"); |
| } |
| vex_printf("\n"); |
| } |
| |
| static void initCounts ( void ) |
| { |
| Int op, co; |
| initted = True; |
| for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) { |
| tabc_fast[op] = tabc_slow[op] = 0; |
| for (co = 0; co < 16; co++) |
| tab_cond[op][co] = 0; |
| } |
| } |
| |
| #endif /* PROFILE_RFLAGS */ |
| |
| |
| /* Calculate all 6 flags from the supplied thunk parameters. |
| Worker function for the clean helpers below; not called directly |
| from generated code. */ |
| static |
| ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op, |
| ULong cc_dep1_formal, |
| ULong cc_dep2_formal, |
| ULong cc_ndep_formal ) |
| { |
| switch (cc_op) { |
| case AMD64G_CC_OP_COPY: |
| return cc_dep1_formal |
| & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z |
| | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P); |
| |
| case AMD64G_CC_OP_ADDB: ACTIONS_ADD( 8, UChar ); |
| case AMD64G_CC_OP_ADDW: ACTIONS_ADD( 16, UShort ); |
| case AMD64G_CC_OP_ADDL: ACTIONS_ADD( 32, UInt ); |
| case AMD64G_CC_OP_ADDQ: ACTIONS_ADD( 64, ULong ); |
| |
| case AMD64G_CC_OP_ADCB: ACTIONS_ADC( 8, UChar ); |
| case AMD64G_CC_OP_ADCW: ACTIONS_ADC( 16, UShort ); |
| case AMD64G_CC_OP_ADCL: ACTIONS_ADC( 32, UInt ); |
| case AMD64G_CC_OP_ADCQ: ACTIONS_ADC( 64, ULong ); |
| |
| case AMD64G_CC_OP_SUBB: ACTIONS_SUB( 8, UChar ); |
| case AMD64G_CC_OP_SUBW: ACTIONS_SUB( 16, UShort ); |
| case AMD64G_CC_OP_SUBL: ACTIONS_SUB( 32, UInt ); |
| case AMD64G_CC_OP_SUBQ: ACTIONS_SUB( 64, ULong ); |
| |
| case AMD64G_CC_OP_SBBB: ACTIONS_SBB( 8, UChar ); |
| case AMD64G_CC_OP_SBBW: ACTIONS_SBB( 16, UShort ); |
| case AMD64G_CC_OP_SBBL: ACTIONS_SBB( 32, UInt ); |
| case AMD64G_CC_OP_SBBQ: ACTIONS_SBB( 64, ULong ); |
| |
| case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC( 8, UChar ); |
| case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort ); |
| case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt ); |
| case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong ); |
| |
| case AMD64G_CC_OP_INCB: ACTIONS_INC( 8, UChar ); |
| case AMD64G_CC_OP_INCW: ACTIONS_INC( 16, UShort ); |
| case AMD64G_CC_OP_INCL: ACTIONS_INC( 32, UInt ); |
| case AMD64G_CC_OP_INCQ: ACTIONS_INC( 64, ULong ); |
| |
| case AMD64G_CC_OP_DECB: ACTIONS_DEC( 8, UChar ); |
| case AMD64G_CC_OP_DECW: ACTIONS_DEC( 16, UShort ); |
| case AMD64G_CC_OP_DECL: ACTIONS_DEC( 32, UInt ); |
| case AMD64G_CC_OP_DECQ: ACTIONS_DEC( 64, ULong ); |
| |
| case AMD64G_CC_OP_SHLB: ACTIONS_SHL( 8, UChar ); |
| case AMD64G_CC_OP_SHLW: ACTIONS_SHL( 16, UShort ); |
| case AMD64G_CC_OP_SHLL: ACTIONS_SHL( 32, UInt ); |
| case AMD64G_CC_OP_SHLQ: ACTIONS_SHL( 64, ULong ); |
| |
| case AMD64G_CC_OP_SHRB: ACTIONS_SHR( 8, UChar ); |
| case AMD64G_CC_OP_SHRW: ACTIONS_SHR( 16, UShort ); |
| case AMD64G_CC_OP_SHRL: ACTIONS_SHR( 32, UInt ); |
| case AMD64G_CC_OP_SHRQ: ACTIONS_SHR( 64, ULong ); |
| |
| case AMD64G_CC_OP_ROLB: ACTIONS_ROL( 8, UChar ); |
| case AMD64G_CC_OP_ROLW: ACTIONS_ROL( 16, UShort ); |
| case AMD64G_CC_OP_ROLL: ACTIONS_ROL( 32, UInt ); |
| case AMD64G_CC_OP_ROLQ: ACTIONS_ROL( 64, ULong ); |
| |
| case AMD64G_CC_OP_RORB: ACTIONS_ROR( 8, UChar ); |
| case AMD64G_CC_OP_RORW: ACTIONS_ROR( 16, UShort ); |
| case AMD64G_CC_OP_RORL: ACTIONS_ROR( 32, UInt ); |
| case AMD64G_CC_OP_RORQ: ACTIONS_ROR( 64, ULong ); |
| |
| case AMD64G_CC_OP_UMULB: ACTIONS_UMUL( 8, UChar, toUChar, |
| UShort, toUShort ); |
| case AMD64G_CC_OP_UMULW: ACTIONS_UMUL( 16, UShort, toUShort, |
| UInt, toUInt ); |
| case AMD64G_CC_OP_UMULL: ACTIONS_UMUL( 32, UInt, toUInt, |
| ULong, idULong ); |
| |
| case AMD64G_CC_OP_UMULQ: ACTIONS_UMULQ; |
| |
| case AMD64G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, toUChar, |
| Short, toUShort ); |
| case AMD64G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, toUShort, |
| Int, toUInt ); |
| case AMD64G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, toUInt, |
| Long, idULong ); |
| |
| case AMD64G_CC_OP_SMULQ: ACTIONS_SMULQ; |
| |
| case AMD64G_CC_OP_ANDN32: ACTIONS_ANDN( 32, UInt ); |
| case AMD64G_CC_OP_ANDN64: ACTIONS_ANDN( 64, ULong ); |
| |
| case AMD64G_CC_OP_BLSI32: ACTIONS_BLSI( 32, UInt ); |
| case AMD64G_CC_OP_BLSI64: ACTIONS_BLSI( 64, ULong ); |
| |
| case AMD64G_CC_OP_BLSMSK32: ACTIONS_BLSMSK( 32, UInt ); |
| case AMD64G_CC_OP_BLSMSK64: ACTIONS_BLSMSK( 64, ULong ); |
| |
| case AMD64G_CC_OP_BLSR32: ACTIONS_BLSR( 32, UInt ); |
| case AMD64G_CC_OP_BLSR64: ACTIONS_BLSR( 64, ULong ); |
| |
| default: |
| /* shouldn't really make these calls from generated code */ |
| vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)" |
| "( %llu, 0x%llx, 0x%llx, 0x%llx )\n", |
| cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal ); |
| vpanic("amd64g_calculate_rflags_all_WRK(AMD64)"); |
| } |
| } |
| |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| /* Calculate all 6 flags from the supplied thunk parameters. */ |
| ULong amd64g_calculate_rflags_all ( ULong cc_op, |
| ULong cc_dep1, |
| ULong cc_dep2, |
| ULong cc_ndep ) |
| { |
| # if PROFILE_RFLAGS |
| if (!initted) initCounts(); |
| n_calc_all++; |
| if (SHOW_COUNTS_NOW) showCounts(); |
| # endif |
| return |
| amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep ); |
| } |
| |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| /* Calculate just the carry flag from the supplied thunk parameters. */ |
| ULong amd64g_calculate_rflags_c ( ULong cc_op, |
| ULong cc_dep1, |
| ULong cc_dep2, |
| ULong cc_ndep ) |
| { |
| # if PROFILE_RFLAGS |
| if (!initted) initCounts(); |
| n_calc_c++; |
| tabc_fast[cc_op]++; |
| if (SHOW_COUNTS_NOW) showCounts(); |
| # endif |
| |
| /* Fast-case some common ones. */ |
| switch (cc_op) { |
| case AMD64G_CC_OP_COPY: |
| return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1; |
| case AMD64G_CC_OP_LOGICQ: |
| case AMD64G_CC_OP_LOGICL: |
| case AMD64G_CC_OP_LOGICW: |
| case AMD64G_CC_OP_LOGICB: |
| return 0; |
| // case AMD64G_CC_OP_SUBL: |
| // return ((UInt)cc_dep1) < ((UInt)cc_dep2) |
| // ? AMD64G_CC_MASK_C : 0; |
| // case AMD64G_CC_OP_SUBW: |
| // return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF)) |
| // ? AMD64G_CC_MASK_C : 0; |
| // case AMD64G_CC_OP_SUBB: |
| // return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF)) |
| // ? AMD64G_CC_MASK_C : 0; |
| // case AMD64G_CC_OP_INCL: |
| // case AMD64G_CC_OP_DECL: |
| // return cc_ndep & AMD64G_CC_MASK_C; |
| default: |
| break; |
| } |
| |
| # if PROFILE_RFLAGS |
| tabc_fast[cc_op]--; |
| tabc_slow[cc_op]++; |
| # endif |
| |
| return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep) |
| & AMD64G_CC_MASK_C; |
| } |
| |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| /* returns 1 or 0 */ |
| ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond, |
| ULong cc_op, |
| ULong cc_dep1, |
| ULong cc_dep2, |
| ULong cc_ndep ) |
| { |
| ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1, |
| cc_dep2, cc_ndep); |
| ULong of,sf,zf,cf,pf; |
| ULong inv = cond & 1; |
| |
| # if PROFILE_RFLAGS |
| if (!initted) initCounts(); |
| tab_cond[cc_op][cond]++; |
| n_calc_cond++; |
| if (SHOW_COUNTS_NOW) showCounts(); |
| # endif |
| |
| switch (cond) { |
| case AMD64CondNO: |
| case AMD64CondO: /* OF == 1 */ |
| of = rflags >> AMD64G_CC_SHIFT_O; |
| return 1 & (inv ^ of); |
| |
| case AMD64CondNZ: |
| case AMD64CondZ: /* ZF == 1 */ |
| zf = rflags >> AMD64G_CC_SHIFT_Z; |
| return 1 & (inv ^ zf); |
| |
| case AMD64CondNB: |
| case AMD64CondB: /* CF == 1 */ |
| cf = rflags >> AMD64G_CC_SHIFT_C; |
| return 1 & (inv ^ cf); |
| break; |
| |
| case AMD64CondNBE: |
| case AMD64CondBE: /* (CF or ZF) == 1 */ |
| cf = rflags >> AMD64G_CC_SHIFT_C; |
| zf = rflags >> AMD64G_CC_SHIFT_Z; |
| return 1 & (inv ^ (cf | zf)); |
| break; |
| |
| case AMD64CondNS: |
| case AMD64CondS: /* SF == 1 */ |
| sf = rflags >> AMD64G_CC_SHIFT_S; |
| return 1 & (inv ^ sf); |
| |
| case AMD64CondNP: |
| case AMD64CondP: /* PF == 1 */ |
| pf = rflags >> AMD64G_CC_SHIFT_P; |
| return 1 & (inv ^ pf); |
| |
| case AMD64CondNL: |
| case AMD64CondL: /* (SF xor OF) == 1 */ |
| sf = rflags >> AMD64G_CC_SHIFT_S; |
| of = rflags >> AMD64G_CC_SHIFT_O; |
| return 1 & (inv ^ (sf ^ of)); |
| break; |
| |
| case AMD64CondNLE: |
| case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */ |
| sf = rflags >> AMD64G_CC_SHIFT_S; |
| of = rflags >> AMD64G_CC_SHIFT_O; |
| zf = rflags >> AMD64G_CC_SHIFT_Z; |
| return 1 & (inv ^ ((sf ^ of) | zf)); |
| break; |
| |
| default: |
| /* shouldn't really make these calls from generated code */ |
| vex_printf("amd64g_calculate_condition" |
| "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n", |
| cond, cc_op, cc_dep1, cc_dep2, cc_ndep ); |
| vpanic("amd64g_calculate_condition"); |
| } |
| } |
| |
| |
| /* VISIBLE TO LIBVEX CLIENT */ |
| ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/const VexGuestAMD64State* vex_state ) |
| { |
| ULong rflags = amd64g_calculate_rflags_all_WRK( |
| vex_state->guest_CC_OP, |
| vex_state->guest_CC_DEP1, |
| vex_state->guest_CC_DEP2, |
| vex_state->guest_CC_NDEP |
| ); |
| Long dflag = vex_state->guest_DFLAG; |
| vassert(dflag == 1 || dflag == -1); |
| if (dflag == -1) |
| rflags |= (1<<10); /* set DF, bit 10 */ |
| if (vex_state->guest_IDFLAG == 1) |
| rflags |= (1<<21); /* set ID, bit 21 */ |
| if (vex_state->guest_ACFLAG == 1) |
| rflags |= (1<<18); /* set AC, bit 18 */ |
| |
| return rflags; |
| } |
| |
| /* VISIBLE TO LIBVEX CLIENT */ |
| void |
| LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag, |
| /*MOD*/VexGuestAMD64State* vex_state ) |
| { |
| ULong oszacp = amd64g_calculate_rflags_all_WRK( |
| vex_state->guest_CC_OP, |
| vex_state->guest_CC_DEP1, |
| vex_state->guest_CC_DEP2, |
| vex_state->guest_CC_NDEP |
| ); |
| if (new_carry_flag & 1) { |
| oszacp |= AMD64G_CC_MASK_C; |
| } else { |
| oszacp &= ~AMD64G_CC_MASK_C; |
| } |
| vex_state->guest_CC_OP = AMD64G_CC_OP_COPY; |
| vex_state->guest_CC_DEP1 = oszacp; |
| vex_state->guest_CC_DEP2 = 0; |
| vex_state->guest_CC_NDEP = 0; |
| } |
| |
| |
| /*---------------------------------------------------------------*/ |
| /*--- %rflags translation-time function specialisers. ---*/ |
| /*--- These help iropt specialise calls to the above run-time ---*/ |
| /*--- %rflags functions. ---*/ |
| /*---------------------------------------------------------------*/ |
| |
| /* Used by the optimiser to try specialisations. Returns an |
| equivalent expression, or NULL if none. */ |
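| /* For example: for "cmpq %rsi,%rdi ; jz ...", toIR generates a call |
| amd64g_calculate_condition(AMD64CondZ, AMD64G_CC_OP_SUBQ, |
| dep1, dep2, ndep) |
| and the specialiser below rewrites it to the much cheaper |
| 1Uto64( CmpEQ64(dep1, dep2) ). */ |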
| |
| static Bool isU64 ( IRExpr* e, ULong n ) |
| { |
| return toBool( e->tag == Iex_Const |
| && e->Iex.Const.con->tag == Ico_U64 |
| && e->Iex.Const.con->Ico.U64 == n ); |
| } |
| |
| IRExpr* guest_amd64_spechelper ( const HChar* function_name, |
| IRExpr** args, |
| IRStmt** precedingStmts, |
| Int n_precedingStmts ) |
| { |
| # define unop(_op,_a1) IRExpr_Unop((_op),(_a1)) |
| # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2)) |
| # define mkU64(_n) IRExpr_Const(IRConst_U64(_n)) |
| # define mkU32(_n) IRExpr_Const(IRConst_U32(_n)) |
| # define mkU8(_n) IRExpr_Const(IRConst_U8(_n)) |
| |
| Int i, arity = 0; |
| for (i = 0; args[i]; i++) |
| arity++; |
| # if 0 |
| vex_printf("spec request:\n"); |
| vex_printf(" %s ", function_name); |
| for (i = 0; i < arity; i++) { |
| vex_printf(" "); |
| ppIRExpr(args[i]); |
| } |
| vex_printf("\n"); |
| # endif |
| |
| /* --------- specialising "amd64g_calculate_condition" --------- */ |
| |
| if (vex_streq(function_name, "amd64g_calculate_condition")) { |
| /* specialise calls to above "calculate condition" function */ |
| IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2; |
| vassert(arity == 5); |
| cond = args[0]; |
| cc_op = args[1]; |
| cc_dep1 = args[2]; |
| cc_dep2 = args[3]; |
| |
| /*---------------- ADDQ ----------------*/ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) { |
| /* long long add, then Z --> test (dst+src == 0) */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpEQ64, |
| binop(Iop_Add64, cc_dep1, cc_dep2), |
| mkU64(0))); |
| } |
| |
| /*---------------- SUBQ ----------------*/ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) { |
| /* long long sub/cmp, then Z --> test dst==src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpEQ64,cc_dep1,cc_dep2)); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) { |
| /* long long sub/cmp, then NZ --> test dst!=src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpNE64,cc_dep1,cc_dep2)); |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) { |
| /* long long sub/cmp, then L (signed less than) |
| --> test dst <s src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLT64S, cc_dep1, cc_dep2)); |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) { |
| /* long long sub/cmp, then B (unsigned less than) |
| --> test dst <u src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLT64U, cc_dep1, cc_dep2)); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) { |
| /* long long sub/cmp, then NB (unsigned greater than or equal) |
| --> test src <=u dst */ |
| /* Note, args are opposite way round from the usual */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLE64U, cc_dep2, cc_dep1)); |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNLE)) { |
| /* long long sub/cmp, then NLE (signed greater than) |
| --> test !(dst <=s src) |
| --> test (dst >s src) |
| --> test (src <s dst) */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLT64S, cc_dep2, cc_dep1)); |
| |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) { |
| /* long long sub/cmp, then BE (unsigned less than or equal) |
| --> test dst <=u src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLE64U, cc_dep1, cc_dep2)); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNBE)) { |
| /* long long sub/cmp, then NBE (unsigned greater than) |
| --> test !(dst <=u src) */ |
| return binop(Iop_Xor64, |
| unop(Iop_1Uto64, |
| binop(Iop_CmpLE64U, cc_dep1, cc_dep2)), |
| mkU64(1)); |
| } |
| |
| /*---------------- SUBL ----------------*/ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) { |
| /* long sub/cmp, then Z --> test dst==src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpEQ32, |
| unop(Iop_64to32, cc_dep1), |
| unop(Iop_64to32, cc_dep2))); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) { |
| /* long sub/cmp, then NZ --> test dst!=src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpNE32, |
| unop(Iop_64to32, cc_dep1), |
| unop(Iop_64to32, cc_dep2))); |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) { |
| /* long sub/cmp, then L (signed less than) |
| --> test dst <s src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLT32S, |
| unop(Iop_64to32, cc_dep1), |
| unop(Iop_64to32, cc_dep2))); |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) { |
| /* long sub/cmp, then LE (signed less than or equal) |
| --> test dst <=s src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLE32S, |
| unop(Iop_64to32, cc_dep1), |
| unop(Iop_64to32, cc_dep2))); |
| |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) { |
| /* long sub/cmp, then NLE (signed greater than) |
| --> test !(dst <=s src) |
| --> test (dst >s src) |
| --> test (src <s dst) */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLT32S, |
| unop(Iop_64to32, cc_dep2), |
| unop(Iop_64to32, cc_dep1))); |
| |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) { |
| /* long sub/cmp, then BE (unsigned less than or equal) |
| --> test dst <=u src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLE32U, |
| unop(Iop_64to32, cc_dep1), |
| unop(Iop_64to32, cc_dep2))); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) { |
| /* long sub/cmp, then NBE (unsigned greater than) |
| --> test src <u dst */ |
| /* Note, args are opposite way round from the usual */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLT32U, |
| unop(Iop_64to32, cc_dep2), |
| unop(Iop_64to32, cc_dep1))); |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) { |
| /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLT32S, |
| binop(Iop_Sub32, |
| unop(Iop_64to32, cc_dep1), |
| unop(Iop_64to32, cc_dep2)), |
| mkU32(0))); |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) { |
| /* long sub/cmp, then B (unsigned less than) |
| --> test dst <u src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLT32U, |
| unop(Iop_64to32, cc_dep1), |
| unop(Iop_64to32, cc_dep2))); |
| } |
| |
| /*---------------- SUBW ----------------*/ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) { |
| /* word sub/cmp, then Z --> test dst==src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpEQ16, |
| unop(Iop_64to16,cc_dep1), |
| unop(Iop_64to16,cc_dep2))); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) { |
| /* word sub/cmp, then NZ --> test dst!=src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpNE16, |
| unop(Iop_64to16,cc_dep1), |
| unop(Iop_64to16,cc_dep2))); |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) { |
| /* word sub/cmp, then LE (signed less than or equal) |
| --> test dst <=s src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLE64S, |
| binop(Iop_Shl64,cc_dep1,mkU8(48)), |
| binop(Iop_Shl64,cc_dep2,mkU8(48)))); |
| |
| } |
| |
| /*---------------- SUBB ----------------*/ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) { |
| /* byte sub/cmp, then Z --> test dst==src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpEQ8, |
| unop(Iop_64to8,cc_dep1), |
| unop(Iop_64to8,cc_dep2))); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) { |
| /* byte sub/cmp, then NZ --> test dst!=src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpNE8, |
| unop(Iop_64to8,cc_dep1), |
| unop(Iop_64to8,cc_dep2))); |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) { |
| /* byte sub/cmp, then BE (unsigned less than or equal) |
| --> test dst <=u src */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLE64U, |
| binop(Iop_And64, cc_dep1, mkU64(0xFF)), |
| binop(Iop_And64, cc_dep2, mkU64(0xFF)))); |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS) |
| && isU64(cc_dep2, 0)) { |
| /* byte sub/cmp of zero, then S --> test (dst-0 <s 0) |
| --> test dst <s 0 |
| --> (ULong)dst[7] |
| This is yet another scheme by which gcc figures out if the |
| top bit of a byte is 1 or 0. See also LOGICB/CondS below. */ |
| /* Note: isU64(cc_dep2, 0) is correct, even though this is |
| for an 8-bit comparison, since the args to the helper |
| function are always U64s. */ |
| return binop(Iop_And64, |
| binop(Iop_Shr64,cc_dep1,mkU8(7)), |
| mkU64(1)); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS) |
| && isU64(cc_dep2, 0)) { |
| /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0) |
| --> test !(dst <s 0) |
| --> (ULong) !dst[7] |
| */ |
| return binop(Iop_Xor64, |
| binop(Iop_And64, |
| binop(Iop_Shr64,cc_dep1,mkU8(7)), |
| mkU64(1)), |
| mkU64(1)); |
| } |
| |
| /*---------------- LOGICQ ----------------*/ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) { |
| /* long long and/or/xor, then Z --> test dst==0 */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpEQ64, cc_dep1, mkU64(0))); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) { |
| /* long long and/or/xor, then NZ --> test dst!=0 */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpNE64, cc_dep1, mkU64(0))); |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) { |
| /* long long and/or/xor, then L |
| LOGIC sets SF and ZF according to the |
| result and makes OF be zero. L computes SF ^ OF, but |
| OF is zero, so this reduces to SF -- which will be 1 iff |
| the result is < signed 0. Hence ... |
| */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLT64S, |
| cc_dep1, |
| mkU64(0))); |
| } |
| |
| /*---------------- LOGICL ----------------*/ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) { |
| /* long and/or/xor, then Z --> test dst==0 */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpEQ32, |
| unop(Iop_64to32, cc_dep1), |
| mkU32(0))); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) { |
| /* long and/or/xor, then NZ --> test dst!=0 */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpNE32, |
| unop(Iop_64to32, cc_dep1), |
| mkU32(0))); |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) { |
| /* long and/or/xor, then LE |
| This is pretty subtle. LOGIC sets SF and ZF according to the |
| result and makes OF be zero. LE computes (SF ^ OF) | ZF, but |
| OF is zero, so this reduces to SF | ZF -- which will be 1 iff |
| the result is <=signed 0. Hence ... |
| */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLE32S, |
| unop(Iop_64to32, cc_dep1), |
| mkU32(0))); |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) { |
| /* long and/or/xor, then S --> (ULong)result[31] */ |
| return binop(Iop_And64, |
| binop(Iop_Shr64, cc_dep1, mkU8(31)), |
| mkU64(1)); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) { |
| /* long and/or/xor, then NS --> (ULong) !result[31] */ |
| return binop(Iop_Xor64, |
| binop(Iop_And64, |
| binop(Iop_Shr64, cc_dep1, mkU8(31)), |
| mkU64(1)), |
| mkU64(1)); |
| } |
| |
| /*---------------- LOGICW ----------------*/ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondZ)) { |
| /* word and/or/xor, then Z --> test dst==0 */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpEQ64, |
| binop(Iop_And64, cc_dep1, mkU64(0xFFFF)), |
| mkU64(0))); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondNZ)) { |
| /* word and/or/xor, then NZ --> test dst!=0 */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpNE64, |
| binop(Iop_And64, cc_dep1, mkU64(0xFFFF)), |
| mkU64(0))); |
| } |
| |
| /*---------------- LOGICB ----------------*/ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) { |
| /* byte and/or/xor, then Z --> test dst==0 */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)), |
| mkU64(0))); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) { |
| /* byte and/or/xor, then NZ --> test dst!=0 */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)), |
| mkU64(0))); |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) { |
| /* this is an idiom gcc sometimes uses to find out if the top |
| bit of a byte register is set: eg testb %al,%al; js .. |
| Since it just depends on the top bit of the byte, extract |
| that bit and explicitly get rid of all the rest. This |
| helps memcheck avoid false positives in the case where any |
| of the other bits in the byte are undefined. */ |
| /* byte and/or/xor, then S --> (UInt)result[7] */ |
| return binop(Iop_And64, |
| binop(Iop_Shr64,cc_dep1,mkU8(7)), |
| mkU64(1)); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) { |
| /* byte and/or/xor, then NS --> (UInt)!result[7] */ |
| return binop(Iop_Xor64, |
| binop(Iop_And64, |
| binop(Iop_Shr64,cc_dep1,mkU8(7)), |
| mkU64(1)), |
| mkU64(1)); |
| } |
| |
| /*---------------- INCB ----------------*/ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) { |
| /* 8-bit inc, then LE --> sign bit of the arg */ |
| return binop(Iop_And64, |
| binop(Iop_Shr64, |
| binop(Iop_Sub64, cc_dep1, mkU64(1)), |
| mkU8(7)), |
| mkU64(1)); |
| } |
| |
| /*---------------- INCW ----------------*/ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) { |
| /* 16-bit inc, then Z --> test dst == 0 */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpEQ64, |
| binop(Iop_Shl64,cc_dep1,mkU8(48)), |
| mkU64(0))); |
| } |
| |
| /*---------------- DECL ----------------*/ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) { |
| /* dec L, then Z --> test dst == 0 */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpEQ32, |
| unop(Iop_64to32, cc_dep1), |
| mkU32(0))); |
| } |
| |
| /*---------------- DECW ----------------*/ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) { |
| /* 16-bit dec, then NZ --> test dst != 0 */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpNE64, |
| binop(Iop_Shl64,cc_dep1,mkU8(48)), |
| mkU64(0))); |
| } |
| |
| /*---------------- COPY ----------------*/ |
| /* This can happen, as a result of amd64 FP compares: "comisd ... ; |
| jbe" for example. */ |
| |
| if (isU64(cc_op, AMD64G_CC_OP_COPY) && |
| (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) { |
| /* COPY, then BE --> extract C and Z from dep1, and test (C |
| or Z == 1). */ |
| /* COPY, then NBE --> extract C and Z from dep1, and test (C |
| or Z == 0). */ |
| ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0; |
| return |
| unop( |
| Iop_1Uto64, |
| binop( |
| Iop_CmpEQ64, |
| binop( |
| Iop_And64, |
| binop( |
| Iop_Or64, |
| binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)), |
| binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)) |
| ), |
| mkU64(1) |
| ), |
| mkU64(nnn) |
| ) |
| ); |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) { |
| /* COPY, then B --> extract C from dep1, and test (C == 1). */ |
| return |
| unop( |
| Iop_1Uto64, |
| binop( |
| Iop_CmpNE64, |
| binop( |
| Iop_And64, |
| binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)), |
| mkU64(1) |
| ), |
| mkU64(0) |
| ) |
| ); |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_COPY) |
| && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) { |
| /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */ |
| /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */ |
| UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0; |
| return |
| unop( |
| Iop_1Uto64, |
| binop( |
| Iop_CmpEQ64, |
| binop( |
| Iop_And64, |
| binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)), |
| mkU64(1) |
| ), |
| mkU64(nnn) |
| ) |
| ); |
| } |
| |
| if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) { |
| /* COPY, then P --> extract P from dep1, and test (P == 1). */ |
| return |
| unop( |
| Iop_1Uto64, |
| binop( |
| Iop_CmpNE64, |
| binop( |
| Iop_And64, |
| binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)), |
| mkU64(1) |
| ), |
| mkU64(0) |
| ) |
| ); |
| } |
| |
| return NULL; |
| } |
| |
| /* --------- specialising "amd64g_calculate_rflags_c" --------- */ |
| |
| if (vex_streq(function_name, "amd64g_calculate_rflags_c")) { |
| /* specialise calls to above "calculate_rflags_c" function */ |
| IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep; |
| vassert(arity == 4); |
| cc_op = args[0]; |
| cc_dep1 = args[1]; |
| cc_dep2 = args[2]; |
| cc_ndep = args[3]; |
| |
| if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) { |
| /* C after sub denotes unsigned less than */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLT64U, |
| cc_dep1, |
| cc_dep2)); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_SUBL)) { |
| /* C after sub denotes unsigned less than */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLT32U, |
| unop(Iop_64to32, cc_dep1), |
| unop(Iop_64to32, cc_dep2))); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_SUBB)) { |
| /* C after sub denotes unsigned less than */ |
| return unop(Iop_1Uto64, |
| binop(Iop_CmpLT64U, |
| binop(Iop_And64,cc_dep1,mkU64(0xFF)), |
| binop(Iop_And64,cc_dep2,mkU64(0xFF)))); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) |
| || isU64(cc_op, AMD64G_CC_OP_LOGICL) |
| || isU64(cc_op, AMD64G_CC_OP_LOGICW) |
| || isU64(cc_op, AMD64G_CC_OP_LOGICB)) { |
| /* cflag after logic is zero */ |
| return mkU64(0); |
| } |
| if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL) |
| || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) { |
| /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */ |
| return cc_ndep; |
| } |
| |
| # if 0 |
| if (cc_op->tag == Iex_Const) { |
| vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n"); |
| } |
| # endif |
| |
| return NULL; |
| } |
| |
| # undef unop |
| # undef binop |
| # undef mkU64 |
| # undef mkU32 |
| # undef mkU8 |
| |
| return NULL; |
| } |
| |
| |
| /*---------------------------------------------------------------*/ |
| /*--- Supporting functions for x87 FPU activities. ---*/ |
| /*---------------------------------------------------------------*/ |
| |
| static inline Bool host_is_little_endian ( void ) |
| { |
| UInt x = 0x76543210; |
| UChar* p = (UChar*)(&x); |
| return toBool(*p == 0x10); |
| } |
| |
| /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */ |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl ) |
| { |
| Bool mantissaIsZero; |
| Int bexp; |
| UChar sign; |
| UChar* f64; |
| |
| vassert(host_is_little_endian()); |
| |
| /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */ |
| |
| f64 = (UChar*)(&dbl); |
| sign = toUChar( (f64[7] >> 7) & 1 ); |
| |
| /* First off, if the tag indicates the register was empty, |
| return 1,0,sign,1 */ |
| if (tag == 0) { |
| /* vex_printf("Empty\n"); */ |
| return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1) |
| | AMD64G_FC_MASK_C0; |
| } |
| |
| bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F); |
| bexp &= 0x7FF; |
| |
| mantissaIsZero |
| = toBool( |
| (f64[6] & 0x0F) == 0 |
| && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0 |
| ); |
| |
| /* If both exponent and mantissa are zero, the value is zero. |
| Return 1,0,sign,0. */ |
| if (bexp == 0 && mantissaIsZero) { |
| /* vex_printf("Zero\n"); */ |
| return AMD64G_FC_MASK_C3 | 0 |
| | (sign << AMD64G_FC_SHIFT_C1) | 0; |
| } |
| |
| /* If exponent is zero but mantissa isn't, it's a denormal. |
| Return 1,1,sign,0. */ |
| if (bexp == 0 && !mantissaIsZero) { |
| /* vex_printf("Denormal\n"); */ |
| return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2 |
| | (sign << AMD64G_FC_SHIFT_C1) | 0; |
| } |
| |
| /* If the exponent is 7FF and the mantissa is zero, this is an infinity. |
| Return 0,1,sign,1. */ |
| if (bexp == 0x7FF && mantissaIsZero) { |
| /* vex_printf("Inf\n"); */ |
| return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) |
| | AMD64G_FC_MASK_C0; |
| } |
| |
| /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN. |
| Return 0,0,sign,1. */ |
| if (bexp == 0x7FF && !mantissaIsZero) { |
| /* vex_printf("NaN\n"); */ |
| return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0; |
| } |
| |
| /* Uh, ok, we give up. It must be a normal finite number. |
| Return 0,1,sign,0. |
| */ |
| /* vex_printf("normal\n"); */ |
| return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0; |
| } |
| |
| |
| /* This is used to implement both 'frstor' and 'fldenv'. The latter |
| appears to differ from the former only in that the 8 FP registers |
| themselves are not transferred into the guest state. */ |
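| /* Reminder of the x87 environment encoding used here: FTOP lives in |
| bits 13..11 of the status word, the C3/C2/C1/C0 condition bits are |
| the 0x4700 subset of the status word, and the tag word holds two |
| bits per physical register, with 3 meaning "empty". */ |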
| static |
| VexEmNote do_put_x87 ( Bool moveRegs, |
| /*IN*/UChar* x87_state, |
| /*OUT*/VexGuestAMD64State* vex_state ) |
| { |
| Int stno, preg; |
| UInt tag; |
| ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); |
| UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); |
| Fpu_State* x87 = (Fpu_State*)x87_state; |
| UInt ftop = (x87->env[FP_ENV_STAT] >> 11) & 7; |
| UInt tagw = x87->env[FP_ENV_TAG]; |
| UInt fpucw = x87->env[FP_ENV_CTRL]; |
| UInt c3210 = x87->env[FP_ENV_STAT] & 0x4700; |
| VexEmNote ew; |
| UInt fpround; |
| ULong pair; |
| |
| /* Copy registers and tags */ |
| for (stno = 0; stno < 8; stno++) { |
| preg = (stno + ftop) & 7; |
| tag = (tagw >> (2*preg)) & 3; |
| if (tag == 3) { |
| /* register is empty */ |
| /* hmm, if it's empty, does it still get written? Probably |
| safer to say it does. If we don't, memcheck could get out |
| of sync, in that it thinks all FP registers are defined by |
| this helper, but in reality some have not been updated. */ |
| if (moveRegs) |
| vexRegs[preg] = 0; /* IEEE754 64-bit zero */ |
| vexTags[preg] = 0; |
| } else { |
| /* register is non-empty */ |
| if (moveRegs) |
| convert_f80le_to_f64le( &x87->reg[10*stno], |
| (UChar*)&vexRegs[preg] ); |
| vexTags[preg] = 1; |
| } |
| } |
| |
| /* stack pointer */ |
| vex_state->guest_FTOP = ftop; |
| |
| /* status word */ |
| vex_state->guest_FC3210 = c3210; |
| |
| /* handle the control word, setting FPROUND and detecting any |
| emulation warnings. */ |
| pair = amd64g_check_fldcw ( (ULong)fpucw ); |
| fpround = (UInt)pair & 0xFFFFFFFFULL; |
| ew = (VexEmNote)(pair >> 32); |
| |
| vex_state->guest_FPROUND = fpround & 3; |
| |
| /* emulation warnings --> caller */ |
| return ew; |
| } |
| |
| |
| /* Create an x87 FPU state from the guest state, as close as |
| we can approximate it. */ |
| static |
| void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state, |
| /*OUT*/UChar* x87_state ) |
| { |
| Int i, stno, preg; |
| UInt tagw; |
| ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); |
| UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); |
| Fpu_State* x87 = (Fpu_State*)x87_state; |
| UInt ftop = vex_state->guest_FTOP; |
| UInt c3210 = vex_state->guest_FC3210; |
| |
| for (i = 0; i < 14; i++) |
| x87->env[i] = 0; |
| |
| x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF; |
| x87->env[FP_ENV_STAT] |
| = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700)); |
| x87->env[FP_ENV_CTRL] |
| = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND )); |
| |
| /* Dump the register stack in ST order. */ |
| tagw = 0; |
| for (stno = 0; stno < 8; stno++) { |
| preg = (stno + ftop) & 7; |
| if (vexTags[preg] == 0) { |
| /* register is empty */ |
| tagw |= (3 << (2*preg)); |
| convert_f64le_to_f80le( (UChar*)&vexRegs[preg], |
| &x87->reg[10*stno] ); |
| } else { |
| /* register is full. */ |
| tagw |= (0 << (2*preg)); |
| convert_f64le_to_f80le( (UChar*)&vexRegs[preg], |
| &x87->reg[10*stno] ); |
| } |
| } |
| x87->env[FP_ENV_TAG] = toUShort(tagw); |
| } |
| |
| |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER (reads guest state, writes guest mem) */ |
| /* NOTE: only handles 32-bit format (no REX.W on the insn) */ |
| void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr ) |
| { |
| /* Derived from values obtained from |
| vendor_id : AuthenticAMD |
| cpu family : 15 |
| model : 12 |
| model name : AMD Athlon(tm) 64 Processor 3200+ |
| stepping : 0 |
| cpu MHz : 2200.000 |
| cache size : 512 KB |
| */ |
| /* Somewhat roundabout, but at least it's simple. */ |
| Fpu_State tmp; |
| UShort* addrS = (UShort*)addr; |
| UChar* addrC = (UChar*)addr; |
| U128* xmm = (U128*)(addr + 160); |
| UInt mxcsr; |
| UShort fp_tags; |
| UInt summary_tags; |
| Int r, stno; |
| UShort *srcS, *dstS; |
| |
| do_get_x87( gst, (UChar*)&tmp ); |
| mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND ); |
| |
| /* Now build the proper fxsave image from the x87 image we just |
| made. */ |
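| /* For reference, the byte offsets within the fxsave image that this |
| helper fills in: 0..1 FCW, 2..3 FSW, 4 abridged FTW, 5 pad, |
| 6..7 FOP, 8..15 x87 instruction pointer, 16..23 x87 data pointer, |
| 24..27 MXCSR, 28..31 MXCSR mask, 32..159 ST0..ST7 (16 bytes each, |
| of which 10 are used), 160..415 XMM0..XMM15 (16 bytes each). */ |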
| |
| addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */ |
| addrS[1] = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */ |
| |
| /* set addrS[2] in an endian-independent way */ |
| summary_tags = 0; |
| fp_tags = tmp.env[FP_ENV_TAG]; |
| for (r = 0; r < 8; r++) { |
| if ( ((fp_tags >> (2*r)) & 3) != 3 ) |
| summary_tags |= (1 << r); |
| } |
| addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */ |
| addrC[5] = 0; /* pad */ |
| |
| /* FOP: faulting fpu opcode. From experimentation, the real CPU |
| does not write this field. (?!) */ |
| addrS[3] = 0; /* BOGUS */ |
| |
| /* RIP (Last x87 instruction pointer). From experimentation, the |
| real CPU does not write this field. (?!) */ |
| addrS[4] = 0; /* BOGUS */ |
| addrS[5] = 0; /* BOGUS */ |
| addrS[6] = 0; /* BOGUS */ |
| addrS[7] = 0; /* BOGUS */ |
| |
| /* RDP (Last x87 data pointer). From experimentation, the real CPU |
| does not write this field. (?!) */ |
| addrS[8] = 0; /* BOGUS */ |
| addrS[9] = 0; /* BOGUS */ |
| addrS[10] = 0; /* BOGUS */ |
| addrS[11] = 0; /* BOGUS */ |
| |
| addrS[12] = toUShort(mxcsr); /* MXCSR */ |
| addrS[13] = toUShort(mxcsr >> 16); |
| |
| addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */ |
| addrS[15] = 0x0000; /* MXCSR mask (hi16) */ |
| |
| /* Copy in the FP registers, in ST order. */ |
| for (stno = 0; stno < 8; stno++) { |
| srcS = (UShort*)(&tmp.reg[10*stno]); |
| dstS = (UShort*)(&addrS[16 + 8*stno]); |
| dstS[0] = srcS[0]; |
| dstS[1] = srcS[1]; |
| dstS[2] = srcS[2]; |
| dstS[3] = srcS[3]; |
| dstS[4] = srcS[4]; |
| dstS[5] = 0; |
| dstS[6] = 0; |
| dstS[7] = 0; |
| } |
| |
| /* That's the first 160 bytes of the image done. Now only %xmm0 |
| .. %xmm15 remain to be copied. If the host is big-endian, these |
| need to be byte-swapped. */ |
| vassert(host_is_little_endian()); |
| |
| # define COPY_U128(_dst,_src) \ |
| do { _dst[0] = _src[0]; _dst[1] = _src[1]; \ |
| _dst[2] = _src[2]; _dst[3] = _src[3]; } \ |
| while (0) |
| |
| COPY_U128( xmm[0], gst->guest_YMM0 ); |
| COPY_U128( xmm[1], gst->guest_YMM1 ); |
| COPY_U128( xmm[2], gst->guest_YMM2 ); |
| COPY_U128( xmm[3], gst->guest_YMM3 ); |
| COPY_U128( xmm[4], gst->guest_YMM4 ); |
| COPY_U128( xmm[5], gst->guest_YMM5 ); |
| COPY_U128( xmm[6], gst->guest_YMM6 ); |
| COPY_U128( xmm[7], gst->guest_YMM7 ); |
| COPY_U128( xmm[8], gst->guest_YMM8 ); |
| COPY_U128( xmm[9], gst->guest_YMM9 ); |
| COPY_U128( xmm[10], gst->guest_YMM10 ); |
| COPY_U128( xmm[11], gst->guest_YMM11 ); |
| COPY_U128( xmm[12], gst->guest_YMM12 ); |
| COPY_U128( xmm[13], gst->guest_YMM13 ); |
| COPY_U128( xmm[14], gst->guest_YMM14 ); |
| COPY_U128( xmm[15], gst->guest_YMM15 ); |
| |
| # undef COPY_U128 |
| } |
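| |
| /* Illustrative sketch (not part of the original source): the 512-byte |
|    image written above follows the standard FXSAVE layout, which the |
|    index arithmetic encodes.  Roughly: |
| |
|       struct FXSaveImage {          // byte offsets |
|          UShort fcw;                //   0  x87 control word |
|          UShort fsw;                //   2  x87 status word |
|          UChar  ftw;                //   4  abridged tag byte |
|          UChar  pad0;               //   5 |
|          UShort fop;                //   6  zeroed above |
|          ULong  fpu_ip;             //   8  last x87 insn ptr, zeroed above |
|          ULong  fpu_dp;             //  16  last x87 data ptr, zeroed above |
|          UInt   mxcsr;              //  24 |
|          UInt   mxcsr_mask;         //  28 |
|          UChar  st_mm[8][16];       //  32  ST(0..7), 10 bytes used per slot |
|          UChar  xmm[16][16];        // 160  %xmm0 .. %xmm15 |
|          UChar  unused[96];         // 416 .. 511 |
|       }; |
| |
|    'addrS[16 + 8*stno]' is simply the UShort view of st_mm[stno], and |
|    the 'xmm' pointer above addresses byte offset 160 onwards. */ |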
| |
| |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER (writes guest state, reads guest mem) */ |
| VexEmNote amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State* gst, HWord addr ) |
| { |
| Fpu_State tmp; |
| VexEmNote warnX87 = EmNote_NONE; |
| VexEmNote warnXMM = EmNote_NONE; |
| UShort* addrS = (UShort*)addr; |
| UChar* addrC = (UChar*)addr; |
| U128* xmm = (U128*)(addr + 160); |
| UShort fp_tags; |
| Int r, stno, i; |
| |
| /* Restore %xmm0 .. %xmm15. If the host is big-endian, these need |
| to be byte-swapped. */ |
| vassert(host_is_little_endian()); |
| |
| # define COPY_U128(_dst,_src) \ |
| do { _dst[0] = _src[0]; _dst[1] = _src[1]; \ |
| _dst[2] = _src[2]; _dst[3] = _src[3]; } \ |
| while (0) |
| |
| COPY_U128( gst->guest_YMM0, xmm[0] ); |
| COPY_U128( gst->guest_YMM1, xmm[1] ); |
| COPY_U128( gst->guest_YMM2, xmm[2] ); |
| COPY_U128( gst->guest_YMM3, xmm[3] ); |
| COPY_U128( gst->guest_YMM4, xmm[4] ); |
| COPY_U128( gst->guest_YMM5, xmm[5] ); |
| COPY_U128( gst->guest_YMM6, xmm[6] ); |
| COPY_U128( gst->guest_YMM7, xmm[7] ); |
| COPY_U128( gst->guest_YMM8, xmm[8] ); |
| COPY_U128( gst->guest_YMM9, xmm[9] ); |
| COPY_U128( gst->guest_YMM10, xmm[10] ); |
| COPY_U128( gst->guest_YMM11, xmm[11] ); |
| COPY_U128( gst->guest_YMM12, xmm[12] ); |
| COPY_U128( gst->guest_YMM13, xmm[13] ); |
| COPY_U128( gst->guest_YMM14, xmm[14] ); |
| COPY_U128( gst->guest_YMM15, xmm[15] ); |
| |
| # undef COPY_U128 |
| |
| /* Copy the x87 registers out of the image, into a temporary |
| Fpu_State struct. */ |
| for (i = 0; i < 14; i++) tmp.env[i] = 0; |
| for (i = 0; i < 80; i++) tmp.reg[i] = 0; |
| /* fill in tmp.reg[0..7] */ |
| for (stno = 0; stno < 8; stno++) { |
| UShort* dstS = (UShort*)(&tmp.reg[10*stno]); |
| UShort* srcS = (UShort*)(&addrS[16 + 8*stno]); |
| dstS[0] = srcS[0]; |
| dstS[1] = srcS[1]; |
| dstS[2] = srcS[2]; |
| dstS[3] = srcS[3]; |
| dstS[4] = srcS[4]; |
| } |
| /* fill in tmp.env[0..13] */ |
|    tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */ |
|    tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */ |
| |
| fp_tags = 0; |
| for (r = 0; r < 8; r++) { |
|       if (addrC[4] & (1<<r)) |
|          fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */ |
|       else |
|          fp_tags |= (3 << (2*r)); /* EMPTY */ |
| } |
| tmp.env[FP_ENV_TAG] = fp_tags; |
| |
| /* Now write 'tmp' into the guest state. */ |
| warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst ); |
| |
| { UInt w32 = (((UInt)addrS[12]) & 0xFFFF) |
| | ((((UInt)addrS[13]) & 0xFFFF) << 16); |
| ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 ); |
| |
| warnXMM = (VexEmNote)(w64 >> 32); |
| |
| gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL; |
| } |
| |
| /* Prefer an X87 emwarn over an XMM one, if both exist. */ |
| if (warnX87 != EmNote_NONE) |
| return warnX87; |
| else |
| return warnXMM; |
| } |
| |
| |
| /* DIRTY HELPER (writes guest state) */ |
| /* Initialise the x87 FPU state as per 'finit'. */ |
| void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst ) |
| { |
| Int i; |
| gst->guest_FTOP = 0; |
| for (i = 0; i < 8; i++) { |
| gst->guest_FPTAG[i] = 0; /* empty */ |
| gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */ |
| } |
| gst->guest_FPROUND = (ULong)Irrm_NEAREST; |
| gst->guest_FC3210 = 0; |
| } |
| |
| |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER (reads guest memory) */ |
| ULong amd64g_dirtyhelper_loadF80le ( ULong addrU ) |
| { |
| ULong f64; |
| convert_f80le_to_f64le ( (UChar*)ULong_to_Ptr(addrU), (UChar*)&f64 ); |
| return f64; |
| } |
| |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER (writes guest memory) */ |
| void amd64g_dirtyhelper_storeF80le ( ULong addrU, ULong f64 ) |
| { |
| convert_f64le_to_f80le( (UChar*)&f64, (UChar*)ULong_to_Ptr(addrU) ); |
| } |
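| |
| /* A minimal usage sketch (illustrative, not part of the helper set): |
|    the two helpers above are intended to be inverses for ordinary |
|    finite values, since the 64-bit to 80-bit conversion is exact: |
| |
|       double d = 1.5; |
|       UChar  ext80[10]; |
|       ULong  bits, back; |
|       __builtin_memcpy(&bits, &d, 8); |
|       amd64g_dirtyhelper_storeF80le( (ULong)(HWord)&ext80[0], bits ); |
|       back = amd64g_dirtyhelper_loadF80le( (ULong)(HWord)&ext80[0] ); |
|       // back == bits for any finite double |
| */ |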
| |
| |
| /* CALLED FROM GENERATED CODE */ |
| /* CLEAN HELPER */ |
| /* mxcsr[15:0] contains an SSE native format MXCSR value. |
| Extract from it the required SSEROUND value and any resulting |
| emulation warning, and return (warn << 32) | sseround value. |
| */ |
| ULong amd64g_check_ldmxcsr ( ULong mxcsr ) |
| { |
| /* Decide on a rounding mode. mxcsr[14:13] holds it. */ |
| /* NOTE, encoded exactly as per enum IRRoundingMode. */ |
| ULong rmode = (mxcsr >> 13) & 3; |
| |
| /* Detect any required emulation warnings. */ |
| VexEmNote ew = EmNote_NONE; |
| |
| if ((mxcsr & 0x1F80) != 0x1F80) { |
| /* unmasked exceptions! */ |
| ew = EmWarn_X86_sseExns; |
| } |
| else |
| if (mxcsr & (1<<15)) { |
| /* FZ is set */ |
| ew = EmWarn_X86_fz; |
| } |
| else |
| if (mxcsr & (1<<6)) { |
| /* DAZ is set */ |
| ew = EmWarn_X86_daz; |
| } |
| |
| return (((ULong)ew) << 32) | ((ULong)rmode); |
| } |
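| |
| /* Usage sketch (hypothetical caller, illustrative only): the low 32 |
|    bits of the result are the IRRoundingMode-encoded rounding mode, |
|    the high 32 bits any emulation note. |
| |
|       ULong     res   = amd64g_check_ldmxcsr( 0x1F80 );  // power-on MXCSR |
|       ULong     rmode = res & 0xFFFFFFFFULL;    // Irrm_NEAREST (0) |
|       VexEmNote ew    = (VexEmNote)(res >> 32); // EmNote_NONE |
| */ |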
| |
| |
| /* CALLED FROM GENERATED CODE */ |
| /* CLEAN HELPER */ |
| /* Given sseround as an IRRoundingMode value, create a suitable SSE |
| native format MXCSR value. */ |
| ULong amd64g_create_mxcsr ( ULong sseround ) |
| { |
| sseround &= 3; |
| return 0x1F80 | (sseround << 13); |
| } |
| |
| |
| /* CLEAN HELPER */ |
| /* fpucw[15:0] contains an x87 native format FPU control word. |
| Extract from it the required FPROUND value and any resulting |
| emulation warning, and return (warn << 32) | fpround value. |
| */ |
| ULong amd64g_check_fldcw ( ULong fpucw ) |
| { |
| /* Decide on a rounding mode. fpucw[11:10] holds it. */ |
| /* NOTE, encoded exactly as per enum IRRoundingMode. */ |
| ULong rmode = (fpucw >> 10) & 3; |
| |
| /* Detect any required emulation warnings. */ |
| VexEmNote ew = EmNote_NONE; |
| |
| if ((fpucw & 0x3F) != 0x3F) { |
| /* unmasked exceptions! */ |
| ew = EmWarn_X86_x87exns; |
| } |
| else |
| if (((fpucw >> 8) & 3) != 3) { |
| /* unsupported precision */ |
| ew = EmWarn_X86_x87precision; |
| } |
| |
| return (((ULong)ew) << 32) | ((ULong)rmode); |
| } |
| |
| |
| /* CLEAN HELPER */ |
| /* Given fpround as an IRRoundingMode value, create a suitable x87 |
| native format FPU control word. */ |
| ULong amd64g_create_fpucw ( ULong fpround ) |
| { |
| fpround &= 3; |
| return 0x037F | (fpround << 10); |
| } |
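| |
| /* Sketch of the intended round-trip between the two helpers above |
|    (illustrative only): a control word built by amd64g_create_fpucw |
|    has all exceptions masked and 64-bit precision selected, so |
|    feeding it back through amd64g_check_fldcw recovers the rounding |
|    mode with no emulation note: |
| |
|       ULong cw   = amd64g_create_fpucw( Irrm_ZERO );  // 0x0F7F |
|       ULong pair = amd64g_check_fldcw( cw ); |
|       // (pair & 3) == Irrm_ZERO  and  (pair >> 32) == EmNote_NONE |
| */ |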
| |
| |
| /* This is used to implement 'fldenv'. |
| Reads 28 bytes at x87_state[0 .. 27]. */ |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER */ |
| VexEmNote amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state, |
| /*IN*/HWord x87_state) |
| { |
| return do_put_x87( False, (UChar*)x87_state, vex_state ); |
| } |
| |
| |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER */ |
| /* Create an x87 FPU env from the guest state, as close as we can |
| approximate it. Writes 28 bytes at x87_state[0..27]. */ |
| void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state, |
| /*OUT*/HWord x87_state ) |
| { |
| Int i, stno, preg; |
| UInt tagw; |
| UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); |
| Fpu_State* x87 = (Fpu_State*)x87_state; |
| UInt ftop = vex_state->guest_FTOP; |
| ULong c3210 = vex_state->guest_FC3210; |
| |
| for (i = 0; i < 14; i++) |
| x87->env[i] = 0; |
| |
| x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF; |
| x87->env[FP_ENV_STAT] |
| = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) )); |
| x87->env[FP_ENV_CTRL] |
| = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) )); |
| |
| /* Compute the x87 tag word. */ |
| tagw = 0; |
| for (stno = 0; stno < 8; stno++) { |
| preg = (stno + ftop) & 7; |
| if (vexTags[preg] == 0) { |
| /* register is empty */ |
| tagw |= (3 << (2*preg)); |
| } else { |
| /* register is full. */ |
| tagw |= (0 << (2*preg)); |
| } |
| } |
| x87->env[FP_ENV_TAG] = toUShort(tagw); |
| |
|    /* We don't dump the x87 registers, though; fstenv stores only |
|       the 28-byte environment. */ |
| } |
| |
| |
| /* This is used to implement 'fnsave'. |
| Writes 108 bytes at x87_state[0 .. 107]. */ |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER */ |
| void amd64g_dirtyhelper_FNSAVE ( /*IN*/VexGuestAMD64State* vex_state, |
| /*OUT*/HWord x87_state) |
| { |
| do_get_x87( vex_state, (UChar*)x87_state ); |
| } |
| |
| |
| /* This is used to implement 'fnsaves'. |
| Writes 94 bytes at x87_state[0 .. 93]. */ |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER */ |
| void amd64g_dirtyhelper_FNSAVES ( /*IN*/VexGuestAMD64State* vex_state, |
| /*OUT*/HWord x87_state) |
| { |
| Int i, stno, preg; |
| UInt tagw; |
| ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); |
| UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); |
| Fpu_State_16* x87 = (Fpu_State_16*)x87_state; |
| UInt ftop = vex_state->guest_FTOP; |
| UInt c3210 = vex_state->guest_FC3210; |
| |
| for (i = 0; i < 7; i++) |
| x87->env[i] = 0; |
| |
| x87->env[FPS_ENV_STAT] |
| = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700)); |
| x87->env[FPS_ENV_CTRL] |
| = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND )); |
| |
| /* Dump the register stack in ST order. */ |
| tagw = 0; |
| for (stno = 0; stno < 8; stno++) { |
| preg = (stno + ftop) & 7; |
| if (vexTags[preg] == 0) { |
| /* register is empty */ |
| tagw |= (3 << (2*preg)); |
| convert_f64le_to_f80le( (UChar*)&vexRegs[preg], |
| &x87->reg[10*stno] ); |
| } else { |
| /* register is full. */ |
| tagw |= (0 << (2*preg)); |
| convert_f64le_to_f80le( (UChar*)&vexRegs[preg], |
| &x87->reg[10*stno] ); |
| } |
| } |
| x87->env[FPS_ENV_TAG] = toUShort(tagw); |
| } |
| |
| |
| /* This is used to implement 'frstor'. |
| Reads 108 bytes at x87_state[0 .. 107]. */ |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER */ |
| VexEmNote amd64g_dirtyhelper_FRSTOR ( /*OUT*/VexGuestAMD64State* vex_state, |
| /*IN*/HWord x87_state) |
| { |
| return do_put_x87( True, (UChar*)x87_state, vex_state ); |
| } |
| |
| |
| /* This is used to implement 'frstors'. |
| Reads 94 bytes at x87_state[0 .. 93]. */ |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER */ |
| VexEmNote amd64g_dirtyhelper_FRSTORS ( /*OUT*/VexGuestAMD64State* vex_state, |
| /*IN*/HWord x87_state) |
| { |
| Int stno, preg; |
| UInt tag; |
| ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); |
| UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); |
| Fpu_State_16* x87 = (Fpu_State_16*)x87_state; |
| UInt ftop = (x87->env[FPS_ENV_STAT] >> 11) & 7; |
| UInt tagw = x87->env[FPS_ENV_TAG]; |
| UInt fpucw = x87->env[FPS_ENV_CTRL]; |
| UInt c3210 = x87->env[FPS_ENV_STAT] & 0x4700; |
| VexEmNote ew; |
| UInt fpround; |
| ULong pair; |
| |
| /* Copy registers and tags */ |
| for (stno = 0; stno < 8; stno++) { |
| preg = (stno + ftop) & 7; |
| tag = (tagw >> (2*preg)) & 3; |
| if (tag == 3) { |
| /* register is empty */ |
| /* hmm, if it's empty, does it still get written? Probably |
| safer to say it does. If we don't, memcheck could get out |
| of sync, in that it thinks all FP registers are defined by |
| this helper, but in reality some have not been updated. */ |
| vexRegs[preg] = 0; /* IEEE754 64-bit zero */ |
| vexTags[preg] = 0; |
| } else { |
| /* register is non-empty */ |
| convert_f80le_to_f64le( &x87->reg[10*stno], |
| (UChar*)&vexRegs[preg] ); |
| vexTags[preg] = 1; |
| } |
| } |
| |
| /* stack pointer */ |
| vex_state->guest_FTOP = ftop; |
| |
| /* status word */ |
| vex_state->guest_FC3210 = c3210; |
| |
| /* handle the control word, setting FPROUND and detecting any |
| emulation warnings. */ |
| pair = amd64g_check_fldcw ( (ULong)fpucw ); |
| fpround = (UInt)pair & 0xFFFFFFFFULL; |
| ew = (VexEmNote)(pair >> 32); |
| |
| vex_state->guest_FPROUND = fpround & 3; |
| |
| /* emulation warnings --> caller */ |
| return ew; |
| } |
| |
| |
| /*---------------------------------------------------------------*/ |
| /*--- Misc integer helpers, including rotates and CPUID. ---*/ |
| /*---------------------------------------------------------------*/ |
| |
| /* Claim to be the following CPU, which is probably representative of |
| the lowliest (earliest) amd64 offerings. It can do neither sse3 |
| nor cx16. |
| |
| vendor_id : AuthenticAMD |
| cpu family : 15 |
| model : 5 |
| model name : AMD Opteron (tm) Processor 848 |
| stepping : 10 |
| cpu MHz : 1797.682 |
| cache size : 1024 KB |
| fpu : yes |
| fpu_exception : yes |
| cpuid level : 1 |
| wp : yes |
| flags : fpu vme de pse tsc msr pae mce cx8 apic sep |
| mtrr pge mca cmov pat pse36 clflush mmx fxsr |
| sse sse2 syscall nx mmxext lm 3dnowext 3dnow |
| bogomips : 3600.62 |
| TLB size : 1088 4K pages |
| clflush size : 64 |
| cache_alignment : 64 |
| address sizes : 40 bits physical, 48 bits virtual |
| power management: ts fid vid ttp |
| |
| 2012-Feb-21: don't claim 3dnow or 3dnowext, since in fact |
| we don't support them. See #291568. 3dnow is 80000001.EDX.31 |
| and 3dnowext is 80000001.EDX.30. |
| */ |
| void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st ) |
| { |
| # define SET_ABCD(_a,_b,_c,_d) \ |
| do { st->guest_RAX = (ULong)(_a); \ |
| st->guest_RBX = (ULong)(_b); \ |
| st->guest_RCX = (ULong)(_c); \ |
| st->guest_RDX = (ULong)(_d); \ |
| } while (0) |
| |
| switch (0xFFFFFFFF & st->guest_RAX) { |
| case 0x00000000: |
| SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65); |
| break; |
| case 0x00000001: |
| SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff); |
| break; |
| case 0x80000000: |
| SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65); |
| break; |
| case 0x80000001: |
| /* Don't claim to support 3dnow or 3dnowext. 0xe1d3fbff is |
| the original it-is-supported value that the h/w provides. |
| See #291568. */ |
| SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, /*0xe1d3fbff*/ |
| 0x21d3fbff); |
| break; |
| case 0x80000002: |
| SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428); |
| break; |
| case 0x80000003: |
| SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834); |
| break; |
| case 0x80000004: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x80000005: |
| SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140); |
| break; |
| case 0x80000006: |
| SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000); |
| break; |
| case 0x80000007: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f); |
| break; |
| case 0x80000008: |
| SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| default: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| } |
| # undef SET_ABCD |
| } |
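| |
| /* Worked example of the leaf-0 values above (illustrative): CPUID |
|    returns the vendor string as the little-endian bytes of EBX, EDX |
|    and ECX, in that order, so |
| |
|       0x68747541 -> "Auth"   (EBX) |
|       0x69746e65 -> "enti"   (EDX) |
|       0x444d4163 -> "cAMD"   (ECX) |
| |
|    which concatenates to "AuthenticAMD".  A (hypothetical) caller |
|    could recover it as: |
| |
|       char vendor[13]; |
|       // after a CPUID with EAX = 0, with results in ebx/ecx/edx |
|       memcpy(vendor + 0, &ebx, 4); |
|       memcpy(vendor + 4, &edx, 4); |
|       memcpy(vendor + 8, &ecx, 4); |
|       vendor[12] = 0; |
| */ |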
| |
| |
| /* Claim to be the following CPU (2 x ...), which is sse3 and cx16 |
| capable. |
| |
| vendor_id : GenuineIntel |
| cpu family : 6 |
| model : 15 |
| model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz |
| stepping : 6 |
| cpu MHz : 2394.000 |
| cache size : 4096 KB |
| physical id : 0 |
| siblings : 2 |
| core id : 0 |
| cpu cores : 2 |
| fpu : yes |
| fpu_exception : yes |
| cpuid level : 10 |
| wp : yes |
| flags : fpu vme de pse tsc msr pae mce cx8 apic sep |
| mtrr pge mca cmov pat pse36 clflush dts acpi |
| mmx fxsr sse sse2 ss ht tm syscall nx lm |
| constant_tsc pni monitor ds_cpl vmx est tm2 |
| cx16 xtpr lahf_lm |
| bogomips : 4798.78 |
| clflush size : 64 |
| cache_alignment : 64 |
| address sizes : 36 bits physical, 48 bits virtual |
| power management: |
| */ |
| void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st ) |
| { |
| # define SET_ABCD(_a,_b,_c,_d) \ |
| do { st->guest_RAX = (ULong)(_a); \ |
| st->guest_RBX = (ULong)(_b); \ |
| st->guest_RCX = (ULong)(_c); \ |
| st->guest_RDX = (ULong)(_d); \ |
| } while (0) |
| |
| switch (0xFFFFFFFF & st->guest_RAX) { |
| case 0x00000000: |
| SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69); |
| break; |
| case 0x00000001: |
| SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff); |
| break; |
| case 0x00000002: |
| SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049); |
| break; |
| case 0x00000003: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x00000004: { |
| switch (0xFFFFFFFF & st->guest_RCX) { |
| case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f, |
| 0x0000003f, 0x00000001); break; |
| case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f, |
| 0x0000003f, 0x00000001); break; |
| case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f, |
| 0x00000fff, 0x00000001); break; |
| default: SET_ABCD(0x00000000, 0x00000000, |
| 0x00000000, 0x00000000); break; |
| } |
| break; |
| } |
| case 0x00000005: |
| SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020); |
| break; |
| case 0x00000006: |
| SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000); |
| break; |
| case 0x00000007: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x00000008: |
| SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x00000009: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x0000000a: |
| unhandled_eax_value: |
| SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x80000000: |
| SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x80000001: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800); |
| break; |
| case 0x80000002: |
| SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); |
| break; |
| case 0x80000003: |
| SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020); |
| break; |
| case 0x80000004: |
| SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847); |
| break; |
| case 0x80000005: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x80000006: |
| SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000); |
| break; |
| case 0x80000007: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x80000008: |
| SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| default: |
| goto unhandled_eax_value; |
| } |
| # undef SET_ABCD |
| } |
| |
| |
| /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16 |
| capable. |
| |
| vendor_id : GenuineIntel |
| cpu family : 6 |
| model : 37 |
| model name : Intel(R) Core(TM) i5 CPU 670 @ 3.47GHz |
| stepping : 2 |
| cpu MHz : 3334.000 |
| cache size : 4096 KB |
| physical id : 0 |
| siblings : 4 |
| core id : 0 |
| cpu cores : 2 |
| apicid : 0 |
| initial apicid : 0 |
| fpu : yes |
| fpu_exception : yes |
| cpuid level : 11 |
| wp : yes |
| flags : fpu vme de pse tsc msr pae mce cx8 apic sep |
| mtrr pge mca cmov pat pse36 clflush dts acpi |
| mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp |
| lm constant_tsc arch_perfmon pebs bts rep_good |
| xtopology nonstop_tsc aperfmperf pni pclmulqdq |
| dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 |
| xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida |
| arat tpr_shadow vnmi flexpriority ept vpid |
| bogomips : 6957.57 |
| clflush size : 64 |
| cache_alignment : 64 |
| address sizes : 36 bits physical, 48 bits virtual |
| power management: |
| */ |
| void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st ) |
| { |
| # define SET_ABCD(_a,_b,_c,_d) \ |
| do { st->guest_RAX = (ULong)(_a); \ |
| st->guest_RBX = (ULong)(_b); \ |
| st->guest_RCX = (ULong)(_c); \ |
| st->guest_RDX = (ULong)(_d); \ |
| } while (0) |
| |
| UInt old_eax = (UInt)st->guest_RAX; |
| UInt old_ecx = (UInt)st->guest_RCX; |
| |
| switch (old_eax) { |
| case 0x00000000: |
| SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69); |
| break; |
| case 0x00000001: |
| SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff, 0xbfebfbff); |
| break; |
| case 0x00000002: |
| SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c); |
| break; |
| case 0x00000003: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x00000004: |
| switch (old_ecx) { |
| case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f, |
| 0x0000003f, 0x00000000); break; |
| case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f, |
| 0x0000007f, 0x00000000); break; |
| case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f, |
| 0x000001ff, 0x00000000); break; |
| case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f, |
| 0x00000fff, 0x00000002); break; |
| default: SET_ABCD(0x00000000, 0x00000000, |
| 0x00000000, 0x00000000); break; |
| } |
| break; |
| case 0x00000005: |
| SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120); |
| break; |
| case 0x00000006: |
| SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000); |
| break; |
| case 0x00000007: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x00000008: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x00000009: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x0000000a: |
| SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603); |
| break; |
| case 0x0000000b: |
| switch (old_ecx) { |
| case 0x00000000: |
| SET_ABCD(0x00000001, 0x00000002, |
| 0x00000100, 0x00000000); break; |
| case 0x00000001: |
| SET_ABCD(0x00000004, 0x00000004, |
| 0x00000201, 0x00000000); break; |
| default: |
| SET_ABCD(0x00000000, 0x00000000, |
| old_ecx, 0x00000000); break; |
| } |
| break; |
| case 0x0000000c: |
| SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000); |
| break; |
| case 0x0000000d: |
| switch (old_ecx) { |
| case 0x00000000: SET_ABCD(0x00000001, 0x00000002, |
| 0x00000100, 0x00000000); break; |
| case 0x00000001: SET_ABCD(0x00000004, 0x00000004, |
| 0x00000201, 0x00000000); break; |
| default: SET_ABCD(0x00000000, 0x00000000, |
| old_ecx, 0x00000000); break; |
| } |
| break; |
| case 0x80000000: |
| SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x80000001: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800); |
| break; |
| case 0x80000002: |
| SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); |
| break; |
| case 0x80000003: |
| SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020); |
| break; |
| case 0x80000004: |
| SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847); |
| break; |
| case 0x80000005: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x80000006: |
| SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000); |
| break; |
| case 0x80000007: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100); |
| break; |
| case 0x80000008: |
| SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| default: |
| SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000); |
| break; |
| } |
| # undef SET_ABCD |
| } |
| |
| |
| /* Claim to be the following CPU (4 x ...), which is AVX and cx16 |
| capable. |
| |
| vendor_id : GenuineIntel |
| cpu family : 6 |
| model : 42 |
| model name : Intel(R) Core(TM) i5-2300 CPU @ 2.80GHz |
| stepping : 7 |
| cpu MHz : 1600.000 |
| cache size : 6144 KB |
| physical id : 0 |
| siblings : 4 |
| core id : 3 |
| cpu cores : 4 |
| apicid : 6 |
| initial apicid : 6 |
| fpu : yes |
| fpu_exception : yes |
| cpuid level : 13 |
| wp : yes |
| flags : fpu vme de pse tsc msr pae mce cx8 apic sep |
| mtrr pge mca cmov pat pse36 clflush dts acpi |
| mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp |
| lm constant_tsc arch_perfmon pebs bts rep_good |
| nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq |
| dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16 |
| xtpr pdcm sse4_1 sse4_2 popcnt aes xsave avx |
| lahf_lm ida arat epb xsaveopt pln pts dts |
| tpr_shadow vnmi flexpriority ept vpid |
| |
| bogomips : 5768.94 |
| clflush size : 64 |
| cache_alignment : 64 |
| address sizes : 36 bits physical, 48 bits virtual |
| power management: |
| */ |
| void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st ) |
| { |
| # define SET_ABCD(_a,_b,_c,_d) \ |
| do { st->guest_RAX = (ULong)(_a); \ |
| st->guest_RBX = (ULong)(_b); \ |
| st->guest_RCX = (ULong)(_c); \ |
| st->guest_RDX = (ULong)(_d); \ |
| } while (0) |
| |
| UInt old_eax = (UInt)st->guest_RAX; |
| UInt old_ecx = (UInt)st->guest_RCX; |
| |
| switch (old_eax) { |
| case 0x00000000: |
| SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69); |
| break; |
| case 0x00000001: |
| SET_ABCD(0x000206a7, 0x00100800, 0x1f9ae3bf, 0xbfebfbff); |
| break; |
| case 0x00000002: |
| SET_ABCD(0x76035a01, 0x00f0b0ff, 0x00000000, 0x00ca0000); |
| break; |
| case 0x00000003: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x00000004: |
| switch (old_ecx) { |
| case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f, |
| 0x0000003f, 0x00000000); break; |
| case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f, |
| 0x0000003f, 0x00000000); break; |
| case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f, |
| 0x000001ff, 0x00000000); break; |
| case 0x00000003: SET_ABCD(0x1c03c163, 0x02c0003f, |
| 0x00001fff, 0x00000006); break; |
| default: SET_ABCD(0x00000000, 0x00000000, |
| 0x00000000, 0x00000000); break; |
| } |
| break; |
| case 0x00000005: |
| SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120); |
| break; |
| case 0x00000006: |
| SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000); |
| break; |
| case 0x00000007: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x00000008: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x00000009: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x0000000a: |
| SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603); |
| break; |
| case 0x0000000b: |
| switch (old_ecx) { |
| case 0x00000000: |
| SET_ABCD(0x00000001, 0x00000001, |
| 0x00000100, 0x00000000); break; |
| case 0x00000001: |
| SET_ABCD(0x00000004, 0x00000004, |
| 0x00000201, 0x00000000); break; |
| default: |
| SET_ABCD(0x00000000, 0x00000000, |
| old_ecx, 0x00000000); break; |
| } |
| break; |
| case 0x0000000c: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x0000000d: |
| switch (old_ecx) { |
| case 0x00000000: SET_ABCD(0x00000007, 0x00000340, |
| 0x00000340, 0x00000000); break; |
| case 0x00000001: SET_ABCD(0x00000001, 0x00000000, |
| 0x00000000, 0x00000000); break; |
| case 0x00000002: SET_ABCD(0x00000100, 0x00000240, |
| 0x00000000, 0x00000000); break; |
| default: SET_ABCD(0x00000000, 0x00000000, |
| 0x00000000, 0x00000000); break; |
| } |
| break; |
| case 0x0000000e: |
| SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000); |
| break; |
| case 0x0000000f: |
| SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000); |
| break; |
| case 0x80000000: |
| SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x80000001: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800); |
| break; |
| case 0x80000002: |
| SET_ABCD(0x20202020, 0x20202020, 0x65746e49, 0x2952286c); |
| break; |
| case 0x80000003: |
| SET_ABCD(0x726f4320, 0x4d542865, 0x35692029, 0x3033322d); |
| break; |
| case 0x80000004: |
| SET_ABCD(0x50432030, 0x20402055, 0x30382e32, 0x007a4847); |
| break; |
| case 0x80000005: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| case 0x80000006: |
| SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000); |
| break; |
| case 0x80000007: |
| SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100); |
| break; |
| case 0x80000008: |
| SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); |
| break; |
| default: |
| SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000); |
| break; |
| } |
| # undef SET_ABCD |
| } |
| |
| |
| ULong amd64g_calculate_RCR ( ULong arg, |
| ULong rot_amt, |
| ULong rflags_in, |
| Long szIN ) |
| { |
| Bool wantRflags = toBool(szIN < 0); |
| ULong sz = wantRflags ? (-szIN) : szIN; |
| ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F); |
| ULong cf=0, of=0, tempcf; |
| |
| switch (sz) { |
| case 8: |
| cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| of = ((arg >> 63) ^ cf) & 1; |
| while (tempCOUNT > 0) { |
| tempcf = arg & 1; |
| arg = (arg >> 1) | (cf << 63); |
| cf = tempcf; |
| tempCOUNT--; |
| } |
| break; |
| case 4: |
| while (tempCOUNT >= 33) tempCOUNT -= 33; |
| cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| of = ((arg >> 31) ^ cf) & 1; |
| while (tempCOUNT > 0) { |
| tempcf = arg & 1; |
| arg = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31); |
| cf = tempcf; |
| tempCOUNT--; |
| } |
| break; |
| case 2: |
| while (tempCOUNT >= 17) tempCOUNT -= 17; |
| cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| of = ((arg >> 15) ^ cf) & 1; |
| while (tempCOUNT > 0) { |
| tempcf = arg & 1; |
| arg = ((arg >> 1) & 0x7FFFULL) | (cf << 15); |
| cf = tempcf; |
| tempCOUNT--; |
| } |
| break; |
| case 1: |
| while (tempCOUNT >= 9) tempCOUNT -= 9; |
| cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| of = ((arg >> 7) ^ cf) & 1; |
| while (tempCOUNT > 0) { |
| tempcf = arg & 1; |
| arg = ((arg >> 1) & 0x7FULL) | (cf << 7); |
| cf = tempcf; |
| tempCOUNT--; |
| } |
| break; |
| default: |
| vpanic("calculate_RCR(amd64g): invalid size"); |
| } |
| |
| cf &= 1; |
| of &= 1; |
| rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O); |
| rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O); |
| |
| /* caller can ask to have back either the resulting flags or |
| resulting value, but not both */ |
| return wantRflags ? rflags_in : arg; |
| } |
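| |
| /* Usage sketch for the helper above (illustrative): a negative szIN |
|    asks for the updated %rflags rather than the rotated value, so a |
|    front end needing both makes two calls.  A concrete 8-bit case: |
| |
|       // rcr-by-1 of the byte 0x01, with CF (and OF) clear on entry |
|       ULong val = amd64g_calculate_RCR(0x01, 1, 0,  1);  // rotated value |
|       ULong rfl = amd64g_calculate_RCR(0x01, 1, 0, -1);  // updated rflags |
|       // val == 0x00; rfl has CF set (the bit rotated out), OF clear |
| */ |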
| |
| ULong amd64g_calculate_RCL ( ULong arg, |
| ULong rot_amt, |
| ULong rflags_in, |
| Long szIN ) |
| { |
| Bool wantRflags = toBool(szIN < 0); |
| ULong sz = wantRflags ? (-szIN) : szIN; |
| ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F); |
| ULong cf=0, of=0, tempcf; |
| |
| switch (sz) { |
| case 8: |
| cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| while (tempCOUNT > 0) { |
| tempcf = (arg >> 63) & 1; |
| arg = (arg << 1) | (cf & 1); |
| cf = tempcf; |
| tempCOUNT--; |
| } |
| of = ((arg >> 63) ^ cf) & 1; |
| break; |
| case 4: |
| while (tempCOUNT >= 33) tempCOUNT -= 33; |
| cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| while (tempCOUNT > 0) { |
| tempcf = (arg >> 31) & 1; |
| arg = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1)); |
| cf = tempcf; |
| tempCOUNT--; |
| } |
| of = ((arg >> 31) ^ cf) & 1; |
| break; |
| case 2: |
| while (tempCOUNT >= 17) tempCOUNT -= 17; |
| cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| while (tempCOUNT > 0) { |
| tempcf = (arg >> 15) & 1; |
| arg = 0xFFFFULL & ((arg << 1) | (cf & 1)); |
| cf = tempcf; |
| tempCOUNT--; |
| } |
| of = ((arg >> 15) ^ cf) & 1; |
| break; |
| case 1: |
| while (tempCOUNT >= 9) tempCOUNT -= 9; |
| cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| while (tempCOUNT > 0) { |
| tempcf = (arg >> 7) & 1; |
| arg = 0xFFULL & ((arg << 1) | (cf & 1)); |
| cf = tempcf; |
| tempCOUNT--; |
| } |
| of = ((arg >> 7) ^ cf) & 1; |
| break; |
| default: |
| vpanic("calculate_RCL(amd64g): invalid size"); |
| } |
| |
| cf &= 1; |
| of &= 1; |
| rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O); |
| rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O); |
| |
| return wantRflags ? rflags_in : arg; |
| } |
| |
| /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+) |
| * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25 |
| */ |
| ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which) |
| { |
| ULong hi, lo, tmp, A[16]; |
| |
| A[0] = 0; A[1] = a; |
| A[2] = A[1] << 1; A[3] = A[2] ^ a; |
| A[4] = A[2] << 1; A[5] = A[4] ^ a; |
| A[6] = A[3] << 1; A[7] = A[6] ^ a; |
| A[8] = A[4] << 1; A[9] = A[8] ^ a; |
| A[10] = A[5] << 1; A[11] = A[10] ^ a; |
| A[12] = A[6] << 1; A[13] = A[12] ^ a; |
| A[14] = A[7] << 1; A[15] = A[14] ^ a; |
| |
| lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15]; |
| hi = lo >> 56; |
| lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15]; |
| hi = (hi << 8) | (lo >> 56); |
| lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15]; |
| hi = (hi << 8) | (lo >> 56); |
| lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15]; |
| hi = (hi << 8) | (lo >> 56); |
| lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15]; |
| hi = (hi << 8) | (lo >> 56); |
| lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15]; |
| hi = (hi << 8) | (lo >> 56); |
| lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15]; |
| hi = (hi << 8) | (lo >> 56); |
| lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15]; |
| |
| ULong m0 = -1; |
| m0 /= 255; |
| tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp; |
| tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp; |
| tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp; |
| tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp; |
| tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp; |
| tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp; |
| tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp; |
| |
| return which ? hi : lo; |
| } |
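| |
| /* Worked example for the carry-less multiply above (illustrative): |
|    the operands are treated as polynomials over GF(2), so there are |
|    no carries between bit positions.  E.g. (x+1)*(x+1) = x^2 + 1: |
| |
|       amd64g_calculate_pclmul(3, 3, 0) == 5   // low 64 bits |
|       amd64g_calculate_pclmul(3, 3, 1) == 0   // high 64 bits |
| |
|    whereas an ordinary integer multiply would give 9. */ |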
| |
| |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER (non-referentially-transparent) */ |
| /* Horrible hack. On non-amd64 platforms, return 1. */ |
| ULong amd64g_dirtyhelper_RDTSC ( void ) |
| { |
| # if defined(__x86_64__) |
| UInt eax, edx; |
| __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx)); |
| return (((ULong)edx) << 32) | ((ULong)eax); |
| # else |
| return 1ULL; |
| # endif |
| } |
| |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER (non-referentially-transparent) */ |
| /* Horrible hack. On non-amd64 platforms, return 1. */ |
| /* This uses a different calling convention from _RDTSC just above |
| only because of the difficulty of returning 96 bits from a C |
| function -- RDTSC returns 64 bits and so is simple by comparison, |
| on amd64. */ |
| void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st ) |
| { |
| # if defined(__x86_64__) |
| UInt eax, ecx, edx; |
| __asm__ __volatile__("rdtscp" : "=a" (eax), "=d" (edx), "=c" (ecx)); |
| st->guest_RAX = (ULong)eax; |
| st->guest_RCX = (ULong)ecx; |
| st->guest_RDX = (ULong)edx; |
| # else |
| /* Do nothing. */ |
| # endif |
| } |
| |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER (non-referentially-transparent) */ |
| /* Horrible hack. On non-amd64 platforms, return 0. */ |
| ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ ) |
| { |
| # if defined(__x86_64__) |
| ULong r = 0; |
| portno &= 0xFFFF; |
| switch (sz) { |
| case 4: |
| __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0" |
| : "=a" (r) : "Nd" (portno)); |
| break; |
| case 2: |
| __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0" |
| : "=a" (r) : "Nd" (portno)); |
| break; |
| case 1: |
| __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0" |
| : "=a" (r) : "Nd" (portno)); |
| break; |
| default: |
| break; /* note: no 64-bit version of insn exists */ |
| } |
| return r; |
| # else |
| return 0; |
| # endif |
| } |
| |
| |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER (non-referentially-transparent) */ |
| /* Horrible hack. On non-amd64 platforms, do nothing. */ |
| void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ ) |
| { |
| # if defined(__x86_64__) |
| portno &= 0xFFFF; |
| switch (sz) { |
| case 4: |
| __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1" |
| : : "a" (data), "Nd" (portno)); |
| break; |
| case 2: |
| __asm__ __volatile__("outw %w0, %w1" |
| : : "a" (data), "Nd" (portno)); |
| break; |
| case 1: |
| __asm__ __volatile__("outb %b0, %w1" |
| : : "a" (data), "Nd" (portno)); |
| break; |
| default: |
| break; /* note: no 64-bit version of insn exists */ |
| } |
| # else |
| /* do nothing */ |
| # endif |
| } |
| |
| /* CALLED FROM GENERATED CODE */ |
| /* DIRTY HELPER (non-referentially-transparent) */ |
| /* Horrible hack. On non-amd64 platforms, just write zeroes. */ |
| /* op = 0: call the native SGDT instruction. |
| op = 1: call the native SIDT instruction. |
| */ |
| void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) |
| { |
| # if defined(__x86_64__) |
| switch (op) { |
| case 0: |
| __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory"); |
| break; |
| case 1: |
| __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory"); |
| break; |
| default: |
| vpanic("amd64g_dirtyhelper_SxDT"); |
| } |
| # else |
|    /* Fake it up: write a zeroed 10-byte pseudo-descriptor. */ |
| UChar* p = (UChar*)address; |
| p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0; |
| p[6] = p[7] = p[8] = p[9] = 0; |
| # endif |
| } |
| |
| /*---------------------------------------------------------------*/ |
| /*--- Helpers for MMX/SSE/SSE2. ---*/ |
| /*---------------------------------------------------------------*/ |
| |
| static inline UChar abdU8 ( UChar xx, UChar yy ) { |
| return toUChar(xx>yy ? xx-yy : yy-xx); |
| } |
| |
| static inline ULong mk32x2 ( UInt w1, UInt w0 ) { |
| return (((ULong)w1) << 32) | ((ULong)w0); |
| } |
| |
| static inline UShort sel16x4_3 ( ULong w64 ) { |
| UInt hi32 = toUInt(w64 >> 32); |
| return toUShort(hi32 >> 16); |
| } |
| static inline UShort sel16x4_2 ( ULong w64 ) { |
| UInt hi32 = toUInt(w64 >> 32); |
| return toUShort(hi32); |
| } |
| static inline UShort sel16x4_1 ( ULong w64 ) { |
| UInt lo32 = toUInt(w64); |
| return toUShort(lo32 >> 16); |
| } |
| static inline UShort sel16x4_0 ( ULong w64 ) { |
| UInt lo32 = toUInt(w64); |
| return toUShort(lo32); |
| } |
| |
| static inline UChar sel8x8_7 ( ULong w64 ) { |
| UInt hi32 = toUInt(w64 >> 32); |
| return toUChar(hi32 >> 24); |
| } |
| static inline UChar sel8x8_6 ( ULong w64 ) { |
| UInt hi32 = toUInt(w64 >> 32); |
| return toUChar(hi32 >> 16); |
| } |
| static inline UChar sel8x8_5 ( ULong w64 ) { |
| UInt hi32 = toUInt(w64 >> 32); |
| return toUChar(hi32 >> 8); |
| } |
| static inline UChar sel8x8_4 ( ULong w64 ) { |
| UInt hi32 = toUInt(w64 >> 32); |
| return toUChar(hi32 >> 0); |
| } |
| static inline UChar sel8x8_3 ( ULong w64 ) { |
| UInt lo32 = toUInt(w64); |
| return toUChar(lo32 >> 24); |
| } |
| static inline UChar sel8x8_2 ( ULong w64 ) { |
| UInt lo32 = toUInt(w64); |
| return toUChar(lo32 >> 16); |
| } |
| static inline UChar sel8x8_1 ( ULong w64 ) { |
| UInt lo32 = toUInt(w64); |
| return toUChar(lo32 >> 8); |
| } |
| static inline UChar sel8x8_0 ( ULong w64 ) { |
| UInt lo32 = toUInt(w64); |
| return toUChar(lo32 >> 0); |
| } |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy ) |
| { |
| return |
| mk32x2( |
| (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy))) |
| + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))), |
| (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy))) |
| + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy))) |
| ); |
| } |
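| |
| /* Worked example (illustrative): with every 16-bit lane of xx equal |
|    to 2 and every lane of yy equal to 3, each lane product is 6 and |
|    each 32-bit half of the result is 6 + 6 = 12: |
| |
|       amd64g_calculate_mmx_pmaddwd(0x0002000200020002ULL, |
|                                    0x0003000300030003ULL) |
|          == 0x0000000C0000000CULL |
| */ |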
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy ) |
| { |
| UInt t = 0; |
| t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) ); |
| t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) ); |
| t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) ); |
| t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) ); |
| t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) ); |
| t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) ); |
| t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) ); |
| t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) ); |
| t &= 0xFFFF; |
| return (ULong)t; |
| } |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi ) |
| { |
| UShort t, min; |
| UInt idx; |
| t = sel16x4_0(sLo); if (True) { min = t; idx = 0; } |
| t = sel16x4_1(sLo); if (t < min) { min = t; idx = 1; } |
| t = sel16x4_2(sLo); if (t < min) { min = t; idx = 2; } |
| t = sel16x4_3(sLo); if (t < min) { min = t; idx = 3; } |
| t = sel16x4_0(sHi); if (t < min) { min = t; idx = 4; } |
| t = sel16x4_1(sHi); if (t < min) { min = t; idx = 5; } |
| t = sel16x4_2(sHi); if (t < min) { min = t; idx = 6; } |
| t = sel16x4_3(sHi); if (t < min) { min = t; idx = 7; } |
| return ((ULong)(idx << 16)) | ((ULong)min); |
| } |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| ULong amd64g_calc_crc32b ( ULong crcIn, ULong b ) |
| { |
| UInt i; |
| ULong crc = (b & 0xFFULL) ^ crcIn; |
| for (i = 0; i < 8; i++) |
| crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); |
| return crc; |
| } |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| ULong amd64g_calc_crc32w ( ULong crcIn, ULong w ) |
| { |
| UInt i; |
| ULong crc = (w & 0xFFFFULL) ^ crcIn; |
| for (i = 0; i < 16; i++) |
| crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); |
| return crc; |
| } |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| ULong amd64g_calc_crc32l ( ULong crcIn, ULong l ) |
| { |
| UInt i; |
| ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn; |
| for (i = 0; i < 32; i++) |
| crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); |
| return crc; |
| } |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| ULong amd64g_calc_crc32q ( ULong crcIn, ULong q ) |
| { |
| ULong crc = amd64g_calc_crc32l(crcIn, q); |
| return amd64g_calc_crc32l(crc, q >> 32); |
| } |
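| |
| /* Usage sketch (illustrative, not part of this file): these helpers |
|    mirror the SSE4.2 CRC32 instruction, which updates the accumulator |
|    with the Castagnoli polynomial (0x82f63b78, reflected form) and |
|    performs no pre- or post-inversion itself.  A byte-at-a-time loop |
|    over a buffer, computing the conventional CRC32-C, would be: |
| |
|       ULong crc = 0xFFFFFFFFULL;                // conventional seed |
|       for (i = 0; i < len; i++) |
|          crc = amd64g_calc_crc32b(crc, buf[i]); |
|       crc ^= 0xFFFFFFFFULL;                     // conventional final xor |
| */ |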
| |
| |
| /* .. helper for next fn .. */ |
| static inline ULong sad_8x4 ( ULong xx, ULong yy ) |
| { |
| UInt t = 0; |
| t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) ); |
| t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) ); |
| t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) ); |
| t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) ); |
| return (ULong)t; |
| } |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| ULong amd64g_calc_mpsadbw ( ULong sHi, ULong sLo, |
| ULong dHi, ULong dLo, |
| ULong imm_and_return_control_bit ) |
| { |
| UInt imm8 = imm_and_return_control_bit & 7; |
| Bool calcHi = (imm_and_return_control_bit >> 7) & 1; |
| UInt srcOffsL = imm8 & 3; /* src offs in 32-bit (L) chunks */ |
| UInt dstOffsL = (imm8 >> 2) & 1; /* dst offs in ditto chunks */ |
| /* For src we only need 32 bits, so get them into the |
| lower half of a 64 bit word. */ |
| ULong src = ((srcOffsL & 2) ? sHi : sLo) >> (32 * (srcOffsL & 1)); |
| /* For dst we need to get hold of 56 bits (7 bytes) from a total of |
| 11 bytes. If calculating the low part of the result, need bytes |
| dstOffsL * 4 + (0 .. 6); if calculating the high part, |
| dstOffsL * 4 + (4 .. 10). */ |
| ULong dst; |
|    /* dstOffsL = 0, Lo -> 0 .. 6 |
|       dstOffsL = 1, Lo -> 4 .. 10 |
|       dstOffsL = 0, Hi -> 4 .. 10 |
|       dstOffsL = 1, Hi -> 8 .. 14 |
|    */ |
| if (calcHi && dstOffsL) { |
| /* 8 .. 14 */ |
| dst = dHi & 0x00FFFFFFFFFFFFFFULL; |
| } |
| else if (!calcHi && !dstOffsL) { |
| /* 0 .. 6 */ |
| dst = dLo & 0x00FFFFFFFFFFFFFFULL; |
| } |
| else { |
| /* 4 .. 10 */ |
| dst = (dLo >> 32) | ((dHi & 0x00FFFFFFULL) << 32); |
| } |
| ULong r0 = sad_8x4( dst >> 0, src ); |
| ULong r1 = sad_8x4( dst >> 8, src ); |
| ULong r2 = sad_8x4( dst >> 16, src ); |
| ULong r3 = sad_8x4( dst >> 24, src ); |
| ULong res = (r3 << 48) | (r2 << 32) | (r1 << 16) | r0; |
| return res; |
| } |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| ULong amd64g_calculate_pext ( ULong src_masked, ULong mask ) |
| { |
| ULong dst = 0; |
| ULong src_bit; |
| ULong dst_bit = 1; |
| for (src_bit = 1; src_bit; src_bit <<= 1) { |
| if (mask & src_bit) { |
| if (src_masked & src_bit) dst |= dst_bit; |
| dst_bit <<= 1; |
| } |
| } |
| return dst; |
| } |
| |
| /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| ULong amd64g_calculate_pdep ( ULong src, ULong mask ) |
| { |
| ULong dst = 0; |
| ULong dst_bit; |
| ULong src_bit = 1; |
| for (dst_bit = 1; dst_bit; dst_bit <<= 1) { |
| if (mask & dst_bit) { |
| if (src & src_bit) dst |= dst_bit; |
| src_bit <<= 1; |
| } |
| } |
| return dst; |
| } |
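| |
| /* Worked example for the two helpers above (illustrative): with |
|    mask = 0x55 (bits 0,2,4,6): |
| |
|       amd64g_calculate_pext(0xD6 & 0x55, 0x55) == 0x0E |
|          // source bits at positions 0,2,4,6 are 0,1,1,1 -> 0b1110 |
| |
|       amd64g_calculate_pdep(0x0E, 0x55) == 0x54 |
|          // low bits 0,1,1,1 scattered back to positions 0,2,4,6 |
| |
|    so pdep is the scatter that undoes pext's gather, for the bits |
|    covered by the mask. */ |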
| |
| /*---------------------------------------------------------------*/ |
| /*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M} ---*/ |
| /*---------------------------------------------------------------*/ |
| |
| static UInt zmask_from_V128 ( V128* arg ) |
| { |
| UInt i, res = 0; |
| for (i = 0; i < 16; i++) { |
| res |= ((arg->w8[i] == 0) ? 1 : 0) << i; |
| } |
| return res; |
| } |
| |
| static UInt zmask_from_V128_wide ( V128* arg ) |
| { |
| UInt i, res = 0; |
| for (i = 0; i < 8; i++) { |
| res |= ((arg->w16[i] == 0) ? 1 : 0) << i; |
| } |
| return res; |
| } |
| |
| /* Helps with PCMP{I,E}STR{I,M}. |
| |
| CALLED FROM GENERATED CODE: DIRTY HELPER(s). (But not really, |
| actually it could be a clean helper, but for the fact that we can't |
| pass by value 2 x V128 to a clean helper, nor have one returned.) |
| Reads guest state, writes to guest state for the xSTRM cases, no |
| accesses of memory, is a pure function. |
| |
|    opc4_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so |
| the callee knows which I/E and I/M variant it is dealing with and |
| what the specific operation is. 4th byte of opcode is in the range |
| 0x60 to 0x63: |
| istri 66 0F 3A 63 |
| istrm 66 0F 3A 62 |
| estri 66 0F 3A 61 |
| estrm 66 0F 3A 60 |
| |
| gstOffL and gstOffR are the guest state offsets for the two XMM |
| register inputs. We never have to deal with the memory case since |
| that is handled by pre-loading the relevant value into the fake |
| XMM16 register. |
| |
| For ESTRx variants, edxIN and eaxIN hold the values of those two |
| registers. |
| |
| In all cases, the bottom 16 bits of the result contain the new |
| OSZACP %rflags values. For xSTRI variants, bits[31:16] of the |
| result hold the new %ecx value. For xSTRM variants, the helper |
| writes the result directly to the guest XMM0. |
| |
| Declarable side effects: in all cases, reads guest state at |
| [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes |
| guest_XMM0. |
| |
|    Is expected to be called with opc4_and_imm combinations which have |
|    actually been validated, and will assert otherwise.  The front |
| end should ensure we're only called with verified values. |
| */ |
| ULong amd64g_dirtyhelper_PCMPxSTRx ( |
| VexGuestAMD64State* gst, |
| HWord opc4_and_imm, |
| HWord gstOffL, HWord gstOffR, |
| HWord edxIN, HWord eaxIN |
| ) |
| { |
| HWord opc4 = (opc4_and_imm >> 8) & 0xFF; |
| HWord imm8 = opc4_and_imm & 0xFF; |
| HWord isISTRx = opc4 & 2; |
| HWord isxSTRM = (opc4 & 1) ^ 1; |
| vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */ |
| HWord wide = (imm8 & 1); |
| |
| // where the args are |
| V128* argL = (V128*)( ((UChar*)gst) + gstOffL ); |
| V128* argR = (V128*)( ((UChar*)gst) + gstOffR ); |
| |
| /* Create the arg validity masks, either from the vectors |
| themselves or from the supplied edx/eax values. */ |
|    // The 8-bit and 16-bit element cases are handled separately below, |
|    // selected by the 'wide' flag (bit 0 of imm8). |
| UInt zmaskL, zmaskR; |
| |
| // temp spot for the resulting flags and vector. |
| V128 resV; |
| UInt resOSZACP; |
| |
| // for checking whether case was handled |
| Bool ok = False; |
| |
| if (wide) { |
| if (isISTRx) { |
| zmaskL = zmask_from_V128_wide(argL); |
| zmaskR = zmask_from_V128_wide(argR); |
| } else { |
| Int tmp; |
| tmp = edxIN & 0xFFFFFFFF; |
| if (tmp < -8) tmp = -8; |
| if (tmp > 8) tmp = 8; |
| if (tmp < 0) tmp = -tmp; |
| vassert(tmp >= 0 && tmp <= 8); |
| zmaskL = (1 << tmp) & 0xFF; |
| tmp = eaxIN & 0xFFFFFFFF; |
| if (tmp < -8) tmp = -8; |
| if (tmp > 8) tmp = 8; |
| if (tmp < 0) tmp = -tmp; |
| vassert(tmp >= 0 && tmp <= 8); |
| zmaskR = (1 << tmp) & 0xFF; |
| } |
|       // do the maths |
| ok = compute_PCMPxSTRx_wide ( |
| &resV, &resOSZACP, argL, argR, |
| zmaskL, zmaskR, imm8, (Bool)isxSTRM |
| ); |
| } else { |
| if (isISTRx) { |
| zmaskL = zmask_from_V128(argL); |
| zmaskR = zmask_from_V128(argR); |
| } else { |
| Int tmp; |
| tmp = edxIN & 0xFFFFFFFF; |
| if (tmp < -16) tmp = -16; |
| if (tmp > 16) tmp = 16; |
| if (tmp < 0) tmp = -tmp; |
| vassert(tmp >= 0 && tmp <= 16); |
| zmaskL = (1 << tmp) & 0xFFFF; |
| tmp = eaxIN & 0xFFFFFFFF; |
| if (tmp < -16) tmp = -16; |
| if (tmp > 16) tmp = 16; |
| if (tmp < 0) tmp = -tmp; |
| vassert(tmp >= 0 && tmp <= 16); |
| zmaskR = (1 << tmp) & 0xFFFF; |
| } |
|       // do the maths |
| ok = compute_PCMPxSTRx ( |
| &resV, &resOSZACP, argL, argR, |
| zmaskL, zmaskR, imm8, (Bool)isxSTRM |
| ); |
| } |
| |
| // front end shouldn't pass us any imm8 variants we can't |
| // handle. Hence: |
| vassert(ok); |
| |
| // So, finally we need to get the results back to the caller. |
| // In all cases, the new OSZACP value is the lowest 16 of |
| // the return value. |
| if (isxSTRM) { |
| gst->guest_YMM0[0] = resV.w32[0]; |
| gst->guest_YMM0[1] = resV.w32[1]; |
| gst->guest_YMM0[2] = resV.w32[2]; |
| gst->guest_YMM0[3] = resV.w32[3]; |
| return resOSZACP & 0x8D5; |
| } else { |
| UInt newECX = resV.w32[0] & 0xFFFF; |
| return (newECX << 16) | (resOSZACP & 0x8D5); |
| } |
| } |
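| |
| /* Sketch of how a caller (the front end, in practice) unpacks the |
|    return value described above -- illustrative only: |
| |
|       ULong ret     = amd64g_dirtyhelper_PCMPxSTRx( ... ); |
|       UInt  oszacp  = ret & 0x8D5;           // new O,S,Z,A,C,P bits |
|       UInt  new_ecx = (ret >> 16) & 0xFFFF;  // xSTRI variants only; |
|                                              // xSTRM wrote XMM0 already |
| */ |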
| |
| /*---------------------------------------------------------------*/ |
| /*--- AES primitives and helpers ---*/ |
| /*---------------------------------------------------------------*/ |
| /* a 16 x 16 matrix */ |
| static const UChar sbox[256] = { // row nr |
| 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, // 1 |
| 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, |
| 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, // 2 |
| 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, |
| 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, // 3 |
| 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, |
| 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, // 4 |
| 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, |
| 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, // 5 |
| 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, |
| 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, // 6 |
| 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, |
| 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, // 7 |
| 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, |
| 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, // 8 |
| 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, |
| 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, // 9 |
| 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, |
| 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, //10 |
| 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, |
| 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, //11 |
| 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, |
| 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, //12 |
| 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, |
| 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, //13 |
| 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, |
| 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, //14 |
| 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, |
| 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, //15 |
| 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, |
| 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, //16 |
| 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 |
| }; |
| static void SubBytes (V128* v) |
| { |
| V128 r; |
| UInt i; |
| for (i = 0; i < 16; i++) |
| r.w8[i] = sbox[v->w8[i]]; |
| *v = r; |
| } |
| |
| /* a 16 x 16 matrix */ |
| static const UChar invsbox[256] = { // row nr |
| 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, // 1 |
| 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, |
| 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, // 2 |
| 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, |
| 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, // 3 |
| 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, |
| 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, // 4 |
| 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, |
| 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, // 5 |
| 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, |
| 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, // 6 |
| 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, |
| 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, // 7 |
| 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, |
| 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, // 8 |
| 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, |
| 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, // 9 |
| 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, |
| 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, //10 |
| 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, |
| 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, //11 |
| 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, |
| 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, //12 |
| 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, |
| 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, //13 |
| 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, |
| 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, //14 |
| 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, |
| 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, //15 |
| 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, |
| 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, //16 |
| 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d |
| }; |
| static void InvSubBytes (V128* v) |
| { |
| V128 r; |
| UInt i; |
| for (i = 0; i < 16; i++) |
| r.w8[i] = invsbox[v->w8[i]]; |
| *v = r; |
| } |
| |
| static const UChar ShiftRows_op[16] = |
| {11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0}; |
| static void ShiftRows (V128* v) |
| { |
| V128 r; |
| UInt i; |
| for (i = 0; i < 16; i++) |
| r.w8[i] = v->w8[ShiftRows_op[15-i]]; |
| *v = r; |
| } |
| |
| static const UChar InvShiftRows_op[16] = |
| {3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0}; |
| static void InvShiftRows (V128* v) |
| { |
| V128 r; |
| UInt i; |
| for (i = 0; i < 16; i++) |
| r.w8[i] = v->w8[InvShiftRows_op[15-i]]; |
| *v = r; |
| } |
| |
| /* Multiplication of the finite fields elements of AES. |
| See "A Specification for The AES Algorithm Rijndael |
| (by Joan Daemen & Vincent Rijmen)" |
| Dr. Brian Gladman, v3.1, 3rd March 2001. */ |
| /* N values so that (hex) xy = 0x03^N. |
|    0x00 cannot be used. We put 0xff for this value. */ |
| /* a 16 x 16 matrix */ |
| static const UChar Nxy[256] = { // row nr |
| 0xff, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, // 1 |
| 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03, |
| 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, // 2 |
| 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1, |
| 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, // 3 |
| 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78, |
| 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, // 4 |
| 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e, |
| 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, // 5 |
| 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38, |
| 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, // 6 |
| 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10, |
| 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, // 7 |
| 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba, |
| 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, // 8 |
| 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57, |
| 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, // 9 |
| 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8, |
| 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, //10 |
| 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0, |
| 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, //11 |
| 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7, |
| 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, //12 |
| 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d, |
| 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, //13 |
| 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1, |
| 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, //14 |
| 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab, |
| 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, //15 |
| 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5, |
| 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, //16 |
| 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07 |
| }; |
| |
| /* E values so that E = 0x03^xy. */ |
| static const UChar Exy[256] = { // row nr |
| 0x01, 0x03, 0x05, 0x0f, 0x11, 0x33, 0x55, 0xff, // 1 |
| 0x1a, 0x2e, 0x72, 0x96, 0xa1, 0xf8, 0x13, 0x35, |
| 0x5f, 0xe1, 0x38, 0x48, 0xd8, 0x73, 0x95, 0xa4, // 2 |
| 0xf7, 0x02, 0x06, 0x0a, 0x1e, 0x22, 0x66, 0xaa, |
| 0xe5, 0x34, 0x5c, 0xe4, 0x37, 0x59, 0xeb, 0x26, // 3 |
| 0x6a, 0xbe, 0xd9, 0x70, 0x90, 0xab, 0xe6, 0x31, |
| 0x53, 0xf5, 0x04, 0x0c, 0x14, 0x3c, 0x44, 0xcc, // 4 |
| 0x4f, 0xd1, 0x68, 0xb8, 0xd3, 0x6e, 0xb2, 0xcd, |
| 0x4c, 0xd4, 0x67, 0xa9, 0xe0, 0x3b, 0x4d, 0xd7, // 5 |
| 0x62, 0xa6, 0xf1, 0x08, 0x18, 0x28, 0x78, 0x88, |
| 0x83, 0x9e, 0xb9, 0xd0, 0x6b, 0xbd, 0xdc, 0x7f, // 6 |
| 0x81, 0x98, 0xb3, 0xce, 0x49, 0xdb, 0x76, 0x9a, |
| 0xb5, 0xc4, 0x57, 0xf9, 0x10, 0x30, 0x50, 0xf0, // 7 |
| 0x0b, 0x1d, 0x27, 0x69, 0xbb, 0xd6, 0x61, 0xa3, |
| 0xfe, 0x19, 0x2b, 0x7d, 0x87, 0x92, 0xad, 0xec, // 8 |
| 0x2f, 0x71, 0x93, 0xae, 0xe9, 0x20, 0x60, 0xa0, |
| 0xfb, 0x16, 0x3a, 0x4e, 0xd2, 0x6d, 0xb7, 0xc2, // 9 |
| 0x5d, 0xe7, 0x32, 0x56, 0xfa, 0x15, 0x3f, 0x41, |
| 0xc3, 0x5e, 0xe2, 0x3d, 0x47, 0xc9, 0x40, 0xc0, //10 |
| 0x5b, 0xed, 0x2c, 0x74, 0x9c, 0xbf, 0xda, 0x75, |
| 0x9f, 0xba, 0xd5, 0x64, 0xac, 0xef, 0x2a, 0x7e, //11 |
| 0x82, 0x9d, 0xbc, 0xdf, 0x7a, 0x8e, 0x89, 0x80, |
| 0x9b, 0xb6, 0xc1, 0x58, 0xe8, 0x23, 0x65, 0xaf, //12 |
| 0xea, 0x25, 0x6f, 0xb1, 0xc8, 0x43, 0xc5, 0x54, |
| 0xfc, 0x1f, 0x21, 0x63, 0xa5, 0xf4, 0x07, 0x09, //13 |
| 0x1b, 0x2d, 0x77, 0x99, 0xb0, 0xcb, 0x46, 0xca, |
| 0x45, 0xcf, 0x4a, 0xde, 0x79, 0x8b, 0x86, 0x91, //14 |
| 0xa8, 0xe3, 0x3e, 0x42, 0xc6, 0x51, 0xf3, 0x0e, |
| 0x12, 0x36, 0x5a, 0xee, 0x29, 0x7b, 0x8d, 0x8c, //15 |
| 0x8f, 0x8a, 0x85, 0x94, 0xa7, 0xf2, 0x0d, 0x17, |
| 0x39, 0x4b, 0xdd, 0x7c, 0x84, 0x97, 0xa2, 0xfd, //16 |
| 0x1c, 0x24, 0x6c, 0xb4, 0xc7, 0x52, 0xf6, 0x01 |
| }; |
| |
| static inline UChar ff_mul(UChar u1, UChar u2) |
| { |
| if ((u1 > 0) && (u2 > 0)) { |
| UInt ui = Nxy[u1] + Nxy[u2]; |
| if (ui >= 255) |
| ui = ui - 255; |
| return Exy[ui]; |
| } else { |
| return 0; |
| } |
| } |
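| |
| /* Worked example (illustrative only; nothing below depends on it): |
| ff_mul(0x02, 0x07): |
| Nxy[0x02] = 0x19, Nxy[0x07] = 0xc6, and 0x19 + 0xc6 = 0xdf < 255, |
| so no wrap-around is needed, giving Exy[0xdf] = 0x0e. |
| Indeed x * (x^2 + x + 1) = x^3 + x^2 + x = 0x0e in GF(2^8). */ |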
| |
| static void MixColumns (V128* v) |
| { |
| V128 r; |
| Int j; |
| #define P(x,row,col) (x)->w8[((row)*4+(col))] |
| for (j = 0; j < 4; j++) { |
| P(&r,j,0) = ff_mul(0x02, P(v,j,0)) ^ ff_mul(0x03, P(v,j,1)) |
| ^ P(v,j,2) ^ P(v,j,3); |
| P(&r,j,1) = P(v,j,0) ^ ff_mul(0x02, P(v,j,1)) |
| ^ ff_mul(0x03, P(v,j,2)) ^ P(v,j,3); |
| P(&r,j,2) = P(v,j,0) ^ P(v,j,1) ^ ff_mul(0x02, P(v,j,2)) |
| ^ ff_mul(0x03, P(v,j,3)); |
| P(&r,j,3) = ff_mul(0x03, P(v,j,0)) ^ P(v,j,1) ^ P(v,j,2) |
| ^ ff_mul(0x02, P(v,j,3)); |
| } |
| *v = r; |
| #undef P |
| } |
| |
| static void InvMixColumns (V128* v) |
| { |
| V128 r; |
| Int j; |
| #define P(x,row,col) (x)->w8[((row)*4+(col))] |
| for (j = 0; j < 4; j++) { |
| P(&r,j,0) = ff_mul(0x0e, P(v,j,0)) ^ ff_mul(0x0b, P(v,j,1)) |
| ^ ff_mul(0x0d, P(v,j,2)) ^ ff_mul(0x09, P(v,j,3)); |
| P(&r,j,1) = ff_mul(0x09, P(v,j,0)) ^ ff_mul(0x0e, P(v,j,1)) |
| ^ ff_mul(0x0b, P(v,j,2)) ^ ff_mul(0x0d, P(v,j,3)); |
| P(&r,j,2) = ff_mul(0x0d, P(v,j,0)) ^ ff_mul(0x09, P(v,j,1)) |
| ^ ff_mul(0x0e, P(v,j,2)) ^ ff_mul(0x0b, P(v,j,3)); |
| P(&r,j,3) = ff_mul(0x0b, P(v,j,0)) ^ ff_mul(0x0d, P(v,j,1)) |
| ^ ff_mul(0x09, P(v,j,2)) ^ ff_mul(0x0e, P(v,j,3)); |
| } |
| *v = r; |
| #undef P |
| } |
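| |
| /* A minimal sanity-check sketch, not part of VEX and kept out of the |
| build by #if 0: MixColumns and InvMixColumns are inverse linear maps |
| over GF(2^8), so applying one after the other should return the |
| original state for any input. */ |
| #if 0 |
| static Bool mixcolumns_roundtrip_ok ( void ) |
| { |
| V128 v, orig; |
| UInt i; |
| for (i = 0; i < 16; i++) |
| v.w8[i] = orig.w8[i] = (UChar)(17 * i + 3); /* arbitrary bytes */ |
| MixColumns(&v); |
| InvMixColumns(&v); |
| for (i = 0; i < 16; i++) |
| if (v.w8[i] != orig.w8[i]) |
| return False; |
| return True; |
| } |
| #endif |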
| |
| /* For description, see definition in guest_amd64_defs.h */ |
| void amd64g_dirtyhelper_AES ( |
| VexGuestAMD64State* gst, |
| HWord opc4, HWord gstOffD, |
| HWord gstOffL, HWord gstOffR |
| ) |
| { |
| // Locate the three vector arguments within the guest state. |
| V128* argD = (V128*)( ((UChar*)gst) + gstOffD ); |
| V128* argL = (V128*)( ((UChar*)gst) + gstOffL ); |
| V128* argR = (V128*)( ((UChar*)gst) + gstOffR ); |
| V128 r; |
| |
| switch (opc4) { |
| case 0xDC: /* AESENC */ |
| case 0xDD: /* AESENCLAST */ |
| r = *argR; |
| ShiftRows (&r); |
| SubBytes (&r); |
| if (opc4 == 0xDC) |
| MixColumns (&r); |
| argD->w64[0] = r.w64[0] ^ argL->w64[0]; |
| argD->w64[1] = r.w64[1] ^ argL->w64[1]; |
| break; |
| |
| case 0xDE: /* AESDEC */ |
| case 0xDF: /* AESDECLAST */ |
| r = *argR; |
| InvShiftRows (&r); |
| InvSubBytes (&r); |
| if (opc4 == 0xDE) |
| InvMixColumns (&r); |
| argD->w64[0] = r.w64[0] ^ argL->w64[0]; |
| argD->w64[1] = r.w64[1] ^ argL->w64[1]; |
| break; |
| |
| case 0xDB: /* AESIMC */ |
| *argD = *argL; |
| InvMixColumns (argD); |
| break; |
| default: vassert(0); |
| } |
| } |
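| |
| /* For reference, the cases above compute, in terms of the value at |
| gstOffR (the state, R) and the value at gstOffL (the round key, L): |
| AESENC: D = MixColumns(SubBytes(ShiftRows(R))) xor L |
| AESENCLAST: D = SubBytes(ShiftRows(R)) xor L |
| AESDEC: D = InvMixColumns(InvSubBytes(InvShiftRows(R))) xor L |
| AESDECLAST: D = InvSubBytes(InvShiftRows(R)) xor L |
| AESIMC: D = InvMixColumns(L) */ |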
| |
| static inline UInt RotWord (UInt w32) |
| { |
| return ((w32 >> 8) | (w32 << 24)); |
| } |
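| |
| /* For example, RotWord(0x0a0b0c0d) == 0x0d0a0b0c: a 32-bit rotate |
| right by 8 bits, i.e. a cyclic permutation of the word's bytes. */ |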
| |
| static inline UInt SubWord (UInt w32) |
| { |
| UChar *w8; |
| UChar *r8; |
| UInt res; |
| w8 = (UChar*) &w32; |
| r8 = (UChar*) &res; |
| r8[0] = sbox[w8[0]]; |
| r8[1] = sbox[w8[1]]; |
| r8[2] = sbox[w8[2]]; |
| r8[3] = sbox[w8[3]]; |
| return res; |
| } |
| |
| /* For description, see definition in guest_amd64_defs.h */ |
| extern void amd64g_dirtyhelper_AESKEYGENASSIST ( |
| VexGuestAMD64State* gst, |
| HWord imm8, |
| HWord gstOffL, HWord gstOffR |
| ) |
| { |
| // Locate the two vector arguments within the guest state. |
| V128* argL = (V128*)( ((UChar*)gst) + gstOffL ); |
| V128* argR = (V128*)( ((UChar*)gst) + gstOffR ); |
| |
| argR->w32[3] = RotWord (SubWord (argL->w32[3])) ^ imm8; |
| argR->w32[2] = SubWord (argL->w32[3]); |
| argR->w32[1] = RotWord (SubWord (argL->w32[1])) ^ imm8; |
| argR->w32[0] = SubWord (argL->w32[1]); |
| } |
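| |
| /* As implemented above, with X1 = argL->w32[1] and X3 = argL->w32[3]: |
| argR->w32[0] = SubWord(X1) |
| argR->w32[1] = RotWord(SubWord(X1)) xor imm8 |
| argR->w32[2] = SubWord(X3) |
| argR->w32[3] = RotWord(SubWord(X3)) xor imm8 |
| which matches the per-dword description of AESKEYGENASSIST in the |
| Intel documentation, with imm8 supplying the round constant. */ |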
| |
| |
| |
| /*---------------------------------------------------------------*/ |
| /*--- Helpers for dealing with, and describing, ---*/ |
| /*--- guest state as a whole. ---*/ |
| /*---------------------------------------------------------------*/ |
| |
| /* Initialise the entire amd64 guest state. */ |
| /* VISIBLE TO LIBVEX CLIENT */ |
| void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state ) |
| { |
| vex_state->host_EvC_FAILADDR = 0; |
| vex_state->host_EvC_COUNTER = 0; |
| vex_state->pad0 = 0; |
| |
| vex_state->guest_RAX = 0; |
| vex_state->guest_RCX = 0; |
| vex_state->guest_RDX = 0; |
| vex_state->guest_RBX = 0; |
| vex_state->guest_RSP = 0; |
| vex_state->guest_RBP = 0; |
| vex_state->guest_RSI = 0; |
| vex_state->guest_RDI = 0; |
| vex_state->guest_R8 = 0; |
| vex_state->guest_R9 = 0; |
| vex_state->guest_R10 = 0; |
| vex_state->guest_R11 = 0; |
| vex_state->guest_R12 = 0; |
| vex_state->guest_R13 = 0; |
| vex_state->guest_R14 = 0; |
| vex_state->guest_R15 = 0; |
| |
| vex_state->guest_CC_OP = AMD64G_CC_OP_COPY; |
| vex_state->guest_CC_DEP1 = 0; |
| vex_state->guest_CC_DEP2 = 0; |
| vex_state->guest_CC_NDEP = 0; |
| |
| vex_state->guest_DFLAG = 1; /* forwards */ |
| vex_state->guest_IDFLAG = 0; |
| |
| /* HACK: represent the offset associated with %fs==0. This |
| assumes that %fs is only ever zero. */ |
| vex_state->guest_FS_ZERO = 0; |
| |
| vex_state->guest_RIP = 0; |
| |
| /* Initialise the simulated FPU */ |
| amd64g_dirtyhelper_FINIT( vex_state ); |
| |
| /* Initialise the AVX state. */ |
| # define AVXZERO(_ymm) \ |
| do { _ymm[0]=_ymm[1]=_ymm[2]=_ymm[3] = 0; \ |
| _ymm[4]=_ymm[5]=_ymm[6]=_ymm[7] = 0; \ |
| } while (0) |
| vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST; |
| AVXZERO(vex_state->guest_YMM0); |
| AVXZERO(vex_state->guest_YMM1); |
| AVXZERO(vex_state->guest_YMM2); |
| AVXZERO(vex_state->guest_YMM3); |
| AVXZERO(vex_state->guest_YMM4); |
| AVXZERO(vex_state->guest_YMM5); |
| AVXZERO(vex_state->guest_YMM6); |
| AVXZERO(vex_state->guest_YMM7); |
| AVXZERO(vex_state->guest_YMM8); |
| AVXZERO(vex_state->guest_YMM9); |
| AVXZERO(vex_state->guest_YMM10); |
| AVXZERO(vex_state->guest_YMM11); |
| AVXZERO(vex_state->guest_YMM12); |
| AVXZERO(vex_state->guest_YMM13); |
| AVXZERO(vex_state->guest_YMM14); |
| AVXZERO(vex_state->guest_YMM15); |
| AVXZERO(vex_state->guest_YMM16); |
| |
| # undef AVXZERO |
| |
| vex_state->guest_EMNOTE = EmNote_NONE; |
| |
| /* These should never be read or written, but we initialise them |
| anyway. */ |
| vex_state->guest_TISTART = 0; |
| vex_state->guest_TILEN = 0; |
| |
| vex_state->guest_NRADDR = 0; |
| vex_state->guest_SC_CLASS = 0; |
| vex_state->guest_GS_0x60 = 0; |
| |
| vex_state->guest_IP_AT_SYSCALL = 0; |
| vex_state->pad1 = 0; |
| } |
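| |
| /* A minimal usage sketch for a LibVEX client (illustrative only; kept |
| out of the build by #if 0). The entry point and stack address are |
| made-up values, not anything VEX prescribes. */ |
| #if 0 |
| static void example_setup_guest_state ( VexGuestAMD64State* st ) |
| { |
| LibVEX_GuestAMD64_initialise(st); |
| st->guest_RIP = 0x400000ULL; /* hypothetical entry point */ |
| st->guest_RSP = 0x7ff0000000ULL; /* hypothetical stack pointer */ |
| } |
| #endif |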
| |
| |
| /* Figure out if any part of the guest state contained in minoff |
| .. maxoff requires precise memory exceptions. If in doubt return |
| True (but this generates significantly slower code). |
| |
| By default we enforce precise exns for guest %RSP, %RBP and %RIP |
| only. These are the minimum needed to extract correct stack |
| backtraces from amd64 code. |
| |
| Only %RSP is needed in mode VexRegUpdSpAtMemAccess. |
| */ |
| Bool guest_amd64_state_requires_precise_mem_exns ( Int minoff, |
| Int maxoff) |
| { |
| Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP); |
| Int rbp_max = rbp_min + 8 - 1; |
| Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP); |
| Int rsp_max = rsp_min + 8 - 1; |
| Int rip_min = offsetof(VexGuestAMD64State, guest_RIP); |
| Int rip_max = rip_min + 8 - 1; |
| |
| if (maxoff < rsp_min || minoff > rsp_max) { |
| /* no overlap with rsp */ |
| if (vex_control.iropt_register_updates == VexRegUpdSpAtMemAccess) |
| return False; // We only need to check stack pointer. |
| } else { |
| return True; |
| } |
| |
| if (maxoff < rbp_min || minoff > rbp_max) { |
| /* no overlap with rbp */ |
| } else { |
| return True; |
| } |
| |
| if (maxoff < rip_min || minoff > rip_max) { |
| /* no overlap with rip */ |
| } else { |
| return True; |
| } |
| |
| return False; |
| } |
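| |
| /* Illustrative queries (not part of VEX; kept out of the build by |
| #if 0). A range lying inside guest_RAX overlaps none of RSP, RBP or |
| RIP and so needs no precise exceptions, whereas any range touching |
| guest_RSP does. */ |
| #if 0 |
| static void example_precise_exn_queries ( void ) |
| { |
| Int rax = offsetof(VexGuestAMD64State, guest_RAX); |
| Int rsp = offsetof(VexGuestAMD64State, guest_RSP); |
| vassert( !guest_amd64_state_requires_precise_mem_exns(rax, rax + 7) ); |
| vassert( guest_amd64_state_requires_precise_mem_exns(rsp, rsp + 7) ); |
| } |
| #endif |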
| |
| |
| #define ALWAYSDEFD(field) \ |
| { offsetof(VexGuestAMD64State, field), \ |
| (sizeof ((VexGuestAMD64State*)0)->field) } |
| |
| VexGuestLayout |
| amd64guest_layout |
| = { |
| /* Total size of the guest state, in bytes. */ |
| .total_sizeB = sizeof(VexGuestAMD64State), |
| |
| /* Describe the stack pointer. */ |
| .offset_SP = offsetof(VexGuestAMD64State,guest_RSP), |
| .sizeof_SP = 8, |
| |
| /* Describe the frame pointer. */ |
| .offset_FP = offsetof(VexGuestAMD64State,guest_RBP), |
| .sizeof_FP = 8, |
| |
| /* Describe the instruction pointer. */ |
| .offset_IP = offsetof(VexGuestAMD64State,guest_RIP), |
| .sizeof_IP = 8, |
| |
| /* Describe any sections to be regarded by Memcheck as |
| 'always-defined'. */ |
| .n_alwaysDefd = 16, |
| |
| /* flags thunk: OP and NDEP are always defd, whereas DEP1 |
| and DEP2 have to be tracked. See detailed comment in |
| gdefs.h on meaning of thunk fields. */ |
| .alwaysDefd |
| = { /* 0 */ ALWAYSDEFD(guest_CC_OP), |
| /* 1 */ ALWAYSDEFD(guest_CC_NDEP), |
| /* 2 */ ALWAYSDEFD(guest_DFLAG), |
| /* 3 */ ALWAYSDEFD(guest_IDFLAG), |
| /* 4 */ ALWAYSDEFD(guest_RIP), |
| /* 5 */ ALWAYSDEFD(guest_FS_ZERO), |
| /* 6 */ ALWAYSDEFD(guest_FTOP), |
| /* 7 */ ALWAYSDEFD(guest_FPTAG), |
| /* 8 */ ALWAYSDEFD(guest_FPROUND), |
| /* 9 */ ALWAYSDEFD(guest_FC3210), |
| // /* */ ALWAYSDEFD(guest_CS), |
| // /* */ ALWAYSDEFD(guest_DS), |
| // /* */ ALWAYSDEFD(guest_ES), |
| // /* */ ALWAYSDEFD(guest_FS), |
| // /* */ ALWAYSDEFD(guest_GS), |
| // /* */ ALWAYSDEFD(guest_SS), |
| // /* */ ALWAYSDEFD(guest_LDT), |
| // /* */ ALWAYSDEFD(guest_GDT), |
| /* 10 */ ALWAYSDEFD(guest_EMNOTE), |
| /* 11 */ ALWAYSDEFD(guest_SSEROUND), |
| /* 12 */ ALWAYSDEFD(guest_TISTART), |
| /* 13 */ ALWAYSDEFD(guest_TILEN), |
| /* 14 */ ALWAYSDEFD(guest_SC_CLASS), |
| /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL) |
| } |
| }; |
| |
| |
| /*---------------------------------------------------------------*/ |
| /*--- end guest_amd64_helpers.c ---*/ |
| /*---------------------------------------------------------------*/ |