njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 1 | |
| 2 | /*---------------------------------------------------------------*/ |
sewardj | 752f906 | 2010-05-03 21:38:49 +0000 | [diff] [blame] | 3 | /*--- begin guest_amd64_helpers.c ---*/ |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 4 | /*---------------------------------------------------------------*/ |
| 5 | |
| 6 | /* |
sewardj | 752f906 | 2010-05-03 21:38:49 +0000 | [diff] [blame] | 7 | This file is part of Valgrind, a dynamic binary instrumentation |
| 8 | framework. |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 9 | |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 10 | Copyright (C) 2004-2017 OpenWorks LLP |
sewardj | 752f906 | 2010-05-03 21:38:49 +0000 | [diff] [blame] | 11 | info@open-works.net |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 12 | |
sewardj | 752f906 | 2010-05-03 21:38:49 +0000 | [diff] [blame] | 13 | This program is free software; you can redistribute it and/or |
| 14 | modify it under the terms of the GNU General Public License as |
| 15 | published by the Free Software Foundation; either version 2 of the |
| 16 | License, or (at your option) any later version. |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 17 | |
sewardj | 752f906 | 2010-05-03 21:38:49 +0000 | [diff] [blame] | 18 | This program is distributed in the hope that it will be useful, but |
| 19 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 21 | General Public License for more details. |
| 22 | |
| 23 | You should have received a copy of the GNU General Public License |
| 24 | along with this program; if not, write to the Free Software |
| 25 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
sewardj | 7bd6ffe | 2005-08-03 16:07:36 +0000 | [diff] [blame] | 26 | 02110-1301, USA. |
| 27 | |
sewardj | 752f906 | 2010-05-03 21:38:49 +0000 | [diff] [blame] | 28 | The GNU General Public License is contained in the file COPYING. |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 29 | |
| 30 | Neither the names of the U.S. Department of Energy nor the |
| 31 | University of California nor the names of its contributors may be |
| 32 | used to endorse or promote products derived from this software |
| 33 | without prior written permission. |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 34 | */ |
| 35 | |
| 36 | #include "libvex_basictypes.h" |
florian | 33b0243 | 2012-08-25 21:48:04 +0000 | [diff] [blame] | 37 | #include "libvex_emnote.h" |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 38 | #include "libvex_guest_amd64.h" |
| 39 | #include "libvex_ir.h" |
| 40 | #include "libvex.h" |
| 41 | |
sewardj | cef7d3e | 2009-07-02 12:21:59 +0000 | [diff] [blame] | 42 | #include "main_util.h" |
philippe | 6c46bef | 2012-08-14 22:29:01 +0000 | [diff] [blame] | 43 | #include "main_globals.h" |
sewardj | cef7d3e | 2009-07-02 12:21:59 +0000 | [diff] [blame] | 44 | #include "guest_generic_bb_to_IR.h" |
| 45 | #include "guest_amd64_defs.h" |
| 46 | #include "guest_generic_x87.h" |
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 47 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 48 | |
| 49 | /* This file contains helper functions for amd64 guest code. |
| 50 | Calls to these functions are generated by the back end. |
| 51 | These calls are of course in the host machine code and |
| 52 | this file will be compiled to host machine code, so that |
| 53 | all makes sense. |
| 54 | |
| 55 | Only change the signatures of these helper functions very |
| 56 | carefully. If you change the signature here, you'll have to change |
| 57 | the parameters passed to it in the IR calls constructed by |
| 58 | guest-amd64/toIR.c. |
| 59 | |
| 60 | The convention used is that all functions called from generated |
| 61 | code are named amd64g_<something>, and any function whose name lacks |
| 62 | that prefix is not called from generated code. Note that some |
| 63 | LibVEX_* functions can however be called by VEX's client, but that |
| 64 | is not the same as calling them from VEX-generated code. |
| 65 | */ |
| 66 | |
| 67 | |
/* Compile-time switch for the flag-machinery profiler: define as 1
   to get detailed profiling info about use of the flag machinery,
   leave at 0 otherwise. */
#define PROFILE_RFLAGS 0
| 71 | |
| 72 | |
| 73 | /*---------------------------------------------------------------*/ |
| 74 | /*--- %rflags run-time helpers. ---*/ |
| 75 | /*---------------------------------------------------------------*/ |
| 76 | |
sewardj | 1a01e65 | 2005-02-23 11:39:21 +0000 | [diff] [blame] | 77 | /* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags |
| 78 | after imulq/mulq. */ |
| 79 | |
| 80 | static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo ) |
| 81 | { |
sewardj | 095b4cb | 2015-06-30 13:37:45 +0000 | [diff] [blame] | 82 | const Long halfMask = 0xFFFFFFFFLL; |
sewardj | 1a01e65 | 2005-02-23 11:39:21 +0000 | [diff] [blame] | 83 | ULong u0, v0, w0; |
| 84 | Long u1, v1, w1, w2, t; |
sewardj | 095b4cb | 2015-06-30 13:37:45 +0000 | [diff] [blame] | 85 | u0 = u & halfMask; |
sewardj | 1a01e65 | 2005-02-23 11:39:21 +0000 | [diff] [blame] | 86 | u1 = u >> 32; |
sewardj | 095b4cb | 2015-06-30 13:37:45 +0000 | [diff] [blame] | 87 | v0 = v & halfMask; |
sewardj | 1a01e65 | 2005-02-23 11:39:21 +0000 | [diff] [blame] | 88 | v1 = v >> 32; |
| 89 | w0 = u0 * v0; |
| 90 | t = u1 * v0 + (w0 >> 32); |
sewardj | 095b4cb | 2015-06-30 13:37:45 +0000 | [diff] [blame] | 91 | w1 = t & halfMask; |
sewardj | 1a01e65 | 2005-02-23 11:39:21 +0000 | [diff] [blame] | 92 | w2 = t >> 32; |
| 93 | w1 = u0 * v1 + w1; |
| 94 | *rHi = u1 * v1 + w2 + (w1 >> 32); |
sewardj | 095b4cb | 2015-06-30 13:37:45 +0000 | [diff] [blame] | 95 | *rLo = (Long)((ULong)u * (ULong)v); |
sewardj | 1a01e65 | 2005-02-23 11:39:21 +0000 | [diff] [blame] | 96 | } |
| 97 | |
| 98 | static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo ) |
| 99 | { |
sewardj | 095b4cb | 2015-06-30 13:37:45 +0000 | [diff] [blame] | 100 | const ULong halfMask = 0xFFFFFFFFULL; |
sewardj | 1a01e65 | 2005-02-23 11:39:21 +0000 | [diff] [blame] | 101 | ULong u0, v0, w0; |
| 102 | ULong u1, v1, w1,w2,t; |
sewardj | 095b4cb | 2015-06-30 13:37:45 +0000 | [diff] [blame] | 103 | u0 = u & halfMask; |
sewardj | 1a01e65 | 2005-02-23 11:39:21 +0000 | [diff] [blame] | 104 | u1 = u >> 32; |
sewardj | 095b4cb | 2015-06-30 13:37:45 +0000 | [diff] [blame] | 105 | v0 = v & halfMask; |
sewardj | 1a01e65 | 2005-02-23 11:39:21 +0000 | [diff] [blame] | 106 | v1 = v >> 32; |
| 107 | w0 = u0 * v0; |
| 108 | t = u1 * v0 + (w0 >> 32); |
sewardj | 095b4cb | 2015-06-30 13:37:45 +0000 | [diff] [blame] | 109 | w1 = t & halfMask; |
sewardj | 1a01e65 | 2005-02-23 11:39:21 +0000 | [diff] [blame] | 110 | w2 = t >> 32; |
| 111 | w1 = u0 * v1 + w1; |
| 112 | *rHi = u1 * v1 + w2 + (w1 >> 32); |
| 113 | *rLo = u * v; |
| 114 | } |
| 115 | |
| 116 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 117 | static const UChar parity_table[256] = { |
| 118 | AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 119 | 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 120 | 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 121 | AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 122 | 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 123 | AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 124 | AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 125 | 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 126 | 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 127 | AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 128 | AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 129 | 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 130 | AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 131 | 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 132 | 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 133 | AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 134 | 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 135 | AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 136 | AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 137 | 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 138 | AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 139 | 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 140 | 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 141 | AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 142 | AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 143 | 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 144 | 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 145 | AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 146 | 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 147 | AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 148 | AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, |
| 149 | 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, |
| 150 | }; |
| 151 | |
sewardj | 4a6f384 | 2005-03-26 11:59:23 +0000 | [diff] [blame] | 152 | /* generalised left-shifter */ |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 153 | static inline Long lshift ( Long x, Int n ) |
sewardj | 118b23e | 2005-01-29 02:14:44 +0000 | [diff] [blame] | 154 | { |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 155 | if (n >= 0) |
florian | 108e03f | 2015-03-10 16:11:58 +0000 | [diff] [blame] | 156 | return (ULong)x << n; |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 157 | else |
| 158 | return x >> (-n); |
sewardj | 118b23e | 2005-01-29 02:14:44 +0000 | [diff] [blame] | 159 | } |
| 160 | |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 161 | /* identity on ULong */ |
| 162 | static inline ULong idULong ( ULong x ) |
| 163 | { |
| 164 | return x; |
| 165 | } |
| 166 | |
sewardj | 118b23e | 2005-01-29 02:14:44 +0000 | [diff] [blame] | 167 | |
/* Common prologue for the ACTIONS_* macros.  Expects the names
   cc_dep1_formal, cc_dep2_formal and cc_ndep_formal to be in scope
   and declares, for an operation on data_bits-wide data:

     DATA_MASK  all-ones mask of the operand width (8, 16 or 32 bits;
                any other width selects the full 64-bit mask)
     SIGN_MASK  only the top bit of the operand width set
     CC_DEP1, CC_DEP2, CC_NDEP
                local copies of the three formals

   The macro argument is parenthesised so that an expression argument
   is compared and shifted as a whole.  The (void) casts mark the
   locals as used, so that macros which ignore some of them do not
   draw unused-variable warnings; they have no run-time effect. */
#define PREAMBLE(data_bits)                                     \
   /* const */ ULong DATA_MASK                                  \
      = (data_bits) == 8                                        \
           ? 0xFFULL                                            \
           : ((data_bits) == 16                                 \
                ? 0xFFFFULL                                     \
                : ((data_bits) == 32                            \
                     ? 0xFFFFFFFFULL                            \
                     : 0xFFFFFFFFFFFFFFFFULL));                 \
   /* const */ ULong SIGN_MASK = 1ULL << ((data_bits) - 1);     \
   /* const */ ULong CC_DEP1 = cc_dep1_formal;                  \
   /* const */ ULong CC_DEP2 = cc_dep2_formal;                  \
   /* const */ ULong CC_NDEP = cc_ndep_formal;                  \
   (void)SIGN_MASK;                                             \
   (void)DATA_MASK;                                             \
   (void)CC_DEP1;                                               \
   (void)CC_DEP2;                                               \
   (void)CC_NDEP;
| 188 | |
| 189 | |
| 190 | /*-------------------------------------------------------------*/ |
| 191 | |
/* ADD: DEP1 and DEP2 are the two addends.  Computes all six flags
   from them and returns the result from the enclosing function. */
#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong lhs = CC_DEP1;                                       \
     ULong rhs = CC_DEP2;                                       \
     ULong sum = lhs + rhs;                                     \
     /* carry: the width-truncated sum is below the left arg */ \
     ULong cf  = (DATA_UTYPE)sum < (DATA_UTYPE)lhs;             \
     ULong pf  = parity_table[(UChar)sum];                      \
     ULong af  = (sum ^ lhs ^ rhs) & 0x10;                      \
     ULong zf  = ((DATA_UTYPE)sum == 0) << 6;                   \
     ULong sf  = lshift(sum, 8 - DATA_BITS) & 0x80;             \
     /* overflow: args agree in sign, result differs */         \
     ULong of  = lshift(~(lhs ^ rhs) & (lhs ^ sum),             \
                        12 - DATA_BITS) & AMD64G_CC_MASK_O;     \
     return of | sf | zf | af | pf | cf;                        \
   }                                                            \
}
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 210 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 211 | /*-------------------------------------------------------------*/ |
| 212 | |
/* SUB: DEP1 is the left operand, DEP2 the right one.  Computes all
   six flags for DEP1 - DEP2 and returns them from the enclosing
   function. */
#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong lhs  = CC_DEP1;                                      \
     ULong rhs  = CC_DEP2;                                      \
     ULong diff = lhs - rhs;                                    \
     /* borrow: left arg is below right arg at this width */    \
     ULong cf   = (DATA_UTYPE)lhs < (DATA_UTYPE)rhs;            \
     ULong pf   = parity_table[(UChar)diff];                    \
     ULong af   = (diff ^ lhs ^ rhs) & 0x10;                    \
     ULong zf   = ((DATA_UTYPE)diff == 0) << 6;                 \
     ULong sf   = lshift(diff, 8 - DATA_BITS) & 0x80;           \
     /* overflow: args differ in sign, result differs from lhs */ \
     ULong of   = lshift((lhs ^ rhs) & (lhs ^ diff),            \
                         12 - DATA_BITS) & AMD64G_CC_MASK_O;    \
     return of | sf | zf | af | pf | cf;                        \
   }                                                            \
}
| 231 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 232 | /*-------------------------------------------------------------*/ |
| 233 | |
/* ADC: add with carry-in.  The old carry is taken from NDEP; DEP1 is
   the left operand and the right operand is recovered as DEP2 xor
   the old carry.  Computes all six flags and returns them from the
   enclosing function. */
#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong carryIn = CC_NDEP & AMD64G_CC_MASK_C;                \
     ULong lhs     = CC_DEP1;                                   \
     ULong rhs     = CC_DEP2 ^ carryIn;                         \
     ULong sum     = (lhs + rhs) + carryIn;                     \
     /* with a carry-in, a result equal to lhs also carried */  \
     ULong cf      = carryIn                                    \
                        ? ((DATA_UTYPE)sum <= (DATA_UTYPE)lhs)  \
                        : ((DATA_UTYPE)sum <  (DATA_UTYPE)lhs); \
     ULong pf      = parity_table[(UChar)sum];                  \
     ULong af      = (sum ^ lhs ^ rhs) & 0x10;                  \
     ULong zf      = ((DATA_UTYPE)sum == 0) << 6;               \
     ULong sf      = lshift(sum, 8 - DATA_BITS) & 0x80;         \
     ULong of      = lshift(~(lhs ^ rhs) & (lhs ^ sum),         \
                            12 - DATA_BITS) & AMD64G_CC_MASK_O; \
     return of | sf | zf | af | pf | cf;                        \
   }                                                            \
}
| 256 | |
| 257 | /*-------------------------------------------------------------*/ |
| 258 | |
/* SBB: subtract with borrow-in.  The old carry is taken from NDEP;
   DEP1 is the left operand and the right operand is recovered as
   DEP2 xor the old carry.  Computes all six flags and returns them
   from the enclosing function. */
#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong borrowIn = CC_NDEP & AMD64G_CC_MASK_C;               \
     ULong lhs      = CC_DEP1;                                  \
     ULong rhs      = CC_DEP2 ^ borrowIn;                       \
     ULong diff     = (lhs - rhs) - borrowIn;                   \
     /* with a borrow-in, equal operands also borrow */         \
     ULong cf       = borrowIn                                  \
                         ? ((DATA_UTYPE)lhs <= (DATA_UTYPE)rhs) \
                         : ((DATA_UTYPE)lhs <  (DATA_UTYPE)rhs); \
     ULong pf       = parity_table[(UChar)diff];                \
     ULong af       = (diff ^ lhs ^ rhs) & 0x10;                \
     ULong zf       = ((DATA_UTYPE)diff == 0) << 6;             \
     ULong sf       = lshift(diff, 8 - DATA_BITS) & 0x80;       \
     ULong of       = lshift((lhs ^ rhs) & (lhs ^ diff),        \
                             12 - DATA_BITS) & AMD64G_CC_MASK_O; \
     return of | sf | zf | af | pf | cf;                        \
   }                                                            \
}
| 281 | |
| 282 | /*-------------------------------------------------------------*/ |
| 283 | |
/* LOGIC: flags are derived from DEP1 alone.  The carry, aux-carry
   and overflow contributions are always zero, so only the parity,
   zero and sign contributions appear in the returned value. */
#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong pf = parity_table[(UChar)CC_DEP1];                   \
     ULong zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                \
     ULong sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;          \
     /* cf, af and of are all 0 here */                         \
     return sf | zf | pf;                                       \
   }                                                            \
}
| 297 | |
| 298 | /*-------------------------------------------------------------*/ |
| 299 | |
/* INC: DEP1 is the result; the operands are reconstructed as
   (result - 1) and 1.  The carry bit is passed through unchanged
   from NDEP; the other five flags are computed. */
#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong res  = CC_DEP1;                                      \
     ULong argR = 1;                                            \
     ULong argL = res - argR;                                   \
     ULong cf   = CC_NDEP & AMD64G_CC_MASK_C;                   \
     ULong pf   = parity_table[(UChar)res];                     \
     ULong af   = (res ^ argL ^ argR) & 0x10;                   \
     ULong zf   = ((DATA_UTYPE)res == 0) << 6;                  \
     ULong sf   = lshift(res, 8 - DATA_BITS) & 0x80;            \
     /* overflow exactly when the result is the sign bit alone */ \
     ULong of   = ((res & DATA_MASK) == SIGN_MASK) << 11;       \
     return of | sf | zf | af | pf | cf;                        \
   }                                                            \
}
| 317 | |
| 318 | /*-------------------------------------------------------------*/ |
| 319 | |
/* DEC: DEP1 is the result; the operands are reconstructed as
   (result + 1) and 1.  The carry bit is passed through unchanged
   from NDEP; the other five flags are computed. */
#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong res  = CC_DEP1;                                      \
     ULong argR = 1;                                            \
     ULong argL = res + argR;                                   \
     ULong cf   = CC_NDEP & AMD64G_CC_MASK_C;                   \
     ULong pf   = parity_table[(UChar)res];                     \
     ULong af   = (res ^ argL ^ argR) & 0x10;                   \
     ULong zf   = ((DATA_UTYPE)res == 0) << 6;                  \
     ULong sf   = lshift(res, 8 - DATA_BITS) & 0x80;            \
     /* overflow exactly when the result is SIGN_MASK - 1 */    \
     ULong of   = ((res & DATA_MASK)                            \
                   == ((ULong)SIGN_MASK - 1)) << 11;            \
     return of | sf | zf | af | pf | cf;                        \
   }                                                            \
}
| 338 | |
| 339 | /*-------------------------------------------------------------*/ |
| 340 | |
/* SHL: parity, zero and sign come from DEP1; carry is the top bit
   (at the operand width) of DEP2; overflow is taken from the top
   bit of DEP2 ^ DEP1.  The aux-carry contribution is always 0. */
#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C; \
     ULong pf = parity_table[(UChar)CC_DEP1];                   \
     ULong zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                \
     ULong sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;          \
     /* of is defined if shift count == 1 */                    \
     ULong of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)       \
                & AMD64G_CC_MASK_O;                             \
     /* af is undefined; it contributes 0 */                    \
     return of | sf | zf | pf | cf;                             \
   }                                                            \
}
| 356 | |
| 357 | /*-------------------------------------------------------------*/ |
| 358 | |
/* SHR: parity, zero and sign come from DEP1; carry is the bottom
   bit of DEP2; overflow is taken from the top bit (at the operand
   width) of DEP2 ^ DEP1.  The aux-carry contribution is always 0. */
#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf = CC_DEP2 & 1;                                    \
     ULong pf = parity_table[(UChar)CC_DEP1];                   \
     ULong zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                \
     ULong sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;          \
     /* of is defined if shift count == 1 */                    \
     ULong of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)       \
                & AMD64G_CC_MASK_O;                             \
     /* af is undefined; it contributes 0 */                    \
     return of | sf | zf | pf | cf;                             \
   }                                                            \
}
| 374 | |
| 375 | /*-------------------------------------------------------------*/ |
| 376 | |
/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result).
   DEP1 = result, NDEP = old flags.
   Every flag other than C and O is carried over from NDEP. */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { /* old flags with the O and C positions cleared */         \
     ULong kept = CC_NDEP                                       \
                  & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C);     \
     ULong newC = AMD64G_CC_MASK_C & CC_DEP1;                   \
     /* msb and lsb of the result, both moved to bit 11 */      \
     ULong newO = AMD64G_CC_MASK_O                              \
                  & (lshift(CC_DEP1, 11-(DATA_BITS-1))          \
                     ^ lshift(CC_DEP1, 11));                    \
     return kept | newC | newO;                                 \
   }                                                            \
}
| 391 | |
| 392 | /*-------------------------------------------------------------*/ |
| 393 | |
/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result).
   DEP1 = result, NDEP = old flags.
   Every flag other than C and O is carried over from NDEP. */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { /* old flags with the O and C positions cleared */         \
     ULong kept = CC_NDEP                                       \
                  & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C);     \
     ULong newC = AMD64G_CC_MASK_C                              \
                  & (CC_DEP1 >> (DATA_BITS-1));                 \
     /* msb and msb-1 of the result, both moved to bit 11 */    \
     ULong newO = AMD64G_CC_MASK_O                              \
                  & (lshift(CC_DEP1, 11-(DATA_BITS-1))          \
                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1));    \
     return kept | newC | newO;                                 \
   }                                                            \
}
| 408 | |
| 409 | /*-------------------------------------------------------------*/ |
| 410 | |
/* UMUL: unsigned widening multiply of DEP1 and DEP2, both narrowed
   to DATA_UTYPE first.  'lo' is the product narrowed back to the
   operand width; 'hi' is the upper half of the double-width product.
   Carry and overflow are both set exactly when 'hi' is nonzero;
   parity, zero and sign come from 'lo'; aux-carry contributes 0. */
#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU,          \
                     DATA_U2TYPE, NARROWto2U)                   \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { DATA_UTYPE  opL = (DATA_UTYPE)CC_DEP1;                     \
     DATA_UTYPE  opR = (DATA_UTYPE)CC_DEP2;                     \
     DATA_UTYPE  lo  = NARROWtoU( opL * opR );                  \
     DATA_U2TYPE rr  = NARROWto2U( ((DATA_U2TYPE)opL)           \
                                   * ((DATA_U2TYPE)opR) );      \
     DATA_UTYPE  hi  = NARROWtoU(rr >>/*u*/ DATA_BITS);         \
     ULong cf = (hi != 0);                                      \
     ULong pf = parity_table[(UChar)lo];                        \
     ULong zf = (lo == 0) << 6;                                 \
     ULong sf = lshift(lo, 8 - DATA_BITS) & 0x80;               \
     ULong of = cf << 11;                                       \
     /* af is undefined; it contributes 0 */                    \
     return of | sf | zf | pf | cf;                             \
   }                                                            \
}
| 434 | |
| 435 | /*-------------------------------------------------------------*/ |
| 436 | |
/* SMUL: signed widening multiply of DEP1 and DEP2, both narrowed to
   DATA_STYPE first.  'lo' is the product narrowed back to the
   operand width; 'hi' is the upper half of the double-width product.
   Carry and overflow are both set exactly when 'hi' differs from
   'lo' shifted right (signed) by DATA_BITS-1; parity, zero and sign
   come from 'lo'; aux-carry contributes 0. */
#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS,          \
                     DATA_S2TYPE, NARROWto2S)                   \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { DATA_STYPE  opL = (DATA_STYPE)CC_DEP1;                     \
     DATA_STYPE  opR = (DATA_STYPE)CC_DEP2;                     \
     DATA_STYPE  lo  = NARROWtoS( ((DATA_S2TYPE)opL)            \
                                  * ((DATA_S2TYPE)opR) );       \
     DATA_S2TYPE rr  = NARROWto2S( ((DATA_S2TYPE)opL)           \
                                   * ((DATA_S2TYPE)opR) );      \
     DATA_STYPE  hi  = NARROWtoS(rr >>/*s*/ DATA_BITS);         \
     ULong cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));             \
     ULong pf = parity_table[(UChar)lo];                        \
     ULong zf = (lo == 0) << 6;                                 \
     ULong sf = lshift(lo, 8 - DATA_BITS) & 0x80;               \
     ULong of = cf << 11;                                       \
     /* af is undefined; it contributes 0 */                    \
     return of | sf | zf | pf | cf;                             \
   }                                                            \
}
| 460 | |
sewardj | 1a01e65 | 2005-02-23 11:39:21 +0000 | [diff] [blame] | 461 | /*-------------------------------------------------------------*/ |
| 462 | |
/* UMULQ: 64-bit unsigned multiply of DEP1 and DEP2 via mullU64.
   Carry and overflow are both set exactly when the upper 64 bits of
   the product are nonzero; parity, zero and sign come from the lower
   64 bits; aux-carry contributes 0. */
#define ACTIONS_UMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { ULong cf, pf, zf, sf, of;                                  \
     ULong hi, lo;                                              \
     mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo );       \
     cf = (hi != 0);                                            \
     of = cf << 11;                                             \
     pf = parity_table[(UChar)lo];                              \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     /* af is undefined; it contributes 0 */                    \
     return of | sf | zf | pf | cf;                             \
   }                                                            \
}
| 478 | |
| 479 | /*-------------------------------------------------------------*/ |
| 480 | |
/* SMULQ: 64-bit signed multiply of DEP1 and DEP2 via mullS64.
   Carry and overflow are both set exactly when the upper 64 bits of
   the product differ from the lower 64 bits shifted right (signed)
   by 63; parity, zero and sign come from the lower 64 bits;
   aux-carry contributes 0. */
#define ACTIONS_SMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { ULong cf, pf, zf, sf, of;                                  \
     Long  hi, lo;                                              \
     mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo );         \
     cf = (hi != (lo >>/*s*/ (64-1)));                          \
     of = cf << 11;                                             \
     pf = parity_table[(UChar)lo];                              \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     /* af is undefined; it contributes 0 */                    \
     return of | sf | zf | pf | cf;                             \
   }                                                            \
}
| 496 | |
sewardj | cc3d219 | 2013-03-27 11:37:33 +0000 | [diff] [blame] | 497 | /*-------------------------------------------------------------*/ |
| 498 | |
/* ANDN: only the zero and sign contributions are computed, both
   from DEP1.  Carry, parity, aux-carry and overflow contribute 0. */
#define ACTIONS_ANDN(DATA_BITS,DATA_UTYPE)                      \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                \
     ULong sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;          \
     /* cf, pf, af and of are all 0 here */                     \
     return sf | zf;                                            \
   }                                                            \
}
| 512 | |
| 513 | /*-------------------------------------------------------------*/ |
| 514 | |
/* BLSI: carry is set when DEP2 (narrowed to the operand width) is
   nonzero; zero and sign come from DEP1.  Parity, aux-carry and
   overflow contribute 0. */
#define ACTIONS_BLSI(DATA_BITS,DATA_UTYPE)                      \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf = ((DATA_UTYPE)CC_DEP2 != 0);                     \
     ULong zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                \
     ULong sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;          \
     /* pf, af and of are all 0 here */                         \
     return sf | zf | cf;                                       \
   }                                                            \
}
| 528 | |
| 529 | /*-------------------------------------------------------------*/ |
| 530 | |
/* BLSMSK: carry is set when DEP2 (narrowed to the operand width) is
   zero; sign comes from DEP1.  Parity, aux-carry, zero and overflow
   all contribute 0.

   The flag temporaries are ULong, the same type the other ACTIONS_*
   macros use, so the returned expression has the same unsigned type
   everywhere.  The value produced is unchanged, as every
   contribution is a small non-negative number. */
#define ACTIONS_BLSMSK(DATA_BITS,DATA_UTYPE)                    \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = ((DATA_UTYPE)CC_DEP2 == 0);                           \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = 0;                                                    \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
| 544 | |
| 545 | /*-------------------------------------------------------------*/ |
| 546 | |
/* BLSR: carry is set when DEP2 (narrowed to the operand width) is
   zero; zero and sign come from DEP1.  Parity, aux-carry and
   overflow contribute 0. */
#define ACTIONS_BLSR(DATA_BITS,DATA_UTYPE)                      \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf = ((DATA_UTYPE)CC_DEP2 == 0);                     \
     ULong zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                \
     ULong sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;          \
     /* pf, af and of are all 0 here */                         \
     return sf | zf | cf;                                       \
   }                                                            \
}
| 560 | |
| 561 | /*-------------------------------------------------------------*/ |
| 562 | |
/* ADX: add with a carry-in taken from a single flag, selected by
   FLAGNAME (the O or the C flag).  The old value of that flag is
   read from NDEP; DEP1 is the left operand and the right operand is
   recovered as DEP2 xor the old flag value.  The value returned is
   NDEP with only the selected flag replaced by the new carry-out;
   every other bit of NDEP is passed through. */
#define ACTIONS_ADX(DATA_BITS,DATA_UTYPE,FLAGNAME)              \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong flagIn  = (CC_NDEP >> AMD64G_CC_SHIFT_##FLAGNAME) & 1; \
     ULong lhs     = CC_DEP1;                                   \
     ULong rhs     = CC_DEP2 ^ flagIn;                          \
     ULong sum     = (lhs + rhs) + flagIn;                      \
     /* with a carry-in, a result equal to lhs also carried */  \
     ULong flagOut = flagIn                                     \
                        ? ((DATA_UTYPE)sum <= (DATA_UTYPE)lhs)  \
                        : ((DATA_UTYPE)sum <  (DATA_UTYPE)lhs); \
     return (CC_NDEP & ~AMD64G_CC_MASK_##FLAGNAME)              \
            | (flagOut << AMD64G_CC_SHIFT_##FLAGNAME);          \
   }                                                            \
}
| 580 | |
| 581 | /*-------------------------------------------------------------*/ |
| 582 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 583 | |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 584 | #if PROFILE_RFLAGS |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 585 | |
| 586 | static Bool initted = False; |
| 587 | |
| 588 | /* C flag, fast route */ |
| 589 | static UInt tabc_fast[AMD64G_CC_OP_NUMBER]; |
| 590 | /* C flag, slow route */ |
| 591 | static UInt tabc_slow[AMD64G_CC_OP_NUMBER]; |
| 592 | /* table for calculate_cond */ |
| 593 | static UInt tab_cond[AMD64G_CC_OP_NUMBER][16]; |
| 594 | /* total entry counts for calc_all, calc_c, calc_cond. */ |
| 595 | static UInt n_calc_all = 0; |
| 596 | static UInt n_calc_c = 0; |
| 597 | static UInt n_calc_cond = 0; |
| 598 | |
| 599 | #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond))) |
| 600 | |
| 601 | |
| 602 | static void showCounts ( void ) |
| 603 | { |
| 604 | Int op, co; |
florian | 5df8ab0 | 2012-10-13 19:34:19 +0000 | [diff] [blame] | 605 | HChar ch; |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 606 | vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n", |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 607 | n_calc_all, n_calc_cond, n_calc_c); |
| 608 | |
| 609 | vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE" |
| 610 | " S NS P NP L NL LE NLE\n"); |
| 611 | vex_printf(" -----------------------------------------------------" |
| 612 | "----------------------------------------\n"); |
| 613 | for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) { |
| 614 | |
| 615 | ch = ' '; |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 616 | if (op > 0 && (op-1) % 4 == 0) |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 617 | ch = 'B'; |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 618 | if (op > 0 && (op-1) % 4 == 1) |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 619 | ch = 'W'; |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 620 | if (op > 0 && (op-1) % 4 == 2) |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 621 | ch = 'L'; |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 622 | if (op > 0 && (op-1) % 4 == 3) |
| 623 | ch = 'Q'; |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 624 | |
| 625 | vex_printf("%2d%c: ", op, ch); |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 626 | vex_printf("%6u ", tabc_slow[op]); |
| 627 | vex_printf("%6u ", tabc_fast[op]); |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 628 | for (co = 0; co < 16; co++) { |
| 629 | Int n = tab_cond[op][co]; |
| 630 | if (n >= 1000) { |
| 631 | vex_printf(" %3dK", n / 1000); |
| 632 | } else |
| 633 | if (n >= 0) { |
| 634 | vex_printf(" %3d ", n ); |
| 635 | } else { |
| 636 | vex_printf(" "); |
| 637 | } |
| 638 | } |
| 639 | vex_printf("\n"); |
| 640 | } |
| 641 | vex_printf("\n"); |
| 642 | } |
| 643 | |
| 644 | static void initCounts ( void ) |
| 645 | { |
| 646 | Int op, co; |
| 647 | initted = True; |
| 648 | for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) { |
| 649 | tabc_fast[op] = tabc_slow[op] = 0; |
| 650 | for (co = 0; co < 16; co++) |
| 651 | tab_cond[op][co] = 0; |
| 652 | } |
| 653 | } |
| 654 | |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 655 | #endif /* PROFILE_RFLAGS */ |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 656 | |
| 657 | |
/* Calculate all the 6 flags (O, S, Z, A, C, P) from the supplied
   thunk parameters.  Worker function, not directly called from
   generated code; the helpers elsewhere in this file call it.

     cc_op            which AMD64G_CC_OP_* operation the thunk describes
     cc_dep1_formal,
     cc_dep2_formal,
     cc_ndep_formal   the thunk's operand fields; how they are used
                      depends on cc_op

   AMD64G_CC_OP_COPY returns cc_dep1_formal masked down to the six
   flag bits.  Every other recognised cc_op is handed to the matching
   ACTIONS_* macro, parameterised by operand width in bits and the C
   type(s) to compute in.

   NOTE(review): the ACTIONS_* macros are defined outside this
   excerpt.  Presumably each expands to a block that reads the
   *_formal parameters by name and returns the computed flags, which
   would explain why no case has an explicit break or return --
   confirm before renaming parameters or adding code between cases.

   An unrecognised cc_op is reported through vex_printf and then
   vpanic is called. */
static
ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
                                        ULong cc_dep1_formal,
                                        ULong cc_dep2_formal,
                                        ULong cc_ndep_formal )
{
   switch (cc_op) {
      /* Flags were stored verbatim in dep1; keep only O,S,Z,A,C,P. */
      case AMD64G_CC_OP_COPY:
         return cc_dep1_formal
                & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
                   | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);

      case AMD64G_CC_OP_ADDB: ACTIONS_ADD( 8, UChar );
      case AMD64G_CC_OP_ADDW: ACTIONS_ADD( 16, UShort );
      case AMD64G_CC_OP_ADDL: ACTIONS_ADD( 32, UInt );
      case AMD64G_CC_OP_ADDQ: ACTIONS_ADD( 64, ULong );

      case AMD64G_CC_OP_ADCB: ACTIONS_ADC( 8, UChar );
      case AMD64G_CC_OP_ADCW: ACTIONS_ADC( 16, UShort );
      case AMD64G_CC_OP_ADCL: ACTIONS_ADC( 32, UInt );
      case AMD64G_CC_OP_ADCQ: ACTIONS_ADC( 64, ULong );

      case AMD64G_CC_OP_SUBB: ACTIONS_SUB( 8, UChar );
      case AMD64G_CC_OP_SUBW: ACTIONS_SUB( 16, UShort );
      case AMD64G_CC_OP_SUBL: ACTIONS_SUB( 32, UInt );
      case AMD64G_CC_OP_SUBQ: ACTIONS_SUB( 64, ULong );

      case AMD64G_CC_OP_SBBB: ACTIONS_SBB( 8, UChar );
      case AMD64G_CC_OP_SBBW: ACTIONS_SBB( 16, UShort );
      case AMD64G_CC_OP_SBBL: ACTIONS_SBB( 32, UInt );
      case AMD64G_CC_OP_SBBQ: ACTIONS_SBB( 64, ULong );

      case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC( 8, UChar );
      case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt );
      case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong );

      case AMD64G_CC_OP_INCB: ACTIONS_INC( 8, UChar );
      case AMD64G_CC_OP_INCW: ACTIONS_INC( 16, UShort );
      case AMD64G_CC_OP_INCL: ACTIONS_INC( 32, UInt );
      case AMD64G_CC_OP_INCQ: ACTIONS_INC( 64, ULong );

      case AMD64G_CC_OP_DECB: ACTIONS_DEC( 8, UChar );
      case AMD64G_CC_OP_DECW: ACTIONS_DEC( 16, UShort );
      case AMD64G_CC_OP_DECL: ACTIONS_DEC( 32, UInt );
      case AMD64G_CC_OP_DECQ: ACTIONS_DEC( 64, ULong );

      case AMD64G_CC_OP_SHLB: ACTIONS_SHL( 8, UChar );
      case AMD64G_CC_OP_SHLW: ACTIONS_SHL( 16, UShort );
      case AMD64G_CC_OP_SHLL: ACTIONS_SHL( 32, UInt );
      case AMD64G_CC_OP_SHLQ: ACTIONS_SHL( 64, ULong );

      case AMD64G_CC_OP_SHRB: ACTIONS_SHR( 8, UChar );
      case AMD64G_CC_OP_SHRW: ACTIONS_SHR( 16, UShort );
      case AMD64G_CC_OP_SHRL: ACTIONS_SHR( 32, UInt );
      case AMD64G_CC_OP_SHRQ: ACTIONS_SHR( 64, ULong );

      case AMD64G_CC_OP_ROLB: ACTIONS_ROL( 8, UChar );
      case AMD64G_CC_OP_ROLW: ACTIONS_ROL( 16, UShort );
      case AMD64G_CC_OP_ROLL: ACTIONS_ROL( 32, UInt );
      case AMD64G_CC_OP_ROLQ: ACTIONS_ROL( 64, ULong );

      case AMD64G_CC_OP_RORB: ACTIONS_ROR( 8, UChar );
      case AMD64G_CC_OP_RORW: ACTIONS_ROR( 16, UShort );
      case AMD64G_CC_OP_RORL: ACTIONS_ROR( 32, UInt );
      case AMD64G_CC_OP_RORQ: ACTIONS_ROR( 64, ULong );

      /* Multiplies up to 32 bits pass both the operand type and the
         double-width type, each with a narrowing/identity function. */
      case AMD64G_CC_OP_UMULB: ACTIONS_UMUL( 8, UChar, toUChar,
                                             UShort, toUShort );
      case AMD64G_CC_OP_UMULW: ACTIONS_UMUL( 16, UShort, toUShort,
                                             UInt, toUInt );
      case AMD64G_CC_OP_UMULL: ACTIONS_UMUL( 32, UInt, toUInt,
                                             ULong, idULong );

      /* The 64-bit multiplies use their own dedicated macros. */
      case AMD64G_CC_OP_UMULQ: ACTIONS_UMULQ;

      case AMD64G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, toUChar,
                                             Short, toUShort );
      case AMD64G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, toUShort,
                                             Int, toUInt );
      case AMD64G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, toUInt,
                                             Long, idULong );

      case AMD64G_CC_OP_SMULQ: ACTIONS_SMULQ;

      case AMD64G_CC_OP_ANDN32: ACTIONS_ANDN( 32, UInt );
      case AMD64G_CC_OP_ANDN64: ACTIONS_ANDN( 64, ULong );

      case AMD64G_CC_OP_BLSI32: ACTIONS_BLSI( 32, UInt );
      case AMD64G_CC_OP_BLSI64: ACTIONS_BLSI( 64, ULong );

      case AMD64G_CC_OP_BLSMSK32: ACTIONS_BLSMSK( 32, UInt );
      case AMD64G_CC_OP_BLSMSK64: ACTIONS_BLSMSK( 64, ULong );

      case AMD64G_CC_OP_BLSR32: ACTIONS_BLSR( 32, UInt );
      case AMD64G_CC_OP_BLSR64: ACTIONS_BLSR( 64, ULong );

      /* ADCX and ADOX share one macro; the third argument names the
         flag (C or O) involved. */
      case AMD64G_CC_OP_ADCX32: ACTIONS_ADX( 32, UInt, C );
      case AMD64G_CC_OP_ADCX64: ACTIONS_ADX( 64, ULong, C );

      case AMD64G_CC_OP_ADOX32: ACTIONS_ADX( 32, UInt, O );
      case AMD64G_CC_OP_ADOX64: ACTIONS_ADX( 64, ULong, O );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
                    "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
   }
}
| 772 | |
| 773 | |
| 774 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 775 | /* Calculate all the 6 flags from the supplied thunk parameters. */ |
| 776 | ULong amd64g_calculate_rflags_all ( ULong cc_op, |
| 777 | ULong cc_dep1, |
| 778 | ULong cc_dep2, |
| 779 | ULong cc_ndep ) |
| 780 | { |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 781 | # if PROFILE_RFLAGS |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 782 | if (!initted) initCounts(); |
| 783 | n_calc_all++; |
| 784 | if (SHOW_COUNTS_NOW) showCounts(); |
| 785 | # endif |
| 786 | return |
| 787 | amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep ); |
| 788 | } |
| 789 | |
| 790 | |
| 791 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 792 | /* Calculate just the carry flag from the supplied thunk parameters. */ |
| 793 | ULong amd64g_calculate_rflags_c ( ULong cc_op, |
| 794 | ULong cc_dep1, |
| 795 | ULong cc_dep2, |
| 796 | ULong cc_ndep ) |
| 797 | { |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 798 | # if PROFILE_RFLAGS |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 799 | if (!initted) initCounts(); |
| 800 | n_calc_c++; |
| 801 | tabc_fast[cc_op]++; |
| 802 | if (SHOW_COUNTS_NOW) showCounts(); |
| 803 | # endif |
| 804 | |
| 805 | /* Fast-case some common ones. */ |
| 806 | switch (cc_op) { |
sewardj | 7fc494b | 2005-05-05 12:05:11 +0000 | [diff] [blame] | 807 | case AMD64G_CC_OP_COPY: |
| 808 | return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1; |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 809 | case AMD64G_CC_OP_LOGICQ: |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 810 | case AMD64G_CC_OP_LOGICL: |
| 811 | case AMD64G_CC_OP_LOGICW: |
| 812 | case AMD64G_CC_OP_LOGICB: |
| 813 | return 0; |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 814 | // case AMD64G_CC_OP_SUBL: |
| 815 | // return ((UInt)cc_dep1) < ((UInt)cc_dep2) |
| 816 | // ? AMD64G_CC_MASK_C : 0; |
| 817 | // case AMD64G_CC_OP_SUBW: |
| 818 | // return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF)) |
| 819 | // ? AMD64G_CC_MASK_C : 0; |
| 820 | // case AMD64G_CC_OP_SUBB: |
| 821 | // return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF)) |
| 822 | // ? AMD64G_CC_MASK_C : 0; |
| 823 | // case AMD64G_CC_OP_INCL: |
| 824 | // case AMD64G_CC_OP_DECL: |
| 825 | // return cc_ndep & AMD64G_CC_MASK_C; |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 826 | default: |
| 827 | break; |
| 828 | } |
| 829 | |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 830 | # if PROFILE_RFLAGS |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 831 | tabc_fast[cc_op]--; |
| 832 | tabc_slow[cc_op]++; |
| 833 | # endif |
| 834 | |
| 835 | return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep) |
| 836 | & AMD64G_CC_MASK_C; |
| 837 | } |
| 838 | |
| 839 | |
| 840 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 841 | /* returns 1 or 0 */ |
| 842 | ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond, |
| 843 | ULong cc_op, |
| 844 | ULong cc_dep1, |
| 845 | ULong cc_dep2, |
| 846 | ULong cc_ndep ) |
| 847 | { |
| 848 | ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1, |
| 849 | cc_dep2, cc_ndep); |
| 850 | ULong of,sf,zf,cf,pf; |
| 851 | ULong inv = cond & 1; |
| 852 | |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 853 | # if PROFILE_RFLAGS |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 854 | if (!initted) initCounts(); |
| 855 | tab_cond[cc_op][cond]++; |
| 856 | n_calc_cond++; |
| 857 | if (SHOW_COUNTS_NOW) showCounts(); |
| 858 | # endif |
| 859 | |
| 860 | switch (cond) { |
| 861 | case AMD64CondNO: |
| 862 | case AMD64CondO: /* OF == 1 */ |
| 863 | of = rflags >> AMD64G_CC_SHIFT_O; |
| 864 | return 1 & (inv ^ of); |
| 865 | |
| 866 | case AMD64CondNZ: |
| 867 | case AMD64CondZ: /* ZF == 1 */ |
| 868 | zf = rflags >> AMD64G_CC_SHIFT_Z; |
| 869 | return 1 & (inv ^ zf); |
| 870 | |
| 871 | case AMD64CondNB: |
| 872 | case AMD64CondB: /* CF == 1 */ |
| 873 | cf = rflags >> AMD64G_CC_SHIFT_C; |
| 874 | return 1 & (inv ^ cf); |
| 875 | break; |
| 876 | |
| 877 | case AMD64CondNBE: |
| 878 | case AMD64CondBE: /* (CF or ZF) == 1 */ |
| 879 | cf = rflags >> AMD64G_CC_SHIFT_C; |
| 880 | zf = rflags >> AMD64G_CC_SHIFT_Z; |
| 881 | return 1 & (inv ^ (cf | zf)); |
| 882 | break; |
| 883 | |
| 884 | case AMD64CondNS: |
| 885 | case AMD64CondS: /* SF == 1 */ |
| 886 | sf = rflags >> AMD64G_CC_SHIFT_S; |
| 887 | return 1 & (inv ^ sf); |
| 888 | |
| 889 | case AMD64CondNP: |
| 890 | case AMD64CondP: /* PF == 1 */ |
| 891 | pf = rflags >> AMD64G_CC_SHIFT_P; |
| 892 | return 1 & (inv ^ pf); |
| 893 | |
| 894 | case AMD64CondNL: |
| 895 | case AMD64CondL: /* (SF xor OF) == 1 */ |
| 896 | sf = rflags >> AMD64G_CC_SHIFT_S; |
| 897 | of = rflags >> AMD64G_CC_SHIFT_O; |
| 898 | return 1 & (inv ^ (sf ^ of)); |
| 899 | break; |
| 900 | |
| 901 | case AMD64CondNLE: |
| 902 | case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */ |
| 903 | sf = rflags >> AMD64G_CC_SHIFT_S; |
| 904 | of = rflags >> AMD64G_CC_SHIFT_O; |
| 905 | zf = rflags >> AMD64G_CC_SHIFT_Z; |
| 906 | return 1 & (inv ^ ((sf ^ of) | zf)); |
| 907 | break; |
| 908 | |
| 909 | default: |
| 910 | /* shouldn't really make these calls from generated code */ |
| 911 | vex_printf("amd64g_calculate_condition" |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 912 | "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n", |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 913 | cond, cc_op, cc_dep1, cc_dep2, cc_ndep ); |
| 914 | vpanic("amd64g_calculate_condition"); |
| 915 | } |
| 916 | } |
| 917 | |
| 918 | |
| 919 | /* VISIBLE TO LIBVEX CLIENT */ |
florian | efa834a | 2012-11-24 21:07:14 +0000 | [diff] [blame] | 920 | ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/const VexGuestAMD64State* vex_state ) |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 921 | { |
| 922 | ULong rflags = amd64g_calculate_rflags_all_WRK( |
| 923 | vex_state->guest_CC_OP, |
| 924 | vex_state->guest_CC_DEP1, |
| 925 | vex_state->guest_CC_DEP2, |
| 926 | vex_state->guest_CC_NDEP |
| 927 | ); |
sewardj | 7de0d3c | 2005-02-13 02:26:41 +0000 | [diff] [blame] | 928 | Long dflag = vex_state->guest_DFLAG; |
| 929 | vassert(dflag == 1 || dflag == -1); |
| 930 | if (dflag == -1) |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 931 | rflags |= (1<<10); |
sewardj | 85520e4 | 2005-02-19 15:22:38 +0000 | [diff] [blame] | 932 | if (vex_state->guest_IDFLAG == 1) |
| 933 | rflags |= (1<<21); |
sewardj | 5e120aa | 2010-09-28 15:59:04 +0000 | [diff] [blame] | 934 | if (vex_state->guest_ACFLAG == 1) |
| 935 | rflags |= (1<<18); |
| 936 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 937 | return rflags; |
| 938 | } |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 939 | |
sewardj | d660d41 | 2008-12-03 21:29:59 +0000 | [diff] [blame] | 940 | /* VISIBLE TO LIBVEX CLIENT */ |
| 941 | void |
sewardj | 3e5d82d | 2015-07-21 14:43:23 +0000 | [diff] [blame] | 942 | LibVEX_GuestAMD64_put_rflags ( ULong rflags, |
| 943 | /*MOD*/VexGuestAMD64State* vex_state ) |
| 944 | { |
| 945 | /* D flag */ |
| 946 | if (rflags & AMD64G_CC_MASK_D) { |
| 947 | vex_state->guest_DFLAG = -1; |
| 948 | rflags &= ~AMD64G_CC_MASK_D; |
| 949 | } |
| 950 | else |
| 951 | vex_state->guest_DFLAG = 1; |
| 952 | |
| 953 | /* ID flag */ |
| 954 | if (rflags & AMD64G_CC_MASK_ID) { |
| 955 | vex_state->guest_IDFLAG = 1; |
| 956 | rflags &= ~AMD64G_CC_MASK_ID; |
| 957 | } |
| 958 | else |
| 959 | vex_state->guest_IDFLAG = 0; |
| 960 | |
| 961 | /* AC flag */ |
| 962 | if (rflags & AMD64G_CC_MASK_AC) { |
| 963 | vex_state->guest_ACFLAG = 1; |
| 964 | rflags &= ~AMD64G_CC_MASK_AC; |
| 965 | } |
| 966 | else |
| 967 | vex_state->guest_ACFLAG = 0; |
| 968 | |
| 969 | UInt cc_mask = AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z | |
| 970 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P; |
| 971 | vex_state->guest_CC_OP = AMD64G_CC_OP_COPY; |
| 972 | vex_state->guest_CC_DEP1 = rflags & cc_mask; |
| 973 | vex_state->guest_CC_DEP2 = 0; |
| 974 | vex_state->guest_CC_NDEP = 0; |
| 975 | } |
| 976 | |
| 977 | /* VISIBLE TO LIBVEX CLIENT */ |
| 978 | void |
sewardj | d660d41 | 2008-12-03 21:29:59 +0000 | [diff] [blame] | 979 | LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag, |
| 980 | /*MOD*/VexGuestAMD64State* vex_state ) |
| 981 | { |
| 982 | ULong oszacp = amd64g_calculate_rflags_all_WRK( |
| 983 | vex_state->guest_CC_OP, |
| 984 | vex_state->guest_CC_DEP1, |
| 985 | vex_state->guest_CC_DEP2, |
| 986 | vex_state->guest_CC_NDEP |
| 987 | ); |
| 988 | if (new_carry_flag & 1) { |
| 989 | oszacp |= AMD64G_CC_MASK_C; |
| 990 | } else { |
| 991 | oszacp &= ~AMD64G_CC_MASK_C; |
| 992 | } |
| 993 | vex_state->guest_CC_OP = AMD64G_CC_OP_COPY; |
| 994 | vex_state->guest_CC_DEP1 = oszacp; |
| 995 | vex_state->guest_CC_DEP2 = 0; |
| 996 | vex_state->guest_CC_NDEP = 0; |
| 997 | } |
| 998 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 999 | |
| 1000 | /*---------------------------------------------------------------*/ |
| 1001 | /*--- %rflags translation-time function specialisers. ---*/ |
/*--- These help iropt specialise calls to the above run-time ---*/
| 1003 | /*--- %rflags functions. ---*/ |
| 1004 | /*---------------------------------------------------------------*/ |
| 1005 | |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1006 | /* Used by the optimiser to try specialisations. Returns an |
| 1007 | equivalent expression, or NULL if none. */ |
| 1008 | |
| 1009 | static Bool isU64 ( IRExpr* e, ULong n ) |
| 1010 | { |
sewardj | 65b17c6 | 2005-05-02 15:52:44 +0000 | [diff] [blame] | 1011 | return toBool( e->tag == Iex_Const |
| 1012 | && e->Iex.Const.con->tag == Ico_U64 |
| 1013 | && e->Iex.Const.con->Ico.U64 == n ); |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1014 | } |
sewardj | 354e5c6 | 2005-01-27 20:12:52 +0000 | [diff] [blame] | 1015 | |
florian | 1ff4756 | 2012-10-21 02:09:51 +0000 | [diff] [blame] | 1016 | IRExpr* guest_amd64_spechelper ( const HChar* function_name, |
sewardj | be91791 | 2010-08-22 12:38:53 +0000 | [diff] [blame] | 1017 | IRExpr** args, |
| 1018 | IRStmt** precedingStmts, |
| 1019 | Int n_precedingStmts ) |
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 1020 | { |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1021 | # define unop(_op,_a1) IRExpr_Unop((_op),(_a1)) |
| 1022 | # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2)) |
| 1023 | # define mkU64(_n) IRExpr_Const(IRConst_U64(_n)) |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1024 | # define mkU32(_n) IRExpr_Const(IRConst_U32(_n)) |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1025 | # define mkU8(_n) IRExpr_Const(IRConst_U8(_n)) |
| 1026 | |
| 1027 | Int i, arity = 0; |
| 1028 | for (i = 0; args[i]; i++) |
| 1029 | arity++; |
| 1030 | # if 0 |
| 1031 | vex_printf("spec request:\n"); |
| 1032 | vex_printf(" %s ", function_name); |
| 1033 | for (i = 0; i < arity; i++) { |
| 1034 | vex_printf(" "); |
| 1035 | ppIRExpr(args[i]); |
| 1036 | } |
| 1037 | vex_printf("\n"); |
| 1038 | # endif |
| 1039 | |
| 1040 | /* --------- specialising "amd64g_calculate_condition" --------- */ |
| 1041 | |
| 1042 | if (vex_streq(function_name, "amd64g_calculate_condition")) { |
| 1043 | /* specialise calls to above "calculate condition" function */ |
| 1044 | IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2; |
| 1045 | vassert(arity == 5); |
| 1046 | cond = args[0]; |
| 1047 | cc_op = args[1]; |
| 1048 | cc_dep1 = args[2]; |
| 1049 | cc_dep2 = args[3]; |
| 1050 | |
sewardj | db261e4 | 2005-05-11 23:16:43 +0000 | [diff] [blame] | 1051 | /*---------------- ADDQ ----------------*/ |
| 1052 | |
| 1053 | if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) { |
| 1054 | /* long long add, then Z --> test (dst+src == 0) */ |
| 1055 | return unop(Iop_1Uto64, |
| 1056 | binop(Iop_CmpEQ64, |
| 1057 | binop(Iop_Add64, cc_dep1, cc_dep2), |
| 1058 | mkU64(0))); |
| 1059 | } |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1060 | |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1061 | /*---------------- ADDL ----------------*/ |
| 1062 | |
| 1063 | if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondO)) { |
| 1064 | /* This is very commonly generated by Javascript JITs, for |
| 1065 | the idiom "do a 32-bit add and jump to out-of-line code if |
| 1066 | an overflow occurs". */ |
| 1067 | /* long add, then O (overflow) |
| 1068 | --> ((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 + dep2)))[31] |
| 1069 | --> (((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1 |
| 1070 | --> (((not(dep1 ^ dep2)) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1 |
| 1071 | */ |
| 1072 | vassert(isIRAtom(cc_dep1)); |
| 1073 | vassert(isIRAtom(cc_dep2)); |
| 1074 | return |
| 1075 | binop(Iop_And64, |
| 1076 | binop(Iop_Shr64, |
| 1077 | binop(Iop_And64, |
| 1078 | unop(Iop_Not64, |
| 1079 | binop(Iop_Xor64, cc_dep1, cc_dep2)), |
| 1080 | binop(Iop_Xor64, |
| 1081 | cc_dep1, |
| 1082 | binop(Iop_Add64, cc_dep1, cc_dep2))), |
| 1083 | mkU8(31)), |
| 1084 | mkU64(1)); |
| 1085 | |
| 1086 | } |
| 1087 | |
sewardj | 4b06a0b | 2005-11-13 19:51:04 +0000 | [diff] [blame] | 1088 | /*---------------- SUBQ ----------------*/ |
| 1089 | |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1090 | /* 0, */ |
| 1091 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondO)) { |
| 1092 | /* long long sub/cmp, then O (overflow) |
| 1093 | --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[63] |
| 1094 | --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2))) >>u 63 |
| 1095 | */ |
| 1096 | vassert(isIRAtom(cc_dep1)); |
| 1097 | vassert(isIRAtom(cc_dep2)); |
| 1098 | return binop(Iop_Shr64, |
| 1099 | binop(Iop_And64, |
| 1100 | binop(Iop_Xor64, cc_dep1, cc_dep2), |
| 1101 | binop(Iop_Xor64, |
| 1102 | cc_dep1, |
| 1103 | binop(Iop_Sub64, cc_dep1, cc_dep2))), |
mjw | d937beb | 2015-05-27 12:33:41 +0000 | [diff] [blame] | 1104 | mkU8(63)); |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1105 | } |
| 1106 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNO)) { |
| 1107 | /* No action. Never yet found a test case. */ |
| 1108 | } |
| 1109 | |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1110 | /* 2, 3 */ |
sewardj | 4b06a0b | 2005-11-13 19:51:04 +0000 | [diff] [blame] | 1111 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) { |
| 1112 | /* long long sub/cmp, then B (unsigned less than) |
| 1113 | --> test dst <u src */ |
| 1114 | return unop(Iop_1Uto64, |
| 1115 | binop(Iop_CmpLT64U, cc_dep1, cc_dep2)); |
| 1116 | } |
sewardj | a9e4a80 | 2005-12-26 19:33:55 +0000 | [diff] [blame] | 1117 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) { |
| 1118 | /* long long sub/cmp, then NB (unsigned greater than or equal) |
| 1119 | --> test src <=u dst */ |
| 1120 | /* Note, args are opposite way round from the usual */ |
| 1121 | return unop(Iop_1Uto64, |
| 1122 | binop(Iop_CmpLE64U, cc_dep2, cc_dep1)); |
| 1123 | } |
| 1124 | |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1125 | /* 4, 5 */ |
| 1126 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) { |
| 1127 | /* long long sub/cmp, then Z --> test dst==src */ |
sewardj | 3cfd1f0 | 2013-08-07 09:45:08 +0000 | [diff] [blame] | 1128 | return unop(Iop_1Uto64, |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1129 | binop(Iop_CmpEQ64,cc_dep1,cc_dep2)); |
| 1130 | } |
| 1131 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) { |
| 1132 | /* long long sub/cmp, then NZ --> test dst!=src */ |
| 1133 | return unop(Iop_1Uto64, |
| 1134 | binop(Iop_CmpNE64,cc_dep1,cc_dep2)); |
sewardj | 3cfd1f0 | 2013-08-07 09:45:08 +0000 | [diff] [blame] | 1135 | } |
| 1136 | |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1137 | /* 6, 7 */ |
sewardj | a9e4a80 | 2005-12-26 19:33:55 +0000 | [diff] [blame] | 1138 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) { |
| 1139 | /* long long sub/cmp, then BE (unsigned less than or equal) |
| 1140 | --> test dst <=u src */ |
| 1141 | return unop(Iop_1Uto64, |
| 1142 | binop(Iop_CmpLE64U, cc_dep1, cc_dep2)); |
| 1143 | } |
sewardj | 3a05a15 | 2012-02-23 07:36:43 +0000 | [diff] [blame] | 1144 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNBE)) { |
| 1145 | /* long long sub/cmp, then NBE (unsigned greater than) |
| 1146 | --> test !(dst <=u src) */ |
| 1147 | return binop(Iop_Xor64, |
| 1148 | unop(Iop_1Uto64, |
| 1149 | binop(Iop_CmpLE64U, cc_dep1, cc_dep2)), |
| 1150 | mkU64(1)); |
| 1151 | } |
sewardj | a9e4a80 | 2005-12-26 19:33:55 +0000 | [diff] [blame] | 1152 | |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1153 | /* 8, 9 */ |
| 1154 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondS)) { |
| 1155 | /* long long sub/cmp, then S (negative) |
| 1156 | --> (dst-src)[63] |
| 1157 | --> (dst-src) >>u 63 */ |
| 1158 | return binop(Iop_Shr64, |
| 1159 | binop(Iop_Sub64, cc_dep1, cc_dep2), |
| 1160 | mkU8(63)); |
| 1161 | } |
| 1162 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNS)) { |
| 1163 | /* long long sub/cmp, then NS (not negative) |
| 1164 | --> (dst-src)[63] ^ 1 |
| 1165 | --> ((dst-src) >>u 63) ^ 1 */ |
| 1166 | return binop(Iop_Xor64, |
| 1167 | binop(Iop_Shr64, |
| 1168 | binop(Iop_Sub64, cc_dep1, cc_dep2), |
| 1169 | mkU8(63)), |
| 1170 | mkU64(1)); |
| 1171 | } |
| 1172 | |
| 1173 | /* 12, 13 */ |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1174 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) { |
| 1175 | /* long long sub/cmp, then L (signed less than) |
| 1176 | --> test dst <s src */ |
| 1177 | return unop(Iop_1Uto64, |
| 1178 | binop(Iop_CmpLT64S, cc_dep1, cc_dep2)); |
| 1179 | } |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1180 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNL)) { |
| 1181 | /* long long sub/cmp, then NL (signed greater than or equal) |
| 1182 | --> test dst >=s src |
| 1183 | --> test src <=s dst */ |
| 1184 | return unop(Iop_1Uto64, |
| 1185 | binop(Iop_CmpLE64S, cc_dep2, cc_dep1)); |
| 1186 | } |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1187 | |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1188 | /* 14, 15 */ |
| 1189 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondLE)) { |
| 1190 | /* long long sub/cmp, then LE (signed less than or equal) |
| 1191 | --> test dst <=s src */ |
| 1192 | return unop(Iop_1Uto64, |
| 1193 | binop(Iop_CmpLE64S, cc_dep1, cc_dep2)); |
| 1194 | } |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1195 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNLE)) { |
| 1196 | /* long sub/cmp, then NLE (signed greater than) |
| 1197 | --> test !(dst <=s src) |
| 1198 | --> test (dst >s src) |
| 1199 | --> test (src <s dst) */ |
| 1200 | return unop(Iop_1Uto64, |
| 1201 | binop(Iop_CmpLT64S, cc_dep2, cc_dep1)); |
| 1202 | |
| 1203 | } |
| 1204 | |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1205 | /*---------------- SUBL ----------------*/ |
| 1206 | |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1207 | /* 0, */ |
| 1208 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondO)) { |
| 1209 | /* This is very commonly generated by Javascript JITs, for |
| 1210 | the idiom "do a 32-bit subtract and jump to out-of-line |
| 1211 | code if an overflow occurs". */ |
| 1212 | /* long sub/cmp, then O (overflow) |
| 1213 | --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[31] |
| 1214 | --> (((dep1 ^ dep2) & (dep1 ^ (dep1 -64 dep2))) >>u 31) & 1 |
| 1215 | */ |
| 1216 | vassert(isIRAtom(cc_dep1)); |
| 1217 | vassert(isIRAtom(cc_dep2)); |
| 1218 | return |
| 1219 | binop(Iop_And64, |
| 1220 | binop(Iop_Shr64, |
| 1221 | binop(Iop_And64, |
| 1222 | binop(Iop_Xor64, cc_dep1, cc_dep2), |
| 1223 | binop(Iop_Xor64, |
| 1224 | cc_dep1, |
| 1225 | binop(Iop_Sub64, cc_dep1, cc_dep2))), |
| 1226 | mkU8(31)), |
| 1227 | mkU64(1)); |
| 1228 | } |
| 1229 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNO)) { |
| 1230 | /* No action. Never yet found a test case. */ |
| 1231 | } |
| 1232 | |
| 1233 | /* 2, 3 */ |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1234 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) { |
| 1235 | /* long sub/cmp, then B (unsigned less than) |
| 1236 | --> test dst <u src */ |
| 1237 | return unop(Iop_1Uto64, |
| 1238 | binop(Iop_CmpLT32U, |
| 1239 | unop(Iop_64to32, cc_dep1), |
| 1240 | unop(Iop_64to32, cc_dep2))); |
| 1241 | } |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1242 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNB)) { |
| 1243 | /* long sub/cmp, then NB (unsigned greater than or equal) |
| 1244 | --> test src <=u dst */ |
| 1245 | /* Note, args are opposite way round from the usual */ |
| 1246 | return unop(Iop_1Uto64, |
| 1247 | binop(Iop_CmpLE32U, |
| 1248 | unop(Iop_64to32, cc_dep2), |
| 1249 | unop(Iop_64to32, cc_dep1))); |
| 1250 | } |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1251 | |
| 1252 | /* 4, 5 */ |
sewardj | db261e4 | 2005-05-11 23:16:43 +0000 | [diff] [blame] | 1253 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) { |
| 1254 | /* long sub/cmp, then Z --> test dst==src */ |
| 1255 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1256 | binop(Iop_CmpEQ32, |
| 1257 | unop(Iop_64to32, cc_dep1), |
| 1258 | unop(Iop_64to32, cc_dep2))); |
sewardj | a9e4a80 | 2005-12-26 19:33:55 +0000 | [diff] [blame] | 1259 | } |
sewardj | a9e4a80 | 2005-12-26 19:33:55 +0000 | [diff] [blame] | 1260 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) { |
| 1261 | /* long sub/cmp, then NZ --> test dst!=src */ |
| 1262 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1263 | binop(Iop_CmpNE32, |
| 1264 | unop(Iop_64to32, cc_dep1), |
| 1265 | unop(Iop_64to32, cc_dep2))); |
sewardj | db261e4 | 2005-05-11 23:16:43 +0000 | [diff] [blame] | 1266 | } |
| 1267 | |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1268 | /* 6, 7 */ |
| 1269 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) { |
| 1270 | /* long sub/cmp, then BE (unsigned less than or equal) |
| 1271 | --> test dst <=u src */ |
| 1272 | return unop(Iop_1Uto64, |
| 1273 | binop(Iop_CmpLE32U, |
| 1274 | unop(Iop_64to32, cc_dep1), |
| 1275 | unop(Iop_64to32, cc_dep2))); |
| 1276 | } |
| 1277 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) { |
| 1278 | /* long sub/cmp, then NBE (unsigned greater than) |
| 1279 | --> test src <u dst */ |
| 1280 | /* Note, args are opposite way round from the usual */ |
| 1281 | return unop(Iop_1Uto64, |
| 1282 | binop(Iop_CmpLT32U, |
| 1283 | unop(Iop_64to32, cc_dep2), |
| 1284 | unop(Iop_64to32, cc_dep1))); |
| 1285 | } |
| 1286 | |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1287 | /* 8, 9 */ |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1288 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) { |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1289 | /* long sub/cmp, then S (negative) |
| 1290 | --> (dst-src)[31] |
| 1291 | --> ((dst -64 src) >>u 31) & 1 |
| 1292 | Pointless to narrow the args to 32 bit before the subtract. */ |
| 1293 | return binop(Iop_And64, |
| 1294 | binop(Iop_Shr64, |
| 1295 | binop(Iop_Sub64, cc_dep1, cc_dep2), |
| 1296 | mkU8(31)), |
| 1297 | mkU64(1)); |
| 1298 | } |
| 1299 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNS)) { |
| 1300 | /* long sub/cmp, then NS (not negative) |
| 1301 | --> (dst-src)[31] ^ 1 |
| 1302 | --> (((dst -64 src) >>u 31) & 1) ^ 1 |
| 1303 | Pointless to narrow the args to 32 bit before the subtract. */ |
| 1304 | return binop(Iop_Xor64, |
| 1305 | binop(Iop_And64, |
| 1306 | binop(Iop_Shr64, |
| 1307 | binop(Iop_Sub64, cc_dep1, cc_dep2), |
| 1308 | mkU8(31)), |
| 1309 | mkU64(1)), |
| 1310 | mkU64(1)); |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1311 | } |
| 1312 | |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1313 | /* 12, 13 */ |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1314 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) { |
| 1315 | /* long sub/cmp, then L (signed less than) |
| 1316 | --> test dst <s src */ |
sewardj | 6d709a9 | 2005-04-27 11:52:40 +0000 | [diff] [blame] | 1317 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1318 | binop(Iop_CmpLT32S, |
| 1319 | unop(Iop_64to32, cc_dep1), |
| 1320 | unop(Iop_64to32, cc_dep2))); |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1321 | } |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1322 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNL)) { |
| 1323 | /* long sub/cmp, then NL (signed greater than or equal) |
| 1324 | --> test dst >=s src |
| 1325 | --> test src <=s dst */ |
| 1326 | return unop(Iop_1Uto64, |
| 1327 | binop(Iop_CmpLE32S, |
| 1328 | unop(Iop_64to32, cc_dep2), |
| 1329 | unop(Iop_64to32, cc_dep1))); |
| 1330 | } |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1331 | |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1332 | /* 14, 15 */ |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1333 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) { |
sewardj | 3f81c4e | 2005-07-20 00:30:37 +0000 | [diff] [blame] | 1334 | /* long sub/cmp, then LE (signed less than or equal) |
| 1335 | --> test dst <=s src */ |
sewardj | 6d709a9 | 2005-04-27 11:52:40 +0000 | [diff] [blame] | 1336 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1337 | binop(Iop_CmpLE32S, |
| 1338 | unop(Iop_64to32, cc_dep1), |
| 1339 | unop(Iop_64to32, cc_dep2))); |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1340 | |
| 1341 | } |
sewardj | ff6b34a | 2010-01-15 09:54:55 +0000 | [diff] [blame] | 1342 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) { |
| 1343 | /* long sub/cmp, then NLE (signed greater than) |
| 1344 | --> test !(dst <=s src) |
| 1345 | --> test (dst >s src) |
| 1346 | --> test (src <s dst) */ |
| 1347 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1348 | binop(Iop_CmpLT32S, |
| 1349 | unop(Iop_64to32, cc_dep2), |
| 1350 | unop(Iop_64to32, cc_dep1))); |
sewardj | ff6b34a | 2010-01-15 09:54:55 +0000 | [diff] [blame] | 1351 | |
| 1352 | } |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1353 | |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1354 | /*---------------- SUBW ----------------*/ |
| 1355 | |
sewardj | 66a5e81 | 2015-02-04 19:05:13 +0000 | [diff] [blame] | 1356 | /* 4, 5 */ |
sewardj | a82b476 | 2005-05-06 16:30:21 +0000 | [diff] [blame] | 1357 | if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) { |
| 1358 | /* word sub/cmp, then Z --> test dst==src */ |
| 1359 | return unop(Iop_1Uto64, |
| 1360 | binop(Iop_CmpEQ16, |
| 1361 | unop(Iop_64to16,cc_dep1), |
| 1362 | unop(Iop_64to16,cc_dep2))); |
| 1363 | } |
sewardj | beb5291 | 2008-05-02 22:15:12 +0000 | [diff] [blame] | 1364 | if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) { |
| 1365 | /* word sub/cmp, then NZ --> test dst!=src */ |
| 1366 | return unop(Iop_1Uto64, |
| 1367 | binop(Iop_CmpNE16, |
| 1368 | unop(Iop_64to16,cc_dep1), |
| 1369 | unop(Iop_64to16,cc_dep2))); |
| 1370 | } |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1371 | |
sewardj | 66a5e81 | 2015-02-04 19:05:13 +0000 | [diff] [blame] | 1372 | /* 6, */ |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1373 | if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondBE)) { |
| 1374 | /* word sub/cmp, then BE (unsigned less than or equal) |
| 1375 | --> test dst <=u src */ |
| 1376 | return unop(Iop_1Uto64, |
| 1377 | binop(Iop_CmpLE64U, |
| 1378 | binop(Iop_Shl64, cc_dep1, mkU8(48)), |
| 1379 | binop(Iop_Shl64, cc_dep2, mkU8(48)))); |
| 1380 | } |
| 1381 | |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 1382 | /* 8, 9 */ |
| 1383 | if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondS) |
| 1384 | && isU64(cc_dep2, 0)) { |
| 1385 | /* word sub/cmp of zero, then S --> test (dst-0 <s 0) |
| 1386 | --> test dst <s 0 |
| 1387 | --> (ULong)dst[15] |
| 1388 | This is yet another scheme by which clang figures out if the |
| 1389 | top bit of a word is 1 or 0. See also LOGICB/CondS below. */ |
| 1390 | /* Note: isU64(cc_dep2, 0) is correct, even though this is |
| 1391 | for a 16-bit comparison, since the args to the helper |
| 1392 | function are always U64s. */ |
| 1393 | return binop(Iop_And64, |
| 1394 | binop(Iop_Shr64,cc_dep1,mkU8(15)), |
| 1395 | mkU64(1)); |
| 1396 | } |
| 1397 | if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNS) |
| 1398 | && isU64(cc_dep2, 0)) { |
| 1399 | /* word sub/cmp of zero, then NS --> test !(dst-0 <s 0) |
| 1400 | --> test !(dst <s 0) |
| 1401 | --> (ULong) !dst[15] |
| 1402 | */ |
| 1403 | return binop(Iop_Xor64, |
| 1404 | binop(Iop_And64, |
| 1405 | binop(Iop_Shr64,cc_dep1,mkU8(15)), |
| 1406 | mkU64(1)), |
| 1407 | mkU64(1)); |
| 1408 | } |
| 1409 | |
sewardj | 66a5e81 | 2015-02-04 19:05:13 +0000 | [diff] [blame] | 1410 | /* 14, */ |
sewardj | 3f81c4e | 2005-07-20 00:30:37 +0000 | [diff] [blame] | 1411 | if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) { |
sewardj | 3be608d | 2006-05-25 18:48:12 +0000 | [diff] [blame] | 1412 | /* word sub/cmp, then LE (signed less than or equal) |
sewardj | 3f81c4e | 2005-07-20 00:30:37 +0000 | [diff] [blame] | 1413 | --> test dst <=s src */ |
| 1414 | return unop(Iop_1Uto64, |
| 1415 | binop(Iop_CmpLE64S, |
| 1416 | binop(Iop_Shl64,cc_dep1,mkU8(48)), |
| 1417 | binop(Iop_Shl64,cc_dep2,mkU8(48)))); |
| 1418 | |
| 1419 | } |
| 1420 | |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1421 | /*---------------- SUBB ----------------*/ |
| 1422 | |
sewardj | 66a5e81 | 2015-02-04 19:05:13 +0000 | [diff] [blame] | 1423 | /* 2, 3 */ |
| 1424 | if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondB)) { |
| 1425 | /* byte sub/cmp, then B (unsigned less than) |
| 1426 | --> test dst <u src */ |
| 1427 | return unop(Iop_1Uto64, |
| 1428 | binop(Iop_CmpLT64U, |
| 1429 | binop(Iop_And64, cc_dep1, mkU64(0xFF)), |
| 1430 | binop(Iop_And64, cc_dep2, mkU64(0xFF)))); |
| 1431 | } |
| 1432 | if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNB)) { |
| 1433 | /* byte sub/cmp, then NB (unsigned greater than or equal) |
| 1434 | --> test src <=u dst */ |
| 1435 | /* Note, args are opposite way round from the usual */ |
| 1436 | return unop(Iop_1Uto64, |
| 1437 | binop(Iop_CmpLE64U, |
| 1438 | binop(Iop_And64, cc_dep2, mkU64(0xFF)), |
| 1439 | binop(Iop_And64, cc_dep1, mkU64(0xFF)))); |
| 1440 | } |
| 1441 | |
| 1442 | /* 4, 5 */ |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1443 | if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) { |
| 1444 | /* byte sub/cmp, then Z --> test dst==src */ |
sewardj | 6d709a9 | 2005-04-27 11:52:40 +0000 | [diff] [blame] | 1445 | return unop(Iop_1Uto64, |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1446 | binop(Iop_CmpEQ8, |
sewardj | 6d709a9 | 2005-04-27 11:52:40 +0000 | [diff] [blame] | 1447 | unop(Iop_64to8,cc_dep1), |
| 1448 | unop(Iop_64to8,cc_dep2))); |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1449 | } |
sewardj | 32d615b | 2006-08-25 12:52:19 +0000 | [diff] [blame] | 1450 | if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) { |
| 1451 | /* byte sub/cmp, then NZ --> test dst!=src */ |
| 1452 | return unop(Iop_1Uto64, |
| 1453 | binop(Iop_CmpNE8, |
| 1454 | unop(Iop_64to8,cc_dep1), |
| 1455 | unop(Iop_64to8,cc_dep2))); |
| 1456 | } |
| 1457 | |
sewardj | 66a5e81 | 2015-02-04 19:05:13 +0000 | [diff] [blame] | 1458 | /* 6, */ |
sewardj | e430418 | 2011-06-06 10:17:46 +0000 | [diff] [blame] | 1459 | if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) { |
| 1460 | /* byte sub/cmp, then BE (unsigned less than or equal) |
| 1461 | --> test dst <=u src */ |
| 1462 | return unop(Iop_1Uto64, |
| 1463 | binop(Iop_CmpLE64U, |
| 1464 | binop(Iop_And64, cc_dep1, mkU64(0xFF)), |
| 1465 | binop(Iop_And64, cc_dep2, mkU64(0xFF)))); |
| 1466 | } |
| 1467 | |
sewardj | 66a5e81 | 2015-02-04 19:05:13 +0000 | [diff] [blame] | 1468 | /* 8, 9 */ |
sewardj | 3be608d | 2006-05-25 18:48:12 +0000 | [diff] [blame] | 1469 | if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS) |
| 1470 | && isU64(cc_dep2, 0)) { |
| 1471 | /* byte sub/cmp of zero, then S --> test (dst-0 <s 0) |
| 1472 | --> test dst <s 0 |
| 1473 | --> (ULong)dst[7] |
| 1474 | This is yet another scheme by which gcc figures out if the |
| 1475 | top bit of a byte is 1 or 0. See also LOGICB/CondS below. */ |
| 1476 | /* Note: isU64(cc_dep2, 0) is correct, even though this is |
| 1477 | for an 8-bit comparison, since the args to the helper |
| 1478 | function are always U64s. */ |
| 1479 | return binop(Iop_And64, |
| 1480 | binop(Iop_Shr64,cc_dep1,mkU8(7)), |
| 1481 | mkU64(1)); |
| 1482 | } |
sewardj | cd538b4 | 2008-03-31 21:57:17 +0000 | [diff] [blame] | 1483 | if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS) |
| 1484 | && isU64(cc_dep2, 0)) { |
| 1485 | /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0) |
| 1486 | --> test !(dst <s 0) |
| 1487 | --> (ULong) !dst[7] |
| 1488 | */ |
| 1489 | return binop(Iop_Xor64, |
| 1490 | binop(Iop_And64, |
| 1491 | binop(Iop_Shr64,cc_dep1,mkU8(7)), |
| 1492 | mkU64(1)), |
| 1493 | mkU64(1)); |
| 1494 | } |
sewardj | 3be608d | 2006-05-25 18:48:12 +0000 | [diff] [blame] | 1495 | |
sewardj | 4b06a0b | 2005-11-13 19:51:04 +0000 | [diff] [blame] | 1496 | /*---------------- LOGICQ ----------------*/ |
| 1497 | |
| 1498 | if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) { |
| 1499 | /* long long and/or/xor, then Z --> test dst==0 */ |
| 1500 | return unop(Iop_1Uto64, |
| 1501 | binop(Iop_CmpEQ64, cc_dep1, mkU64(0))); |
| 1502 | } |
sewardj | 0cd7473 | 2011-07-07 13:58:10 +0000 | [diff] [blame] | 1503 | if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) { |
| 1504 | /* long long and/or/xor, then NZ --> test dst!=0 */ |
| 1505 | return unop(Iop_1Uto64, |
| 1506 | binop(Iop_CmpNE64, cc_dep1, mkU64(0))); |
| 1507 | } |
sewardj | 4b06a0b | 2005-11-13 19:51:04 +0000 | [diff] [blame] | 1508 | |
sewardj | 77fd846 | 2005-11-13 20:30:24 +0000 | [diff] [blame] | 1509 | if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) { |
| 1510 | /* long long and/or/xor, then L |
| 1511 | LOGIC sets SF and ZF according to the |
| 1512 | result and makes OF be zero. L computes SF ^ OF, but |
| 1513 | OF is zero, so this reduces to SF -- which will be 1 iff |
| 1514 | the result is < signed 0. Hence ... |
| 1515 | */ |
| 1516 | return unop(Iop_1Uto64, |
| 1517 | binop(Iop_CmpLT64S, |
| 1518 | cc_dep1, |
| 1519 | mkU64(0))); |
| 1520 | } |
| 1521 | |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1522 | /*---------------- LOGICL ----------------*/ |
| 1523 | |
| 1524 | if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) { |
| 1525 | /* long and/or/xor, then Z --> test dst==0 */ |
sewardj | 6d709a9 | 2005-04-27 11:52:40 +0000 | [diff] [blame] | 1526 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1527 | binop(Iop_CmpEQ32, |
| 1528 | unop(Iop_64to32, cc_dep1), |
| 1529 | mkU32(0))); |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1530 | } |
sewardj | 005b4ef | 2005-07-20 01:12:48 +0000 | [diff] [blame] | 1531 | if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) { |
| 1532 | /* long and/or/xor, then NZ --> test dst!=0 */ |
| 1533 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1534 | binop(Iop_CmpNE32, |
| 1535 | unop(Iop_64to32, cc_dep1), |
| 1536 | mkU32(0))); |
sewardj | 005b4ef | 2005-07-20 01:12:48 +0000 | [diff] [blame] | 1537 | } |
| 1538 | |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1539 | if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) { |
| 1540 | /* long and/or/xor, then LE |
| 1541 | This is pretty subtle. LOGIC sets SF and ZF according to the |
sewardj | 77fd846 | 2005-11-13 20:30:24 +0000 | [diff] [blame] | 1542 | result and makes OF be zero. LE computes (SF ^ OF) | ZF, but |
| 1543 | OF is zero, so this reduces to SF | ZF -- which will be 1 iff |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1544 | the result is <=signed 0. Hence ... |
| 1545 | */ |
sewardj | 6d709a9 | 2005-04-27 11:52:40 +0000 | [diff] [blame] | 1546 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1547 | binop(Iop_CmpLE32S, |
| 1548 | unop(Iop_64to32, cc_dep1), |
| 1549 | mkU32(0))); |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1550 | } |
| 1551 | |
sewardj | e430418 | 2011-06-06 10:17:46 +0000 | [diff] [blame] | 1552 | if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) { |
| 1553 | /* long and/or/xor, then S --> (ULong)result[31] */ |
| 1554 | return binop(Iop_And64, |
| 1555 | binop(Iop_Shr64, cc_dep1, mkU8(31)), |
| 1556 | mkU64(1)); |
| 1557 | } |
| 1558 | if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) { |
| 1559 | /* long and/or/xor, then NS --> (ULong) ~ result[31] */ |
| 1560 | return binop(Iop_Xor64, |
| 1561 | binop(Iop_And64, |
| 1562 | binop(Iop_Shr64, cc_dep1, mkU8(31)), |
| 1563 | mkU64(1)), |
| 1564 | mkU64(1)); |
| 1565 | } |
| 1566 | |
sewardj | 61acf4c | 2012-04-25 14:33:03 +0000 | [diff] [blame] | 1567 | /*---------------- LOGICW ----------------*/ |
| 1568 | |
| 1569 | if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondZ)) { |
| 1570 | /* word and/or/xor, then Z --> test dst==0 */ |
| 1571 | return unop(Iop_1Uto64, |
| 1572 | binop(Iop_CmpEQ64, |
| 1573 | binop(Iop_And64, cc_dep1, mkU64(0xFFFF)), |
| 1574 | mkU64(0))); |
| 1575 | } |
| 1576 | if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondNZ)) { |
| 1577 | /* word and/or/xor, then NZ --> test dst!=0 */ |
| 1578 | return unop(Iop_1Uto64, |
| 1579 | binop(Iop_CmpNE64, |
| 1580 | binop(Iop_And64, cc_dep1, mkU64(0xFFFF)), |
| 1581 | mkU64(0))); |
| 1582 | } |
| 1583 | |
sewardj | 4b06a0b | 2005-11-13 19:51:04 +0000 | [diff] [blame] | 1584 | /*---------------- LOGICB ----------------*/ |
| 1585 | |
| 1586 | if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) { |
| 1587 | /* byte and/or/xor, then Z --> test dst==0 */ |
| 1588 | return unop(Iop_1Uto64, |
| 1589 | binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)), |
| 1590 | mkU64(0))); |
| 1591 | } |
sewardj | ff6b34a | 2010-01-15 09:54:55 +0000 | [diff] [blame] | 1592 | if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) { |
| 1593 | /* byte and/or/xor, then NZ --> test dst!=0 */ |
| 1594 | return unop(Iop_1Uto64, |
| 1595 | binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)), |
| 1596 | mkU64(0))); |
| 1597 | } |
sewardj | 3f81c4e | 2005-07-20 00:30:37 +0000 | [diff] [blame] | 1598 | |
sewardj | 346d9a1 | 2006-05-21 01:02:31 +0000 | [diff] [blame] | 1599 | if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) { |
| 1600 | /* this is an idiom gcc sometimes uses to find out if the top |
| 1601 | bit of a byte register is set: eg testb %al,%al; js .. |
| 1602 | Since it just depends on the top bit of the byte, extract |
| 1603 | that bit and explicitly get rid of all the rest. This |
| 1604 | helps memcheck avoid false positives in the case where any |
| 1605 | of the other bits in the byte are undefined. */ |
| 1606 | /* byte and/or/xor, then S --> (UInt)result[7] */ |
| 1607 | return binop(Iop_And64, |
| 1608 | binop(Iop_Shr64,cc_dep1,mkU8(7)), |
| 1609 | mkU64(1)); |
| 1610 | } |
sewardj | a6d0809 | 2011-03-27 22:16:08 +0000 | [diff] [blame] | 1611 | if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) { |
| 1612 | /* byte and/or/xor, then NS --> (UInt)!result[7] */ |
| 1613 | return binop(Iop_Xor64, |
| 1614 | binop(Iop_And64, |
| 1615 | binop(Iop_Shr64,cc_dep1,mkU8(7)), |
| 1616 | mkU64(1)), |
| 1617 | mkU64(1)); |
| 1618 | } |
sewardj | 346d9a1 | 2006-05-21 01:02:31 +0000 | [diff] [blame] | 1619 | |
sewardj | 3f81c4e | 2005-07-20 00:30:37 +0000 | [diff] [blame] | 1620 | /*---------------- INCB ----------------*/ |
| 1621 | |
| 1622 | if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) { |
sewardj | 4df975f | 2010-02-28 04:51:02 +0000 | [diff] [blame] | 1623 | /* 8-bit inc, then LE --> sign bit of the arg */ |
| 1624 | return binop(Iop_And64, |
| 1625 | binop(Iop_Shr64, |
| 1626 | binop(Iop_Sub64, cc_dep1, mkU64(1)), |
| 1627 | mkU8(7)), |
| 1628 | mkU64(1)); |
sewardj | 3f81c4e | 2005-07-20 00:30:37 +0000 | [diff] [blame] | 1629 | } |
| 1630 | |
sewardj | 7784bd2 | 2006-12-29 01:54:36 +0000 | [diff] [blame] | 1631 | /*---------------- INCW ----------------*/ |
| 1632 | |
| 1633 | if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) { |
| 1634 | /* 16-bit inc, then Z --> test dst == 0 */ |
| 1635 | return unop(Iop_1Uto64, |
| 1636 | binop(Iop_CmpEQ64, |
| 1637 | binop(Iop_Shl64,cc_dep1,mkU8(48)), |
| 1638 | mkU64(0))); |
| 1639 | } |
| 1640 | |
sewardj | 77fd846 | 2005-11-13 20:30:24 +0000 | [diff] [blame] | 1641 | /*---------------- DECL ----------------*/ |
| 1642 | |
| 1643 | if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) { |
| 1644 | /* dec L, then Z --> test dst == 0 */ |
| 1645 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1646 | binop(Iop_CmpEQ32, |
| 1647 | unop(Iop_64to32, cc_dep1), |
| 1648 | mkU32(0))); |
sewardj | 77fd846 | 2005-11-13 20:30:24 +0000 | [diff] [blame] | 1649 | } |
| 1650 | |
sewardj | b6d02ea | 2005-08-01 13:35:18 +0000 | [diff] [blame] | 1651 | /*---------------- DECW ----------------*/ |
| 1652 | |
| 1653 | if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) { |
| 1654 | /* 16-bit dec, then NZ --> test dst != 0 */ |
| 1655 | return unop(Iop_1Uto64, |
| 1656 | binop(Iop_CmpNE64, |
| 1657 | binop(Iop_Shl64,cc_dep1,mkU8(48)), |
| 1658 | mkU64(0))); |
| 1659 | } |
| 1660 | |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 1661 | /*---------------- SHRQ ----------------*/ |
| 1662 | |
| 1663 | if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondZ)) { |
| 1664 | /* SHRQ, then Z --> test dep1 == 0 */ |
| 1665 | return unop(Iop_1Uto64, |
| 1666 | binop(Iop_CmpEQ64, cc_dep1, mkU64(0))); |
| 1667 | } |
| 1668 | if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondNZ)) { |
| 1669 | /* SHRQ, then NZ --> test dep1 != 0 */ |
| 1670 | return unop(Iop_1Uto64, |
| 1671 | binop(Iop_CmpNE64, cc_dep1, mkU64(0))); |
| 1672 | } |
| 1673 | |
Elliott Hughes | a0664b9 | 2017-04-18 17:46:52 -0700 | [diff] [blame] | 1674 | /*---------------- SHRL ----------------*/ |
| 1675 | |
| 1676 | if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondZ)) { |
| 1677 | /* SHRL, then Z --> test dep1 == 0 */ |
| 1678 | return unop(Iop_1Uto64, |
| 1679 | binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1), |
| 1680 | mkU32(0))); |
| 1681 | } |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 1682 | if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondNZ)) { |
| 1683 | /* SHRL, then NZ --> test dep1 != 0 */ |
| 1684 | return unop(Iop_1Uto64, |
| 1685 | binop(Iop_CmpNE32, unop(Iop_64to32, cc_dep1), |
| 1686 | mkU32(0))); |
| 1687 | } |
Elliott Hughes | a0664b9 | 2017-04-18 17:46:52 -0700 | [diff] [blame] | 1688 | |
sewardj | 7fc494b | 2005-05-05 12:05:11 +0000 | [diff] [blame] | 1689 | /*---------------- COPY ----------------*/ |
| 1690 | /* This can happen, as a result of amd64 FP compares: "comisd ... ; |
| 1691 | jbe" for example. */ |
| 1692 | |
| 1693 | if (isU64(cc_op, AMD64G_CC_OP_COPY) && |
| 1694 | (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) { |
| 1695 | /* COPY, then BE --> extract C and Z from dep1, and test (C |
| 1696 | or Z == 1). */ |
| 1697 | /* COPY, then NBE --> extract C and Z from dep1, and test (C |
| 1698 | or Z == 0). */ |
| 1699 | ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0; |
| 1700 | return |
| 1701 | unop( |
| 1702 | Iop_1Uto64, |
| 1703 | binop( |
| 1704 | Iop_CmpEQ64, |
| 1705 | binop( |
| 1706 | Iop_And64, |
| 1707 | binop( |
| 1708 | Iop_Or64, |
| 1709 | binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)), |
| 1710 | binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)) |
| 1711 | ), |
| 1712 | mkU64(1) |
| 1713 | ), |
| 1714 | mkU64(nnn) |
| 1715 | ) |
| 1716 | ); |
| 1717 | } |
| 1718 | |
sewardj | 9f05a64 | 2005-05-12 02:14:52 +0000 | [diff] [blame] | 1719 | if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) { |
| 1720 | /* COPY, then B --> extract C dep1, and test (C == 1). */ |
| 1721 | return |
| 1722 | unop( |
| 1723 | Iop_1Uto64, |
| 1724 | binop( |
| 1725 | Iop_CmpNE64, |
| 1726 | binop( |
| 1727 | Iop_And64, |
| 1728 | binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)), |
| 1729 | mkU64(1) |
| 1730 | ), |
| 1731 | mkU64(0) |
| 1732 | ) |
| 1733 | ); |
| 1734 | } |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1735 | |
sewardj | b235e5b | 2006-11-27 04:09:52 +0000 | [diff] [blame] | 1736 | if (isU64(cc_op, AMD64G_CC_OP_COPY) |
| 1737 | && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) { |
| 1738 | /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */ |
| 1739 | /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */ |
| 1740 | UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0; |
| 1741 | return |
| 1742 | unop( |
| 1743 | Iop_1Uto64, |
| 1744 | binop( |
| 1745 | Iop_CmpEQ64, |
| 1746 | binop( |
| 1747 | Iop_And64, |
| 1748 | binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)), |
| 1749 | mkU64(1) |
| 1750 | ), |
| 1751 | mkU64(nnn) |
| 1752 | ) |
| 1753 | ); |
| 1754 | } |
| 1755 | |
| 1756 | if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) { |
| 1757 | /* COPY, then P --> extract P from dep1, and test (P == 1). */ |
| 1758 | return |
| 1759 | unop( |
| 1760 | Iop_1Uto64, |
| 1761 | binop( |
| 1762 | Iop_CmpNE64, |
| 1763 | binop( |
| 1764 | Iop_And64, |
| 1765 | binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)), |
| 1766 | mkU64(1) |
| 1767 | ), |
| 1768 | mkU64(0) |
| 1769 | ) |
| 1770 | ); |
| 1771 | } |
| 1772 | |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1773 | return NULL; |
| 1774 | } |
| 1775 | |
| 1776 | /* --------- specialising "amd64g_calculate_rflags_c" --------- */ |
| 1777 | |
| 1778 | if (vex_streq(function_name, "amd64g_calculate_rflags_c")) { |
| 1779 | /* specialise calls to above "calculate_rflags_c" function */ |
| 1780 | IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep; |
| 1781 | vassert(arity == 4); |
| 1782 | cc_op = args[0]; |
| 1783 | cc_dep1 = args[1]; |
| 1784 | cc_dep2 = args[2]; |
| 1785 | cc_ndep = args[3]; |
| 1786 | |
sewardj | 77fd846 | 2005-11-13 20:30:24 +0000 | [diff] [blame] | 1787 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) { |
| 1788 | /* C after sub denotes unsigned less than */ |
| 1789 | return unop(Iop_1Uto64, |
| 1790 | binop(Iop_CmpLT64U, |
| 1791 | cc_dep1, |
| 1792 | cc_dep2)); |
| 1793 | } |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1794 | if (isU64(cc_op, AMD64G_CC_OP_SUBL)) { |
| 1795 | /* C after sub denotes unsigned less than */ |
sewardj | 6d709a9 | 2005-04-27 11:52:40 +0000 | [diff] [blame] | 1796 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1797 | binop(Iop_CmpLT32U, |
| 1798 | unop(Iop_64to32, cc_dep1), |
| 1799 | unop(Iop_64to32, cc_dep2))); |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1800 | } |
| 1801 | if (isU64(cc_op, AMD64G_CC_OP_SUBB)) { |
| 1802 | /* C after sub denotes unsigned less than */ |
sewardj | 6d709a9 | 2005-04-27 11:52:40 +0000 | [diff] [blame] | 1803 | return unop(Iop_1Uto64, |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1804 | binop(Iop_CmpLT64U, |
| 1805 | binop(Iop_And64,cc_dep1,mkU64(0xFF)), |
sewardj | 6d709a9 | 2005-04-27 11:52:40 +0000 | [diff] [blame] | 1806 | binop(Iop_And64,cc_dep2,mkU64(0xFF)))); |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1807 | } |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 1808 | if (isU64(cc_op, AMD64G_CC_OP_ADDQ)) { |
| 1809 | /* C after add denotes sum <u either arg */ |
| 1810 | return unop(Iop_1Uto64, |
| 1811 | binop(Iop_CmpLT64U, |
| 1812 | binop(Iop_Add64, cc_dep1, cc_dep2), |
| 1813 | cc_dep1)); |
| 1814 | } |
| 1815 | if (isU64(cc_op, AMD64G_CC_OP_ADDL)) { |
| 1816 | /* C after add denotes sum <u either arg */ |
| 1817 | return unop(Iop_1Uto64, |
| 1818 | binop(Iop_CmpLT32U, |
| 1819 | unop(Iop_64to32, binop(Iop_Add64, cc_dep1, cc_dep2)), |
| 1820 | unop(Iop_64to32, cc_dep1))); |
| 1821 | } |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1822 | if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) |
| 1823 | || isU64(cc_op, AMD64G_CC_OP_LOGICL) |
| 1824 | || isU64(cc_op, AMD64G_CC_OP_LOGICW) |
| 1825 | || isU64(cc_op, AMD64G_CC_OP_LOGICB)) { |
| 1826 | /* cflag after logic is zero */ |
| 1827 | return mkU64(0); |
| 1828 | } |
| 1829 | if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL) |
| 1830 | || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) { |
| 1831 | /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */ |
| 1832 | return cc_ndep; |
| 1833 | } |
sewardj | 7784bd2 | 2006-12-29 01:54:36 +0000 | [diff] [blame] | 1834 | |
| 1835 | # if 0 |
| 1836 | if (cc_op->tag == Iex_Const) { |
| 1837 | vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n"); |
| 1838 | } |
| 1839 | # endif |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1840 | |
| 1841 | return NULL; |
| 1842 | } |
| 1843 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 1844 | # undef unop |
| 1845 | # undef binop |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1846 | # undef mkU64 |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1847 | # undef mkU32 |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 1848 | # undef mkU8 |
| 1849 | |
| 1850 | return NULL; |
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 1851 | } |
| 1852 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 1853 | |
sewardj | 8d96531 | 2005-02-25 02:48:47 +0000 | [diff] [blame] | 1854 | /*---------------------------------------------------------------*/ |
| 1855 | /*--- Supporting functions for x87 FPU activities. ---*/ |
| 1856 | /*---------------------------------------------------------------*/ |
| 1857 | |
sewardj | 4f9847d | 2005-07-25 11:58:34 +0000 | [diff] [blame] | 1858 | static inline Bool host_is_little_endian ( void ) |
| 1859 | { |
| 1860 | UInt x = 0x76543210; |
| 1861 | UChar* p = (UChar*)(&x); |
| 1862 | return toBool(*p == 0x10); |
| 1863 | } |
| 1864 | |
| 1865 | /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */ |
| 1866 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 1867 | ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl ) |
| 1868 | { |
| 1869 | Bool mantissaIsZero; |
| 1870 | Int bexp; |
| 1871 | UChar sign; |
| 1872 | UChar* f64; |
| 1873 | |
| 1874 | vassert(host_is_little_endian()); |
| 1875 | |
| 1876 | /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */ |
| 1877 | |
| 1878 | f64 = (UChar*)(&dbl); |
| 1879 | sign = toUChar( (f64[7] >> 7) & 1 ); |
| 1880 | |
| 1881 | /* First off, if the tag indicates the register was empty, |
| 1882 | return 1,0,sign,1 */ |
| 1883 | if (tag == 0) { |
| 1884 | /* vex_printf("Empty\n"); */ |
| 1885 | return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1) |
| 1886 | | AMD64G_FC_MASK_C0; |
| 1887 | } |
| 1888 | |
| 1889 | bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F); |
| 1890 | bexp &= 0x7FF; |
| 1891 | |
| 1892 | mantissaIsZero |
| 1893 | = toBool( |
| 1894 | (f64[6] & 0x0F) == 0 |
| 1895 | && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0 |
| 1896 | ); |
| 1897 | |
| 1898 | /* If both exponent and mantissa are zero, the value is zero. |
| 1899 | Return 1,0,sign,0. */ |
| 1900 | if (bexp == 0 && mantissaIsZero) { |
| 1901 | /* vex_printf("Zero\n"); */ |
| 1902 | return AMD64G_FC_MASK_C3 | 0 |
| 1903 | | (sign << AMD64G_FC_SHIFT_C1) | 0; |
| 1904 | } |
| 1905 | |
| 1906 | /* If exponent is zero but mantissa isn't, it's a denormal. |
| 1907 | Return 1,1,sign,0. */ |
| 1908 | if (bexp == 0 && !mantissaIsZero) { |
| 1909 | /* vex_printf("Denormal\n"); */ |
| 1910 | return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2 |
| 1911 | | (sign << AMD64G_FC_SHIFT_C1) | 0; |
| 1912 | } |
| 1913 | |
| 1914 | /* If the exponent is 7FF and the mantissa is zero, this is an infinity. |
| 1915 | Return 0,1,sign,1. */ |
| 1916 | if (bexp == 0x7FF && mantissaIsZero) { |
| 1917 | /* vex_printf("Inf\n"); */ |
| 1918 | return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) |
| 1919 | | AMD64G_FC_MASK_C0; |
| 1920 | } |
| 1921 | |
| 1922 | /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN. |
| 1923 | Return 0,0,sign,1. */ |
| 1924 | if (bexp == 0x7FF && !mantissaIsZero) { |
| 1925 | /* vex_printf("NaN\n"); */ |
| 1926 | return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0; |
| 1927 | } |
| 1928 | |
| 1929 | /* Uh, ok, we give up. It must be a normal finite number. |
| 1930 | Return 0,1,sign,0. |
| 1931 | */ |
| 1932 | /* vex_printf("normal\n"); */ |
| 1933 | return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0; |
| 1934 | } |
| 1935 | |
| 1936 | |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 1937 | /* This is used to implement both 'frstor' and 'fldenv'. The latter |
| 1938 | appears to differ from the former only in that the 8 FP registers |
| 1939 | themselves are not transferred into the guest state. */ |
| 1940 | static |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 1941 | VexEmNote do_put_x87 ( Bool moveRegs, |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 1942 | /*IN*/Fpu_State* x87_state, |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 1943 | /*OUT*/VexGuestAMD64State* vex_state ) |
| 1944 | { |
| 1945 | Int stno, preg; |
| 1946 | UInt tag; |
| 1947 | ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); |
| 1948 | UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 1949 | UInt ftop = (x87_state->env[FP_ENV_STAT] >> 11) & 7; |
| 1950 | UInt tagw = x87_state->env[FP_ENV_TAG]; |
| 1951 | UInt fpucw = x87_state->env[FP_ENV_CTRL]; |
| 1952 | UInt c3210 = x87_state->env[FP_ENV_STAT] & 0x4700; |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 1953 | VexEmNote ew; |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 1954 | UInt fpround; |
| 1955 | ULong pair; |
| 1956 | |
| 1957 | /* Copy registers and tags */ |
| 1958 | for (stno = 0; stno < 8; stno++) { |
| 1959 | preg = (stno + ftop) & 7; |
| 1960 | tag = (tagw >> (2*preg)) & 3; |
| 1961 | if (tag == 3) { |
| 1962 | /* register is empty */ |
| 1963 | /* hmm, if it's empty, does it still get written? Probably |
| 1964 | safer to say it does. If we don't, memcheck could get out |
| 1965 | of sync, in that it thinks all FP registers are defined by |
| 1966 | this helper, but in reality some have not been updated. */ |
| 1967 | if (moveRegs) |
| 1968 | vexRegs[preg] = 0; /* IEEE754 64-bit zero */ |
| 1969 | vexTags[preg] = 0; |
| 1970 | } else { |
| 1971 | /* register is non-empty */ |
| 1972 | if (moveRegs) |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 1973 | convert_f80le_to_f64le( &x87_state->reg[10*stno], |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 1974 | (UChar*)&vexRegs[preg] ); |
| 1975 | vexTags[preg] = 1; |
| 1976 | } |
| 1977 | } |
| 1978 | |
| 1979 | /* stack pointer */ |
| 1980 | vex_state->guest_FTOP = ftop; |
| 1981 | |
| 1982 | /* status word */ |
| 1983 | vex_state->guest_FC3210 = c3210; |
| 1984 | |
| 1985 | /* handle the control word, setting FPROUND and detecting any |
| 1986 | emulation warnings. */ |
| 1987 | pair = amd64g_check_fldcw ( (ULong)fpucw ); |
sewardj | 9ae42a7 | 2012-02-16 14:18:56 +0000 | [diff] [blame] | 1988 | fpround = (UInt)pair & 0xFFFFFFFFULL; |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 1989 | ew = (VexEmNote)(pair >> 32); |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 1990 | |
| 1991 | vex_state->guest_FPROUND = fpround & 3; |
| 1992 | |
| 1993 | /* emulation warnings --> caller */ |
| 1994 | return ew; |
| 1995 | } |
| 1996 | |
| 1997 | |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 1998 | /* Create an x87 FPU state from the guest state, as close as |
| 1999 | we can approximate it. */ |
| 2000 | static |
| 2001 | void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state, |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 2002 | /*OUT*/Fpu_State* x87_state ) |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 2003 | { |
| 2004 | Int i, stno, preg; |
| 2005 | UInt tagw; |
| 2006 | ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); |
| 2007 | UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 2008 | UInt ftop = vex_state->guest_FTOP; |
| 2009 | UInt c3210 = vex_state->guest_FC3210; |
| 2010 | |
| 2011 | for (i = 0; i < 14; i++) |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 2012 | x87_state->env[i] = 0; |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 2013 | |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 2014 | x87_state->env[1] = x87_state->env[3] = x87_state->env[5] |
| 2015 | = x87_state->env[13] = 0xFFFF; |
| 2016 | x87_state->env[FP_ENV_STAT] |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 2017 | = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700)); |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 2018 | x87_state->env[FP_ENV_CTRL] |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 2019 | = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND )); |
| 2020 | |
| 2021 | /* Dump the register stack in ST order. */ |
| 2022 | tagw = 0; |
| 2023 | for (stno = 0; stno < 8; stno++) { |
| 2024 | preg = (stno + ftop) & 7; |
| 2025 | if (vexTags[preg] == 0) { |
| 2026 | /* register is empty */ |
| 2027 | tagw |= (3 << (2*preg)); |
| 2028 | convert_f64le_to_f80le( (UChar*)&vexRegs[preg], |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 2029 | &x87_state->reg[10*stno] ); |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 2030 | } else { |
| 2031 | /* register is full. */ |
| 2032 | tagw |= (0 << (2*preg)); |
| 2033 | convert_f64le_to_f80le( (UChar*)&vexRegs[preg], |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 2034 | &x87_state->reg[10*stno] ); |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 2035 | } |
| 2036 | } |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 2037 | x87_state->env[FP_ENV_TAG] = toUShort(tagw); |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 2038 | } |
| 2039 | |
| 2040 | |
sewardj | 70dbeb0 | 2015-08-12 11:15:53 +0000 | [diff] [blame] | 2041 | /*---------------------------------------------------------------*/ |
| 2042 | /*--- Supporting functions for XSAVE/FXSAVE. ---*/ |
| 2043 | /*---------------------------------------------------------------*/ |
| 2044 | |
| 2045 | /* CALLED FROM GENERATED CODE */ |
| 2046 | /* DIRTY HELPER (reads guest state, writes guest mem) */ |
| 2047 | /* XSAVE component 0 is the x87 FPU state. */ |
| 2048 | void amd64g_dirtyhelper_XSAVE_COMPONENT_0 |
| 2049 | ( VexGuestAMD64State* gst, HWord addr ) |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 2050 | { |
| 2051 | /* Derived from values obtained from |
| 2052 | vendor_id : AuthenticAMD |
| 2053 | cpu family : 15 |
| 2054 | model : 12 |
| 2055 | model name : AMD Athlon(tm) 64 Processor 3200+ |
| 2056 | stepping : 0 |
| 2057 | cpu MHz : 2200.000 |
| 2058 | cache size : 512 KB |
| 2059 | */ |
| 2060 | /* Somewhat roundabout, but at least it's simple. */ |
| 2061 | Fpu_State tmp; |
| 2062 | UShort* addrS = (UShort*)addr; |
| 2063 | UChar* addrC = (UChar*)addr; |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 2064 | UShort fp_tags; |
| 2065 | UInt summary_tags; |
| 2066 | Int r, stno; |
| 2067 | UShort *srcS, *dstS; |
| 2068 | |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 2069 | do_get_x87( gst, &tmp ); |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 2070 | |
sewardj | 70dbeb0 | 2015-08-12 11:15:53 +0000 | [diff] [blame] | 2071 | /* Now build the proper fxsave x87 image from the fsave x87 image |
| 2072 | we just made. */ |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 2073 | |
| 2074 | addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */ |
| 2075 | addrS[1] = tmp.env[FP_ENV_STAT]; /* FCW: fpu status word */ |
| 2076 | |
| 2077 | /* set addrS[2] in an endian-independent way */ |
| 2078 | summary_tags = 0; |
| 2079 | fp_tags = tmp.env[FP_ENV_TAG]; |
| 2080 | for (r = 0; r < 8; r++) { |
| 2081 | if ( ((fp_tags >> (2*r)) & 3) != 3 ) |
| 2082 | summary_tags |= (1 << r); |
| 2083 | } |
| 2084 | addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */ |
| 2085 | addrC[5] = 0; /* pad */ |
| 2086 | |
| 2087 | /* FOP: faulting fpu opcode. From experimentation, the real CPU |
| 2088 | does not write this field. (?!) */ |
| 2089 | addrS[3] = 0; /* BOGUS */ |
| 2090 | |
| 2091 | /* RIP (Last x87 instruction pointer). From experimentation, the |
| 2092 | real CPU does not write this field. (?!) */ |
| 2093 | addrS[4] = 0; /* BOGUS */ |
| 2094 | addrS[5] = 0; /* BOGUS */ |
| 2095 | addrS[6] = 0; /* BOGUS */ |
| 2096 | addrS[7] = 0; /* BOGUS */ |
| 2097 | |
| 2098 | /* RDP (Last x87 data pointer). From experimentation, the real CPU |
| 2099 | does not write this field. (?!) */ |
| 2100 | addrS[8] = 0; /* BOGUS */ |
| 2101 | addrS[9] = 0; /* BOGUS */ |
| 2102 | addrS[10] = 0; /* BOGUS */ |
| 2103 | addrS[11] = 0; /* BOGUS */ |
| 2104 | |
sewardj | 70dbeb0 | 2015-08-12 11:15:53 +0000 | [diff] [blame] | 2105 | /* addrS[13,12] are MXCSR -- not written */ |
| 2106 | /* addrS[15,14] are MXCSR_MASK -- not written */ |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 2107 | |
| 2108 | /* Copy in the FP registers, in ST order. */ |
| 2109 | for (stno = 0; stno < 8; stno++) { |
| 2110 | srcS = (UShort*)(&tmp.reg[10*stno]); |
| 2111 | dstS = (UShort*)(&addrS[16 + 8*stno]); |
| 2112 | dstS[0] = srcS[0]; |
| 2113 | dstS[1] = srcS[1]; |
| 2114 | dstS[2] = srcS[2]; |
| 2115 | dstS[3] = srcS[3]; |
| 2116 | dstS[4] = srcS[4]; |
| 2117 | dstS[5] = 0; |
| 2118 | dstS[6] = 0; |
| 2119 | dstS[7] = 0; |
| 2120 | } |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 2121 | } |
| 2122 | |
| 2123 | |
sewardj | 70dbeb0 | 2015-08-12 11:15:53 +0000 | [diff] [blame] | 2124 | /* CALLED FROM GENERATED CODE */ |
| 2125 | /* DIRTY HELPER (reads guest state, writes guest mem) */ |
| 2126 | /* XSAVE component 1 is the SSE state. */ |
| 2127 | void amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS |
| 2128 | ( VexGuestAMD64State* gst, HWord addr ) |
| 2129 | { |
| 2130 | UShort* addrS = (UShort*)addr; |
| 2131 | UInt mxcsr; |
| 2132 | |
| 2133 | /* The only non-register parts of the SSE state are MXCSR and |
| 2134 | MXCSR_MASK. */ |
| 2135 | mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND ); |
| 2136 | |
| 2137 | addrS[12] = toUShort(mxcsr); /* MXCSR */ |
| 2138 | addrS[13] = toUShort(mxcsr >> 16); |
| 2139 | |
| 2140 | addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */ |
| 2141 | addrS[15] = 0x0000; /* MXCSR mask (hi16) */ |
| 2142 | } |
| 2143 | |
| 2144 | |
| 2145 | /* VISIBLE TO LIBVEX CLIENT */ |
| 2146 | /* Do FXSAVE from the supplied VexGuestAMD64State structure and store |
| 2147 | the result at the given address which represents a buffer of at |
| 2148 | least 416 bytes. |
| 2149 | |
| 2150 | This function is not called from generated code. FXSAVE is dealt |
| 2151 | with by the amd64 front end by calling the XSAVE_COMPONENT_{0,1} |
| 2152 | functions above plus some in-line IR. This function is merely a |
| 2153 | convenience function for VEX's users. |
| 2154 | */ |
| 2155 | void LibVEX_GuestAMD64_fxsave ( /*IN*/VexGuestAMD64State* gst, |
| 2156 | /*OUT*/HWord fp_state ) |
| 2157 | { |
| 2158 | /* Do the x87 part */ |
| 2159 | amd64g_dirtyhelper_XSAVE_COMPONENT_0(gst, fp_state); |
| 2160 | |
| 2161 | /* And now the SSE part, except for the registers themselves. */ |
| 2162 | amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS(gst, fp_state); |
| 2163 | |
| 2164 | /* That's the first 160 bytes of the image done. */ |
| 2165 | /* Now only %xmm0 .. %xmm15 remain to be copied. If the host is |
| 2166 | big-endian, these need to be byte-swapped. */ |
| 2167 | U128 *xmm = (U128 *)(fp_state + 160); |
| 2168 | vassert(host_is_little_endian()); |
| 2169 | |
| 2170 | # define COPY_U128(_dst,_src) \ |
| 2171 | do { _dst[0] = _src[0]; _dst[1] = _src[1]; \ |
| 2172 | _dst[2] = _src[2]; _dst[3] = _src[3]; } \ |
| 2173 | while (0) |
| 2174 | |
| 2175 | COPY_U128( xmm[0], gst->guest_YMM0 ); |
| 2176 | COPY_U128( xmm[1], gst->guest_YMM1 ); |
| 2177 | COPY_U128( xmm[2], gst->guest_YMM2 ); |
| 2178 | COPY_U128( xmm[3], gst->guest_YMM3 ); |
| 2179 | COPY_U128( xmm[4], gst->guest_YMM4 ); |
| 2180 | COPY_U128( xmm[5], gst->guest_YMM5 ); |
| 2181 | COPY_U128( xmm[6], gst->guest_YMM6 ); |
| 2182 | COPY_U128( xmm[7], gst->guest_YMM7 ); |
| 2183 | COPY_U128( xmm[8], gst->guest_YMM8 ); |
| 2184 | COPY_U128( xmm[9], gst->guest_YMM9 ); |
| 2185 | COPY_U128( xmm[10], gst->guest_YMM10 ); |
| 2186 | COPY_U128( xmm[11], gst->guest_YMM11 ); |
| 2187 | COPY_U128( xmm[12], gst->guest_YMM12 ); |
| 2188 | COPY_U128( xmm[13], gst->guest_YMM13 ); |
| 2189 | COPY_U128( xmm[14], gst->guest_YMM14 ); |
| 2190 | COPY_U128( xmm[15], gst->guest_YMM15 ); |
| 2191 | # undef COPY_U128 |
| 2192 | } |
| 2193 | |
| 2194 | |
| 2195 | /*---------------------------------------------------------------*/ |
| 2196 | /*--- Supporting functions for XRSTOR/FXRSTOR. ---*/ |
| 2197 | /*---------------------------------------------------------------*/ |
| 2198 | |
| 2199 | /* CALLED FROM GENERATED CODE */ |
| 2200 | /* DIRTY HELPER (writes guest state, reads guest mem) */ |
| 2201 | VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_0 |
| 2202 | ( VexGuestAMD64State* gst, HWord addr ) |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 2203 | { |
| 2204 | Fpu_State tmp; |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 2205 | UShort* addrS = (UShort*)addr; |
| 2206 | UChar* addrC = (UChar*)addr; |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 2207 | UShort fp_tags; |
| 2208 | Int r, stno, i; |
| 2209 | |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 2210 | /* Copy the x87 registers out of the image, into a temporary |
| 2211 | Fpu_State struct. */ |
| 2212 | for (i = 0; i < 14; i++) tmp.env[i] = 0; |
| 2213 | for (i = 0; i < 80; i++) tmp.reg[i] = 0; |
| 2214 | /* fill in tmp.reg[0..7] */ |
| 2215 | for (stno = 0; stno < 8; stno++) { |
| 2216 | UShort* dstS = (UShort*)(&tmp.reg[10*stno]); |
| 2217 | UShort* srcS = (UShort*)(&addrS[16 + 8*stno]); |
| 2218 | dstS[0] = srcS[0]; |
| 2219 | dstS[1] = srcS[1]; |
| 2220 | dstS[2] = srcS[2]; |
| 2221 | dstS[3] = srcS[3]; |
| 2222 | dstS[4] = srcS[4]; |
| 2223 | } |
| 2224 | /* fill in tmp.env[0..13] */ |
| 2225 | tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */ |
| 2226 | tmp.env[FP_ENV_STAT] = addrS[1]; /* FCW: fpu status word */ |
| 2227 | |
| 2228 | fp_tags = 0; |
| 2229 | for (r = 0; r < 8; r++) { |
| 2230 | if (addrC[4] & (1<<r)) |
| 2231 | fp_tags |= (0 << (2*r)); /* EMPTY */ |
| 2232 | else |
| 2233 | fp_tags |= (3 << (2*r)); /* VALID -- not really precise enough. */ |
| 2234 | } |
| 2235 | tmp.env[FP_ENV_TAG] = fp_tags; |
| 2236 | |
| 2237 | /* Now write 'tmp' into the guest state. */ |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 2238 | VexEmNote warnX87 = do_put_x87( True/*moveRegs*/, &tmp, gst ); |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 2239 | |
sewardj | 70dbeb0 | 2015-08-12 11:15:53 +0000 | [diff] [blame] | 2240 | return warnX87; |
| 2241 | } |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 2242 | |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 2243 | |
sewardj | 70dbeb0 | 2015-08-12 11:15:53 +0000 | [diff] [blame] | 2244 | /* CALLED FROM GENERATED CODE */ |
| 2245 | /* DIRTY HELPER (writes guest state, reads guest mem) */ |
| 2246 | VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS |
| 2247 | ( VexGuestAMD64State* gst, HWord addr ) |
| 2248 | { |
| 2249 | UShort* addrS = (UShort*)addr; |
| 2250 | UInt w32 = (((UInt)addrS[12]) & 0xFFFF) |
| 2251 | | ((((UInt)addrS[13]) & 0xFFFF) << 16); |
| 2252 | ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 ); |
| 2253 | |
| 2254 | VexEmNote warnXMM = (VexEmNote)(w64 >> 32); |
| 2255 | |
| 2256 | gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL; |
| 2257 | return warnXMM; |
| 2258 | } |
| 2259 | |
| 2260 | |
| 2261 | /* VISIBLE TO LIBVEX CLIENT */ |
| 2262 | /* Do FXRSTOR from the supplied address and store read values to the given |
| 2263 | VexGuestAMD64State structure. |
| 2264 | |
| 2265 | This function is not called from generated code. FXRSTOR is dealt |
| 2266 | with by the amd64 front end by calling the XRSTOR_COMPONENT_{0,1} |
| 2267 | functions above plus some in-line IR. This function is merely a |
| 2268 | convenience function for VEX's users. |
| 2269 | */ |
| 2270 | VexEmNote LibVEX_GuestAMD64_fxrstor ( /*IN*/HWord fp_state, |
| 2271 | /*MOD*/VexGuestAMD64State* gst ) |
| 2272 | { |
| 2273 | /* Restore %xmm0 .. %xmm15. If the host is big-endian, these need |
| 2274 | to be byte-swapped. */ |
| 2275 | U128 *xmm = (U128 *)(fp_state + 160); |
| 2276 | |
| 2277 | vassert(host_is_little_endian()); |
| 2278 | |
| 2279 | # define COPY_U128(_dst,_src) \ |
| 2280 | do { _dst[0] = _src[0]; _dst[1] = _src[1]; \ |
| 2281 | _dst[2] = _src[2]; _dst[3] = _src[3]; } \ |
| 2282 | while (0) |
| 2283 | |
| 2284 | COPY_U128( gst->guest_YMM0, xmm[0] ); |
| 2285 | COPY_U128( gst->guest_YMM1, xmm[1] ); |
| 2286 | COPY_U128( gst->guest_YMM2, xmm[2] ); |
| 2287 | COPY_U128( gst->guest_YMM3, xmm[3] ); |
| 2288 | COPY_U128( gst->guest_YMM4, xmm[4] ); |
| 2289 | COPY_U128( gst->guest_YMM5, xmm[5] ); |
| 2290 | COPY_U128( gst->guest_YMM6, xmm[6] ); |
| 2291 | COPY_U128( gst->guest_YMM7, xmm[7] ); |
| 2292 | COPY_U128( gst->guest_YMM8, xmm[8] ); |
| 2293 | COPY_U128( gst->guest_YMM9, xmm[9] ); |
| 2294 | COPY_U128( gst->guest_YMM10, xmm[10] ); |
| 2295 | COPY_U128( gst->guest_YMM11, xmm[11] ); |
| 2296 | COPY_U128( gst->guest_YMM12, xmm[12] ); |
| 2297 | COPY_U128( gst->guest_YMM13, xmm[13] ); |
| 2298 | COPY_U128( gst->guest_YMM14, xmm[14] ); |
| 2299 | COPY_U128( gst->guest_YMM15, xmm[15] ); |
| 2300 | |
| 2301 | # undef COPY_U128 |
| 2302 | |
| 2303 | VexEmNote warnXMM |
| 2304 | = amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS(gst, fp_state); |
| 2305 | VexEmNote warnX87 |
| 2306 | = amd64g_dirtyhelper_XRSTOR_COMPONENT_0(gst, fp_state); |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 2307 | |
| 2308 | /* Prefer an X87 emwarn over an XMM one, if both exist. */ |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 2309 | if (warnX87 != EmNote_NONE) |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 2310 | return warnX87; |
| 2311 | else |
| 2312 | return warnXMM; |
| 2313 | } |
| 2314 | |
| 2315 | |
sewardj | 70dbeb0 | 2015-08-12 11:15:53 +0000 | [diff] [blame] | 2316 | /*---------------------------------------------------------------*/ |
| 2317 | /*--- Supporting functions for FSAVE/FRSTOR ---*/ |
| 2318 | /*---------------------------------------------------------------*/ |
sewardj | 3e5d82d | 2015-07-21 14:43:23 +0000 | [diff] [blame] | 2319 | |
sewardj | 0585a03 | 2005-11-05 02:55:06 +0000 | [diff] [blame] | 2320 | /* DIRTY HELPER (writes guest state) */ |
sewardj | 8d96531 | 2005-02-25 02:48:47 +0000 | [diff] [blame] | 2321 | /* Initialise the x87 FPU state as per 'finit'. */ |
sewardj | 8d96531 | 2005-02-25 02:48:47 +0000 | [diff] [blame] | 2322 | void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst ) |
| 2323 | { |
| 2324 | Int i; |
| 2325 | gst->guest_FTOP = 0; |
| 2326 | for (i = 0; i < 8; i++) { |
| 2327 | gst->guest_FPTAG[i] = 0; /* empty */ |
| 2328 | gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */ |
| 2329 | } |
| 2330 | gst->guest_FPROUND = (ULong)Irrm_NEAREST; |
| 2331 | gst->guest_FC3210 = 0; |
| 2332 | } |
| 2333 | |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2334 | |
sewardj | 924215b | 2005-03-26 21:50:31 +0000 | [diff] [blame] | 2335 | /* CALLED FROM GENERATED CODE */ |
| 2336 | /* DIRTY HELPER (reads guest memory) */ |
florian | bdf99f0 | 2015-01-04 17:20:19 +0000 | [diff] [blame] | 2337 | ULong amd64g_dirtyhelper_loadF80le ( Addr addrU ) |
sewardj | 924215b | 2005-03-26 21:50:31 +0000 | [diff] [blame] | 2338 | { |
| 2339 | ULong f64; |
florian | bdf99f0 | 2015-01-04 17:20:19 +0000 | [diff] [blame] | 2340 | convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 ); |
sewardj | 924215b | 2005-03-26 21:50:31 +0000 | [diff] [blame] | 2341 | return f64; |
| 2342 | } |
| 2343 | |
| 2344 | /* CALLED FROM GENERATED CODE */ |
| 2345 | /* DIRTY HELPER (writes guest memory) */ |
florian | bdf99f0 | 2015-01-04 17:20:19 +0000 | [diff] [blame] | 2346 | void amd64g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 ) |
sewardj | 924215b | 2005-03-26 21:50:31 +0000 | [diff] [blame] | 2347 | { |
florian | bdf99f0 | 2015-01-04 17:20:19 +0000 | [diff] [blame] | 2348 | convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU ); |
sewardj | 924215b | 2005-03-26 21:50:31 +0000 | [diff] [blame] | 2349 | } |
| 2350 | |
| 2351 | |
sewardj | bcbb9de | 2005-03-27 02:22:32 +0000 | [diff] [blame] | 2352 | /* CALLED FROM GENERATED CODE */ |
| 2353 | /* CLEAN HELPER */ |
| 2354 | /* mxcsr[15:0] contains a SSE native format MXCSR value. |
| 2355 | Extract from it the required SSEROUND value and any resulting |
| 2356 | emulation warning, and return (warn << 32) | sseround value. |
| 2357 | */ |
| 2358 | ULong amd64g_check_ldmxcsr ( ULong mxcsr ) |
| 2359 | { |
| 2360 | /* Decide on a rounding mode. mxcsr[14:13] holds it. */ |
| 2361 | /* NOTE, encoded exactly as per enum IRRoundingMode. */ |
| 2362 | ULong rmode = (mxcsr >> 13) & 3; |
| 2363 | |
| 2364 | /* Detect any required emulation warnings. */ |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 2365 | VexEmNote ew = EmNote_NONE; |
sewardj | bcbb9de | 2005-03-27 02:22:32 +0000 | [diff] [blame] | 2366 | |
| 2367 | if ((mxcsr & 0x1F80) != 0x1F80) { |
| 2368 | /* unmasked exceptions! */ |
| 2369 | ew = EmWarn_X86_sseExns; |
| 2370 | } |
| 2371 | else |
| 2372 | if (mxcsr & (1<<15)) { |
| 2373 | /* FZ is set */ |
| 2374 | ew = EmWarn_X86_fz; |
| 2375 | } |
| 2376 | else |
| 2377 | if (mxcsr & (1<<6)) { |
| 2378 | /* DAZ is set */ |
| 2379 | ew = EmWarn_X86_daz; |
| 2380 | } |
| 2381 | |
| 2382 | return (((ULong)ew) << 32) | ((ULong)rmode); |
| 2383 | } |
| 2384 | |
| 2385 | |
| 2386 | /* CALLED FROM GENERATED CODE */ |
| 2387 | /* CLEAN HELPER */ |
| 2388 | /* Given sseround as an IRRoundingMode value, create a suitable SSE |
| 2389 | native format MXCSR value. */ |
| 2390 | ULong amd64g_create_mxcsr ( ULong sseround ) |
| 2391 | { |
| 2392 | sseround &= 3; |
| 2393 | return 0x1F80 | (sseround << 13); |
| 2394 | } |
| 2395 | |
| 2396 | |
sewardj | 5e20537 | 2005-05-09 02:57:08 +0000 | [diff] [blame] | 2397 | /* CLEAN HELPER */ |
| 2398 | /* fpucw[15:0] contains a x87 native format FPU control word. |
| 2399 | Extract from it the required FPROUND value and any resulting |
| 2400 | emulation warning, and return (warn << 32) | fpround value. |
| 2401 | */ |
| 2402 | ULong amd64g_check_fldcw ( ULong fpucw ) |
| 2403 | { |
| 2404 | /* Decide on a rounding mode. fpucw[11:10] holds it. */ |
| 2405 | /* NOTE, encoded exactly as per enum IRRoundingMode. */ |
| 2406 | ULong rmode = (fpucw >> 10) & 3; |
| 2407 | |
| 2408 | /* Detect any required emulation warnings. */ |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 2409 | VexEmNote ew = EmNote_NONE; |
sewardj | 5e20537 | 2005-05-09 02:57:08 +0000 | [diff] [blame] | 2410 | |
| 2411 | if ((fpucw & 0x3F) != 0x3F) { |
| 2412 | /* unmasked exceptions! */ |
| 2413 | ew = EmWarn_X86_x87exns; |
| 2414 | } |
| 2415 | else |
| 2416 | if (((fpucw >> 8) & 3) != 3) { |
| 2417 | /* unsupported precision */ |
| 2418 | ew = EmWarn_X86_x87precision; |
| 2419 | } |
| 2420 | |
| 2421 | return (((ULong)ew) << 32) | ((ULong)rmode); |
| 2422 | } |
| 2423 | |
| 2424 | |
| 2425 | /* CLEAN HELPER */ |
| 2426 | /* Given fpround as an IRRoundingMode value, create a suitable x87 |
| 2427 | native format FPU control word. */ |
| 2428 | ULong amd64g_create_fpucw ( ULong fpround ) |
| 2429 | { |
| 2430 | fpround &= 3; |
| 2431 | return 0x037F | (fpround << 10); |
| 2432 | } |
| 2433 | |
sewardj | bcbb9de | 2005-03-27 02:22:32 +0000 | [diff] [blame] | 2434 | |
sewardj | 4017a3b | 2005-06-13 12:17:27 +0000 | [diff] [blame] | 2435 | /* This is used to implement 'fldenv'. |
| 2436 | Reads 28 bytes at x87_state[0 .. 27]. */ |
| 2437 | /* CALLED FROM GENERATED CODE */ |
| 2438 | /* DIRTY HELPER */ |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 2439 | VexEmNote amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state, |
sewardj | 4017a3b | 2005-06-13 12:17:27 +0000 | [diff] [blame] | 2440 | /*IN*/HWord x87_state) |
| 2441 | { |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 2442 | return do_put_x87( False, (Fpu_State*)x87_state, vex_state ); |
sewardj | 4017a3b | 2005-06-13 12:17:27 +0000 | [diff] [blame] | 2443 | } |
| 2444 | |
| 2445 | |
| 2446 | /* CALLED FROM GENERATED CODE */ |
| 2447 | /* DIRTY HELPER */ |
| 2448 | /* Create an x87 FPU env from the guest state, as close as we can |
| 2449 | approximate it. Writes 28 bytes at x87_state[0..27]. */ |
| 2450 | void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state, |
| 2451 | /*OUT*/HWord x87_state ) |
| 2452 | { |
| 2453 | Int i, stno, preg; |
| 2454 | UInt tagw; |
| 2455 | UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); |
| 2456 | Fpu_State* x87 = (Fpu_State*)x87_state; |
| 2457 | UInt ftop = vex_state->guest_FTOP; |
| 2458 | ULong c3210 = vex_state->guest_FC3210; |
| 2459 | |
| 2460 | for (i = 0; i < 14; i++) |
| 2461 | x87->env[i] = 0; |
| 2462 | |
| 2463 | x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF; |
| 2464 | x87->env[FP_ENV_STAT] |
sewardj | 81d72ea | 2005-06-14 21:59:16 +0000 | [diff] [blame] | 2465 | = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) )); |
sewardj | 4017a3b | 2005-06-13 12:17:27 +0000 | [diff] [blame] | 2466 | x87->env[FP_ENV_CTRL] |
sewardj | 81d72ea | 2005-06-14 21:59:16 +0000 | [diff] [blame] | 2467 | = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) )); |
sewardj | 4017a3b | 2005-06-13 12:17:27 +0000 | [diff] [blame] | 2468 | |
| 2469 | /* Compute the x87 tag word. */ |
| 2470 | tagw = 0; |
| 2471 | for (stno = 0; stno < 8; stno++) { |
| 2472 | preg = (stno + ftop) & 7; |
| 2473 | if (vexTags[preg] == 0) { |
| 2474 | /* register is empty */ |
| 2475 | tagw |= (3 << (2*preg)); |
| 2476 | } else { |
| 2477 | /* register is full. */ |
| 2478 | tagw |= (0 << (2*preg)); |
| 2479 | } |
| 2480 | } |
| 2481 | x87->env[FP_ENV_TAG] = toUShort(tagw); |
| 2482 | |
| 2483 | /* We don't dump the x87 registers, tho. */ |
| 2484 | } |
| 2485 | |
| 2486 | |
sewardj | 9ae42a7 | 2012-02-16 14:18:56 +0000 | [diff] [blame] | 2487 | /* This is used to implement 'fnsave'. |
| 2488 | Writes 108 bytes at x87_state[0 .. 107]. */ |
| 2489 | /* CALLED FROM GENERATED CODE */ |
| 2490 | /* DIRTY HELPER */ |
| 2491 | void amd64g_dirtyhelper_FNSAVE ( /*IN*/VexGuestAMD64State* vex_state, |
| 2492 | /*OUT*/HWord x87_state) |
| 2493 | { |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 2494 | do_get_x87( vex_state, (Fpu_State*)x87_state ); |
sewardj | 9ae42a7 | 2012-02-16 14:18:56 +0000 | [diff] [blame] | 2495 | } |
| 2496 | |
| 2497 | |
| 2498 | /* This is used to implement 'fnsaves'. |
| 2499 | Writes 94 bytes at x87_state[0 .. 93]. */ |
| 2500 | /* CALLED FROM GENERATED CODE */ |
| 2501 | /* DIRTY HELPER */ |
| 2502 | void amd64g_dirtyhelper_FNSAVES ( /*IN*/VexGuestAMD64State* vex_state, |
| 2503 | /*OUT*/HWord x87_state) |
| 2504 | { |
| 2505 | Int i, stno, preg; |
| 2506 | UInt tagw; |
| 2507 | ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); |
| 2508 | UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); |
| 2509 | Fpu_State_16* x87 = (Fpu_State_16*)x87_state; |
| 2510 | UInt ftop = vex_state->guest_FTOP; |
| 2511 | UInt c3210 = vex_state->guest_FC3210; |
| 2512 | |
| 2513 | for (i = 0; i < 7; i++) |
| 2514 | x87->env[i] = 0; |
| 2515 | |
| 2516 | x87->env[FPS_ENV_STAT] |
| 2517 | = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700)); |
| 2518 | x87->env[FPS_ENV_CTRL] |
| 2519 | = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND )); |
| 2520 | |
| 2521 | /* Dump the register stack in ST order. */ |
| 2522 | tagw = 0; |
| 2523 | for (stno = 0; stno < 8; stno++) { |
| 2524 | preg = (stno + ftop) & 7; |
| 2525 | if (vexTags[preg] == 0) { |
| 2526 | /* register is empty */ |
| 2527 | tagw |= (3 << (2*preg)); |
| 2528 | convert_f64le_to_f80le( (UChar*)&vexRegs[preg], |
| 2529 | &x87->reg[10*stno] ); |
| 2530 | } else { |
| 2531 | /* register is full. */ |
| 2532 | tagw |= (0 << (2*preg)); |
| 2533 | convert_f64le_to_f80le( (UChar*)&vexRegs[preg], |
| 2534 | &x87->reg[10*stno] ); |
| 2535 | } |
| 2536 | } |
| 2537 | x87->env[FPS_ENV_TAG] = toUShort(tagw); |
| 2538 | } |
| 2539 | |
| 2540 | |
| 2541 | /* This is used to implement 'frstor'. |
| 2542 | Reads 108 bytes at x87_state[0 .. 107]. */ |
| 2543 | /* CALLED FROM GENERATED CODE */ |
| 2544 | /* DIRTY HELPER */ |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 2545 | VexEmNote amd64g_dirtyhelper_FRSTOR ( /*OUT*/VexGuestAMD64State* vex_state, |
sewardj | 9ae42a7 | 2012-02-16 14:18:56 +0000 | [diff] [blame] | 2546 | /*IN*/HWord x87_state) |
| 2547 | { |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame^] | 2548 | return do_put_x87( True, (Fpu_State*)x87_state, vex_state ); |
sewardj | 9ae42a7 | 2012-02-16 14:18:56 +0000 | [diff] [blame] | 2549 | } |
| 2550 | |
| 2551 | |
| 2552 | /* This is used to implement 'frstors'. |
| 2553 | Reads 94 bytes at x87_state[0 .. 93]. */ |
| 2554 | /* CALLED FROM GENERATED CODE */ |
| 2555 | /* DIRTY HELPER */ |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 2556 | VexEmNote amd64g_dirtyhelper_FRSTORS ( /*OUT*/VexGuestAMD64State* vex_state, |
sewardj | 9ae42a7 | 2012-02-16 14:18:56 +0000 | [diff] [blame] | 2557 | /*IN*/HWord x87_state) |
| 2558 | { |
| 2559 | Int stno, preg; |
| 2560 | UInt tag; |
| 2561 | ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); |
| 2562 | UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); |
| 2563 | Fpu_State_16* x87 = (Fpu_State_16*)x87_state; |
| 2564 | UInt ftop = (x87->env[FPS_ENV_STAT] >> 11) & 7; |
| 2565 | UInt tagw = x87->env[FPS_ENV_TAG]; |
| 2566 | UInt fpucw = x87->env[FPS_ENV_CTRL]; |
| 2567 | UInt c3210 = x87->env[FPS_ENV_STAT] & 0x4700; |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 2568 | VexEmNote ew; |
sewardj | 9ae42a7 | 2012-02-16 14:18:56 +0000 | [diff] [blame] | 2569 | UInt fpround; |
| 2570 | ULong pair; |
| 2571 | |
| 2572 | /* Copy registers and tags */ |
| 2573 | for (stno = 0; stno < 8; stno++) { |
| 2574 | preg = (stno + ftop) & 7; |
| 2575 | tag = (tagw >> (2*preg)) & 3; |
| 2576 | if (tag == 3) { |
| 2577 | /* register is empty */ |
| 2578 | /* hmm, if it's empty, does it still get written? Probably |
| 2579 | safer to say it does. If we don't, memcheck could get out |
| 2580 | of sync, in that it thinks all FP registers are defined by |
| 2581 | this helper, but in reality some have not been updated. */ |
| 2582 | vexRegs[preg] = 0; /* IEEE754 64-bit zero */ |
| 2583 | vexTags[preg] = 0; |
| 2584 | } else { |
| 2585 | /* register is non-empty */ |
| 2586 | convert_f80le_to_f64le( &x87->reg[10*stno], |
| 2587 | (UChar*)&vexRegs[preg] ); |
| 2588 | vexTags[preg] = 1; |
| 2589 | } |
| 2590 | } |
| 2591 | |
| 2592 | /* stack pointer */ |
| 2593 | vex_state->guest_FTOP = ftop; |
| 2594 | |
| 2595 | /* status word */ |
| 2596 | vex_state->guest_FC3210 = c3210; |
| 2597 | |
| 2598 | /* handle the control word, setting FPROUND and detecting any |
| 2599 | emulation warnings. */ |
| 2600 | pair = amd64g_check_fldcw ( (ULong)fpucw ); |
| 2601 | fpround = (UInt)pair & 0xFFFFFFFFULL; |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 2602 | ew = (VexEmNote)(pair >> 32); |
sewardj | 9ae42a7 | 2012-02-16 14:18:56 +0000 | [diff] [blame] | 2603 | |
| 2604 | vex_state->guest_FPROUND = fpround & 3; |
| 2605 | |
| 2606 | /* emulation warnings --> caller */ |
| 2607 | return ew; |
| 2608 | } |
| 2609 | |
| 2610 | |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2611 | /*---------------------------------------------------------------*/ |
sewardj | 70dbeb0 | 2015-08-12 11:15:53 +0000 | [diff] [blame] | 2612 | /*--- CPUID helpers. ---*/ |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2613 | /*---------------------------------------------------------------*/ |
| 2614 | |
sewardj | e9d8a26 | 2009-07-01 08:06:34 +0000 | [diff] [blame] | 2615 | /* Claim to be the following CPU, which is probably representative of |
| 2616 | the lowliest (earliest) amd64 offerings. It can do neither sse3 |
| 2617 | nor cx16. |
| 2618 | |
| 2619 | vendor_id : AuthenticAMD |
| 2620 | cpu family : 15 |
| 2621 | model : 5 |
| 2622 | model name : AMD Opteron (tm) Processor 848 |
| 2623 | stepping : 10 |
| 2624 | cpu MHz : 1797.682 |
| 2625 | cache size : 1024 KB |
| 2626 | fpu : yes |
| 2627 | fpu_exception : yes |
| 2628 | cpuid level : 1 |
| 2629 | wp : yes |
| 2630 | flags : fpu vme de pse tsc msr pae mce cx8 apic sep |
| 2631 | mtrr pge mca cmov pat pse36 clflush mmx fxsr |
| 2632 | sse sse2 syscall nx mmxext lm 3dnowext 3dnow |
| 2633 | bogomips : 3600.62 |
| 2634 | TLB size : 1088 4K pages |
| 2635 | clflush size : 64 |
| 2636 | cache_alignment : 64 |
| 2637 | address sizes : 40 bits physical, 48 bits virtual |
sewardj | 1aa3aef | 2012-02-21 08:53:54 +0000 | [diff] [blame] | 2638 | power management: ts fid vid ttp |
| 2639 | |
| 2640 | 2012-Feb-21: don't claim 3dnow or 3dnowext, since in fact |
| 2641 | we don't support them. See #291568. 3dnow is 80000001.EDX.31 |
| 2642 | and 3dnowext is 80000001.EDX.30. |
sewardj | e9d8a26 | 2009-07-01 08:06:34 +0000 | [diff] [blame] | 2643 | */ |
| 2644 | void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st ) |
| 2645 | { |
| 2646 | # define SET_ABCD(_a,_b,_c,_d) \ |
| 2647 | do { st->guest_RAX = (ULong)(_a); \ |
| 2648 | st->guest_RBX = (ULong)(_b); \ |
| 2649 | st->guest_RCX = (ULong)(_c); \ |
| 2650 | st->guest_RDX = (ULong)(_d); \ |
| 2651 | } while (0) |
| 2652 | |
| 2653 | switch (0xFFFFFFFF & st->guest_RAX) { |
| 2654 | case 0x00000000: |
| 2655 | SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65); |
| 2656 | break; |
| 2657 | case 0x00000001: |
| 2658 | SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff); |
| 2659 | break; |
| 2660 | case 0x80000000: |
| 2661 | SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65); |
| 2662 | break; |
| 2663 | case 0x80000001: |
sewardj | 1aa3aef | 2012-02-21 08:53:54 +0000 | [diff] [blame] | 2664 | /* Don't claim to support 3dnow or 3dnowext. 0xe1d3fbff is |
| 2665 | the original it-is-supported value that the h/w provides. |
| 2666 | See #291568. */ |
| 2667 | SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, /*0xe1d3fbff*/ |
| 2668 | 0x21d3fbff); |
sewardj | e9d8a26 | 2009-07-01 08:06:34 +0000 | [diff] [blame] | 2669 | break; |
| 2670 | case 0x80000002: |
| 2671 | SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428); |
| 2672 | break; |
| 2673 | case 0x80000003: |
| 2674 | SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834); |
| 2675 | break; |
| 2676 | case 0x80000004: |
| 2677 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2678 | break; |
| 2679 | case 0x80000005: |
| 2680 | SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140); |
| 2681 | break; |
| 2682 | case 0x80000006: |
| 2683 | SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000); |
| 2684 | break; |
| 2685 | case 0x80000007: |
| 2686 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f); |
| 2687 | break; |
| 2688 | case 0x80000008: |
| 2689 | SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000); |
| 2690 | break; |
| 2691 | default: |
| 2692 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2693 | break; |
| 2694 | } |
| 2695 | # undef SET_ABCD |
| 2696 | } |
| 2697 | |
| 2698 | |
| 2699 | /* Claim to be the following CPU (2 x ...), which is sse3 and cx16 |
| 2700 | capable. |
| 2701 | |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2702 | vendor_id : GenuineIntel |
| 2703 | cpu family : 6 |
| 2704 | model : 15 |
| 2705 | model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz |
| 2706 | stepping : 6 |
| 2707 | cpu MHz : 2394.000 |
| 2708 | cache size : 4096 KB |
| 2709 | physical id : 0 |
| 2710 | siblings : 2 |
| 2711 | core id : 0 |
| 2712 | cpu cores : 2 |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2713 | fpu : yes |
| 2714 | fpu_exception : yes |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2715 | cpuid level : 10 |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2716 | wp : yes |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2717 | flags : fpu vme de pse tsc msr pae mce cx8 apic sep |
| 2718 | mtrr pge mca cmov pat pse36 clflush dts acpi |
| 2719 | mmx fxsr sse sse2 ss ht tm syscall nx lm |
| 2720 | constant_tsc pni monitor ds_cpl vmx est tm2 |
| 2721 | cx16 xtpr lahf_lm |
| 2722 | bogomips : 4798.78 |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2723 | clflush size : 64 |
| 2724 | cache_alignment : 64 |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2725 | address sizes : 36 bits physical, 48 bits virtual |
| 2726 | power management: |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2727 | */ |
sewardj | e9d8a26 | 2009-07-01 08:06:34 +0000 | [diff] [blame] | 2728 | void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st ) |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2729 | { |
| 2730 | # define SET_ABCD(_a,_b,_c,_d) \ |
| 2731 | do { st->guest_RAX = (ULong)(_a); \ |
| 2732 | st->guest_RBX = (ULong)(_b); \ |
| 2733 | st->guest_RCX = (ULong)(_c); \ |
| 2734 | st->guest_RDX = (ULong)(_d); \ |
| 2735 | } while (0) |
| 2736 | |
| 2737 | switch (0xFFFFFFFF & st->guest_RAX) { |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2738 | case 0x00000000: |
| 2739 | SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69); |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2740 | break; |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2741 | case 0x00000001: |
| 2742 | SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff); |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2743 | break; |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2744 | case 0x00000002: |
| 2745 | SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049); |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2746 | break; |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2747 | case 0x00000003: |
| 2748 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2749 | break; |
sewardj | 32bfd3e | 2008-02-10 13:29:19 +0000 | [diff] [blame] | 2750 | case 0x00000004: { |
| 2751 | switch (0xFFFFFFFF & st->guest_RCX) { |
| 2752 | case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f, |
| 2753 | 0x0000003f, 0x00000001); break; |
| 2754 | case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f, |
| 2755 | 0x0000003f, 0x00000001); break; |
| 2756 | case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f, |
| 2757 | 0x00000fff, 0x00000001); break; |
| 2758 | default: SET_ABCD(0x00000000, 0x00000000, |
| 2759 | 0x00000000, 0x00000000); break; |
| 2760 | } |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2761 | break; |
sewardj | 32bfd3e | 2008-02-10 13:29:19 +0000 | [diff] [blame] | 2762 | } |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2763 | case 0x00000005: |
| 2764 | SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020); |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2765 | break; |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2766 | case 0x00000006: |
| 2767 | SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000); |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2768 | break; |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2769 | case 0x00000007: |
| 2770 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2771 | break; |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2772 | case 0x00000008: |
| 2773 | SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000); |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2774 | break; |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2775 | case 0x00000009: |
| 2776 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2777 | break; |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2778 | case 0x0000000a: |
sewardj | 32bfd3e | 2008-02-10 13:29:19 +0000 | [diff] [blame] | 2779 | unhandled_eax_value: |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2780 | SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000); |
| 2781 | break; |
| 2782 | case 0x80000000: |
| 2783 | SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); |
| 2784 | break; |
| 2785 | case 0x80000001: |
| 2786 | SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800); |
| 2787 | break; |
| 2788 | case 0x80000002: |
| 2789 | SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); |
| 2790 | break; |
| 2791 | case 0x80000003: |
| 2792 | SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020); |
| 2793 | break; |
| 2794 | case 0x80000004: |
| 2795 | SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847); |
| 2796 | break; |
| 2797 | case 0x80000005: |
| 2798 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2799 | break; |
| 2800 | case 0x80000006: |
| 2801 | SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000); |
| 2802 | break; |
| 2803 | case 0x80000007: |
| 2804 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2805 | break; |
| 2806 | case 0x80000008: |
| 2807 | SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); |
| 2808 | break; |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2809 | default: |
sewardj | 32bfd3e | 2008-02-10 13:29:19 +0000 | [diff] [blame] | 2810 | goto unhandled_eax_value; |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2811 | } |
| 2812 | # undef SET_ABCD |
| 2813 | } |
| 2814 | |
| 2815 | |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 2816 | /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16 |
| 2817 | capable. |
| 2818 | |
| 2819 | vendor_id : GenuineIntel |
| 2820 | cpu family : 6 |
| 2821 | model : 37 |
| 2822 | model name : Intel(R) Core(TM) i5 CPU 670 @ 3.47GHz |
| 2823 | stepping : 2 |
| 2824 | cpu MHz : 3334.000 |
| 2825 | cache size : 4096 KB |
| 2826 | physical id : 0 |
| 2827 | siblings : 4 |
| 2828 | core id : 0 |
| 2829 | cpu cores : 2 |
| 2830 | apicid : 0 |
| 2831 | initial apicid : 0 |
| 2832 | fpu : yes |
| 2833 | fpu_exception : yes |
| 2834 | cpuid level : 11 |
| 2835 | wp : yes |
| 2836 | flags : fpu vme de pse tsc msr pae mce cx8 apic sep |
| 2837 | mtrr pge mca cmov pat pse36 clflush dts acpi |
| 2838 | mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp |
| 2839 | lm constant_tsc arch_perfmon pebs bts rep_good |
| 2840 | xtopology nonstop_tsc aperfmperf pni pclmulqdq |
| 2841 | dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 |
| 2842 | xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida |
| 2843 | arat tpr_shadow vnmi flexpriority ept vpid |
| 2844 | bogomips : 6957.57 |
| 2845 | clflush size : 64 |
| 2846 | cache_alignment : 64 |
| 2847 | address sizes : 36 bits physical, 48 bits virtual |
| 2848 | power management: |
| 2849 | */ |
| 2850 | void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st ) |
| 2851 | { |
| 2852 | # define SET_ABCD(_a,_b,_c,_d) \ |
| 2853 | do { st->guest_RAX = (ULong)(_a); \ |
| 2854 | st->guest_RBX = (ULong)(_b); \ |
| 2855 | st->guest_RCX = (ULong)(_c); \ |
| 2856 | st->guest_RDX = (ULong)(_d); \ |
| 2857 | } while (0) |
| 2858 | |
| 2859 | UInt old_eax = (UInt)st->guest_RAX; |
| 2860 | UInt old_ecx = (UInt)st->guest_RCX; |
| 2861 | |
| 2862 | switch (old_eax) { |
| 2863 | case 0x00000000: |
| 2864 | SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69); |
| 2865 | break; |
| 2866 | case 0x00000001: |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 2867 | SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff, 0xbfebfbff); |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 2868 | break; |
| 2869 | case 0x00000002: |
| 2870 | SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c); |
| 2871 | break; |
| 2872 | case 0x00000003: |
| 2873 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2874 | break; |
| 2875 | case 0x00000004: |
| 2876 | switch (old_ecx) { |
| 2877 | case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f, |
| 2878 | 0x0000003f, 0x00000000); break; |
| 2879 | case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f, |
| 2880 | 0x0000007f, 0x00000000); break; |
| 2881 | case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f, |
| 2882 | 0x000001ff, 0x00000000); break; |
| 2883 | case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f, |
| 2884 | 0x00000fff, 0x00000002); break; |
| 2885 | default: SET_ABCD(0x00000000, 0x00000000, |
| 2886 | 0x00000000, 0x00000000); break; |
| 2887 | } |
| 2888 | break; |
| 2889 | case 0x00000005: |
| 2890 | SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120); |
| 2891 | break; |
| 2892 | case 0x00000006: |
| 2893 | SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000); |
| 2894 | break; |
| 2895 | case 0x00000007: |
| 2896 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2897 | break; |
| 2898 | case 0x00000008: |
| 2899 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2900 | break; |
| 2901 | case 0x00000009: |
| 2902 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2903 | break; |
| 2904 | case 0x0000000a: |
| 2905 | SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603); |
| 2906 | break; |
| 2907 | case 0x0000000b: |
| 2908 | switch (old_ecx) { |
| 2909 | case 0x00000000: |
| 2910 | SET_ABCD(0x00000001, 0x00000002, |
| 2911 | 0x00000100, 0x00000000); break; |
| 2912 | case 0x00000001: |
| 2913 | SET_ABCD(0x00000004, 0x00000004, |
| 2914 | 0x00000201, 0x00000000); break; |
| 2915 | default: |
| 2916 | SET_ABCD(0x00000000, 0x00000000, |
| 2917 | old_ecx, 0x00000000); break; |
| 2918 | } |
| 2919 | break; |
| 2920 | case 0x0000000c: |
| 2921 | SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000); |
| 2922 | break; |
| 2923 | case 0x0000000d: |
| 2924 | switch (old_ecx) { |
| 2925 | case 0x00000000: SET_ABCD(0x00000001, 0x00000002, |
| 2926 | 0x00000100, 0x00000000); break; |
| 2927 | case 0x00000001: SET_ABCD(0x00000004, 0x00000004, |
| 2928 | 0x00000201, 0x00000000); break; |
| 2929 | default: SET_ABCD(0x00000000, 0x00000000, |
| 2930 | old_ecx, 0x00000000); break; |
| 2931 | } |
| 2932 | break; |
| 2933 | case 0x80000000: |
| 2934 | SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); |
| 2935 | break; |
| 2936 | case 0x80000001: |
| 2937 | SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800); |
| 2938 | break; |
| 2939 | case 0x80000002: |
| 2940 | SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); |
| 2941 | break; |
| 2942 | case 0x80000003: |
| 2943 | SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020); |
| 2944 | break; |
| 2945 | case 0x80000004: |
| 2946 | SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847); |
| 2947 | break; |
| 2948 | case 0x80000005: |
| 2949 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2950 | break; |
| 2951 | case 0x80000006: |
| 2952 | SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000); |
| 2953 | break; |
| 2954 | case 0x80000007: |
| 2955 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100); |
| 2956 | break; |
| 2957 | case 0x80000008: |
| 2958 | SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); |
| 2959 | break; |
| 2960 | default: |
| 2961 | SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000); |
| 2962 | break; |
| 2963 | } |
| 2964 | # undef SET_ABCD |
| 2965 | } |
| 2966 | |
| 2967 | |
sewardj | fe0c5e7 | 2012-06-15 15:48:07 +0000 | [diff] [blame] | 2968 | /* Claim to be the following CPU (4 x ...), which is AVX and cx16 |
sewardj | 9e4c376 | 2013-09-27 15:03:58 +0000 | [diff] [blame] | 2969 | capable. Plus (kludge!) it "supports" HTM. |
sewardj | fe0c5e7 | 2012-06-15 15:48:07 +0000 | [diff] [blame] | 2970 | |
   Also with the following change: claim that XSaveOpt is not
   available, by having cpuid(eax=0xD,ecx=1).eax[0] return 0, compared
   to 1 on the real CPU.  Consequently, programs that correctly
   observe these CPUID values should only try to use 3 of the 8
   XSave-family instructions: XGETBV, XSAVE and XRSTOR.  In particular
   this avoids having to implement the compacted or optimised
   save/restore variants.
| 2978 | |
sewardj | fe0c5e7 | 2012-06-15 15:48:07 +0000 | [diff] [blame] | 2979 | vendor_id : GenuineIntel |
| 2980 | cpu family : 6 |
| 2981 | model : 42 |
| 2982 | model name : Intel(R) Core(TM) i5-2300 CPU @ 2.80GHz |
| 2983 | stepping : 7 |
| 2984 | cpu MHz : 1600.000 |
| 2985 | cache size : 6144 KB |
| 2986 | physical id : 0 |
| 2987 | siblings : 4 |
| 2988 | core id : 3 |
| 2989 | cpu cores : 4 |
| 2990 | apicid : 6 |
| 2991 | initial apicid : 6 |
| 2992 | fpu : yes |
| 2993 | fpu_exception : yes |
| 2994 | cpuid level : 13 |
| 2995 | wp : yes |
| 2996 | flags : fpu vme de pse tsc msr pae mce cx8 apic sep |
| 2997 | mtrr pge mca cmov pat pse36 clflush dts acpi |
| 2998 | mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp |
| 2999 | lm constant_tsc arch_perfmon pebs bts rep_good |
| 3000 | nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq |
| 3001 | dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16 |
| 3002 | xtpr pdcm sse4_1 sse4_2 popcnt aes xsave avx |
| 3003 | lahf_lm ida arat epb xsaveopt pln pts dts |
| 3004 | tpr_shadow vnmi flexpriority ept vpid |
| 3005 | |
| 3006 | bogomips : 5768.94 |
| 3007 | clflush size : 64 |
| 3008 | cache_alignment : 64 |
| 3009 | address sizes : 36 bits physical, 48 bits virtual |
| 3010 | power management: |
| 3011 | */ |
| 3012 | void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st ) |
| 3013 | { |
| 3014 | # define SET_ABCD(_a,_b,_c,_d) \ |
| 3015 | do { st->guest_RAX = (ULong)(_a); \ |
| 3016 | st->guest_RBX = (ULong)(_b); \ |
| 3017 | st->guest_RCX = (ULong)(_c); \ |
| 3018 | st->guest_RDX = (ULong)(_d); \ |
| 3019 | } while (0) |
| 3020 | |
| 3021 | UInt old_eax = (UInt)st->guest_RAX; |
| 3022 | UInt old_ecx = (UInt)st->guest_RCX; |
| 3023 | |
| 3024 | switch (old_eax) { |
| 3025 | case 0x00000000: |
| 3026 | SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69); |
| 3027 | break; |
| 3028 | case 0x00000001: |
| 3029 | SET_ABCD(0x000206a7, 0x00100800, 0x1f9ae3bf, 0xbfebfbff); |
| 3030 | break; |
| 3031 | case 0x00000002: |
| 3032 | SET_ABCD(0x76035a01, 0x00f0b0ff, 0x00000000, 0x00ca0000); |
| 3033 | break; |
| 3034 | case 0x00000003: |
| 3035 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 3036 | break; |
| 3037 | case 0x00000004: |
| 3038 | switch (old_ecx) { |
| 3039 | case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f, |
| 3040 | 0x0000003f, 0x00000000); break; |
| 3041 | case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f, |
| 3042 | 0x0000003f, 0x00000000); break; |
| 3043 | case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f, |
| 3044 | 0x000001ff, 0x00000000); break; |
| 3045 | case 0x00000003: SET_ABCD(0x1c03c163, 0x02c0003f, |
| 3046 | 0x00001fff, 0x00000006); break; |
| 3047 | default: SET_ABCD(0x00000000, 0x00000000, |
| 3048 | 0x00000000, 0x00000000); break; |
| 3049 | } |
| 3050 | break; |
| 3051 | case 0x00000005: |
| 3052 | SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120); |
| 3053 | break; |
| 3054 | case 0x00000006: |
| 3055 | SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000); |
| 3056 | break; |
| 3057 | case 0x00000007: |
sewardj | 9e4c376 | 2013-09-27 15:03:58 +0000 | [diff] [blame] | 3058 | SET_ABCD(0x00000000, 0x00000800, 0x00000000, 0x00000000); |
sewardj | fe0c5e7 | 2012-06-15 15:48:07 +0000 | [diff] [blame] | 3059 | break; |
| 3060 | case 0x00000008: |
| 3061 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 3062 | break; |
| 3063 | case 0x00000009: |
| 3064 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 3065 | break; |
| 3066 | case 0x0000000a: |
| 3067 | SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603); |
| 3068 | break; |
| 3069 | case 0x0000000b: |
| 3070 | switch (old_ecx) { |
| 3071 | case 0x00000000: |
| 3072 | SET_ABCD(0x00000001, 0x00000001, |
| 3073 | 0x00000100, 0x00000000); break; |
| 3074 | case 0x00000001: |
| 3075 | SET_ABCD(0x00000004, 0x00000004, |
| 3076 | 0x00000201, 0x00000000); break; |
| 3077 | default: |
| 3078 | SET_ABCD(0x00000000, 0x00000000, |
| 3079 | old_ecx, 0x00000000); break; |
| 3080 | } |
| 3081 | break; |
| 3082 | case 0x0000000c: |
| 3083 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 3084 | break; |
| 3085 | case 0x0000000d: |
| 3086 | switch (old_ecx) { |
| 3087 | case 0x00000000: SET_ABCD(0x00000007, 0x00000340, |
| 3088 | 0x00000340, 0x00000000); break; |
sewardj | 70dbeb0 | 2015-08-12 11:15:53 +0000 | [diff] [blame] | 3089 | case 0x00000001: SET_ABCD(0x00000000, 0x00000000, |
sewardj | fe0c5e7 | 2012-06-15 15:48:07 +0000 | [diff] [blame] | 3090 | 0x00000000, 0x00000000); break; |
| 3091 | case 0x00000002: SET_ABCD(0x00000100, 0x00000240, |
| 3092 | 0x00000000, 0x00000000); break; |
| 3093 | default: SET_ABCD(0x00000000, 0x00000000, |
| 3094 | 0x00000000, 0x00000000); break; |
| 3095 | } |
| 3096 | break; |
| 3097 | case 0x0000000e: |
| 3098 | SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000); |
| 3099 | break; |
| 3100 | case 0x0000000f: |
| 3101 | SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000); |
| 3102 | break; |
| 3103 | case 0x80000000: |
| 3104 | SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); |
| 3105 | break; |
| 3106 | case 0x80000001: |
| 3107 | SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800); |
| 3108 | break; |
| 3109 | case 0x80000002: |
| 3110 | SET_ABCD(0x20202020, 0x20202020, 0x65746e49, 0x2952286c); |
| 3111 | break; |
| 3112 | case 0x80000003: |
| 3113 | SET_ABCD(0x726f4320, 0x4d542865, 0x35692029, 0x3033322d); |
| 3114 | break; |
| 3115 | case 0x80000004: |
| 3116 | SET_ABCD(0x50432030, 0x20402055, 0x30382e32, 0x007a4847); |
| 3117 | break; |
| 3118 | case 0x80000005: |
| 3119 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 3120 | break; |
| 3121 | case 0x80000006: |
| 3122 | SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000); |
| 3123 | break; |
| 3124 | case 0x80000007: |
| 3125 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100); |
| 3126 | break; |
| 3127 | case 0x80000008: |
| 3128 | SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); |
| 3129 | break; |
| 3130 | default: |
| 3131 | SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000); |
| 3132 | break; |
| 3133 | } |
| 3134 | # undef SET_ABCD |
| 3135 | } |
| 3136 | |
| 3137 | |
sewardj | 70dbeb0 | 2015-08-12 11:15:53 +0000 | [diff] [blame] | 3138 | /* Claim to be the following CPU (4 x ...), which is AVX2 capable. |
| 3139 | |
   With the following change: claim that XSaveOpt is not available, by
   having cpuid(eax=0xD,ecx=1).eax[0] return 0, compared to 1 on the
   real CPU.  Consequently, programs that correctly observe these CPUID
   values should only try to use 3 of the 8 XSave-family instructions:
   XGETBV, XSAVE and XRSTOR.  In particular this avoids having to
   implement the compacted or optimised save/restore variants.
| 3146 | |
| 3147 | vendor_id : GenuineIntel |
| 3148 | cpu family : 6 |
| 3149 | model : 60 |
| 3150 | model name : Intel(R) Core(TM) i7-4910MQ CPU @ 2.90GHz |
| 3151 | stepping : 3 |
| 3152 | microcode : 0x1c |
| 3153 | cpu MHz : 919.957 |
| 3154 | cache size : 8192 KB |
| 3155 | physical id : 0 |
| 3156 | siblings : 4 |
| 3157 | core id : 3 |
| 3158 | cpu cores : 4 |
| 3159 | apicid : 6 |
| 3160 | initial apicid : 6 |
| 3161 | fpu : yes |
| 3162 | fpu_exception : yes |
| 3163 | cpuid level : 13 |
| 3164 | wp : yes |
| 3165 | flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca |
| 3166 | cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht |
| 3167 | tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc |
| 3168 | arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc |
| 3169 | aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl |
| 3170 | vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1 |
| 3171 | sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave |
| 3172 | avx f16c rdrand lahf_lm abm ida arat epb pln pts dtherm |
| 3173 | tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust |
| 3174 | bmi1 avx2 smep bmi2 erms invpcid xsaveopt |
| 3175 | bugs : |
| 3176 | bogomips : 5786.68 |
| 3177 | clflush size : 64 |
| 3178 | cache_alignment : 64 |
| 3179 | address sizes : 39 bits physical, 48 bits virtual |
| 3180 | power management: |
| 3181 | */ |
| 3182 | void amd64g_dirtyhelper_CPUID_avx2 ( VexGuestAMD64State* st ) |
| 3183 | { |
| 3184 | # define SET_ABCD(_a,_b,_c,_d) \ |
| 3185 | do { st->guest_RAX = (ULong)(_a); \ |
| 3186 | st->guest_RBX = (ULong)(_b); \ |
| 3187 | st->guest_RCX = (ULong)(_c); \ |
| 3188 | st->guest_RDX = (ULong)(_d); \ |
| 3189 | } while (0) |
| 3190 | |
| 3191 | UInt old_eax = (UInt)st->guest_RAX; |
| 3192 | UInt old_ecx = (UInt)st->guest_RCX; |
| 3193 | |
| 3194 | switch (old_eax) { |
| 3195 | case 0x00000000: |
| 3196 | SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69); |
| 3197 | break; |
| 3198 | case 0x00000001: |
mjw | e602fa3 | 2015-10-01 12:31:19 +0000 | [diff] [blame] | 3199 | /* Don't advertise RDRAND support, bit 30 in ECX. */ |
| 3200 | SET_ABCD(0x000306c3, 0x02100800, 0x3ffafbff, 0xbfebfbff); |
sewardj | 70dbeb0 | 2015-08-12 11:15:53 +0000 | [diff] [blame] | 3201 | break; |
| 3202 | case 0x00000002: |
| 3203 | SET_ABCD(0x76036301, 0x00f0b6ff, 0x00000000, 0x00c10000); |
| 3204 | break; |
| 3205 | case 0x00000003: |
| 3206 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 3207 | break; |
| 3208 | case 0x00000004: |
| 3209 | switch (old_ecx) { |
| 3210 | case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f, |
| 3211 | 0x0000003f, 0x00000000); break; |
| 3212 | case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f, |
| 3213 | 0x0000003f, 0x00000000); break; |
| 3214 | case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f, |
| 3215 | 0x000001ff, 0x00000000); break; |
| 3216 | case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f, |
| 3217 | 0x00001fff, 0x00000006); break; |
| 3218 | default: SET_ABCD(0x00000000, 0x00000000, |
| 3219 | 0x00000000, 0x00000000); break; |
| 3220 | } |
| 3221 | break; |
| 3222 | case 0x00000005: |
| 3223 | SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00042120); |
| 3224 | break; |
| 3225 | case 0x00000006: |
| 3226 | SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000); |
| 3227 | break; |
| 3228 | case 0x00000007: |
| 3229 | switch (old_ecx) { |
| 3230 | case 0x00000000: SET_ABCD(0x00000000, 0x000027ab, |
| 3231 | 0x00000000, 0x00000000); break; |
| 3232 | default: SET_ABCD(0x00000000, 0x00000000, |
| 3233 | 0x00000000, 0x00000000); break; |
| 3234 | } |
| 3235 | break; |
| 3236 | case 0x00000008: |
| 3237 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 3238 | break; |
| 3239 | case 0x00000009: |
| 3240 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 3241 | break; |
| 3242 | case 0x0000000a: |
| 3243 | SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603); |
| 3244 | break; |
| 3245 | case 0x0000000b: |
| 3246 | switch (old_ecx) { |
| 3247 | case 0x00000000: SET_ABCD(0x00000001, 0x00000002, |
| 3248 | 0x00000100, 0x00000002); break; |
| 3249 | case 0x00000001: SET_ABCD(0x00000004, 0x00000008, |
| 3250 | 0x00000201, 0x00000002); break; |
| 3251 | default: SET_ABCD(0x00000000, 0x00000000, |
| 3252 | old_ecx, 0x00000002); break; |
| 3253 | } |
| 3254 | break; |
| 3255 | case 0x0000000c: |
| 3256 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 3257 | break; |
| 3258 | case 0x0000000d: |
| 3259 | switch (old_ecx) { |
| 3260 | case 0x00000000: SET_ABCD(0x00000007, 0x00000340, |
| 3261 | 0x00000340, 0x00000000); break; |
| 3262 | case 0x00000001: SET_ABCD(0x00000000, 0x00000000, |
| 3263 | 0x00000000, 0x00000000); break; |
| 3264 | case 0x00000002: SET_ABCD(0x00000100, 0x00000240, |
| 3265 | 0x00000000, 0x00000000); break; |
| 3266 | default: SET_ABCD(0x00000000, 0x00000000, |
| 3267 | 0x00000000, 0x00000000); break; |
| 3268 | } |
| 3269 | break; |
| 3270 | case 0x80000000: |
| 3271 | SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); |
| 3272 | break; |
| 3273 | case 0x80000001: |
| 3274 | SET_ABCD(0x00000000, 0x00000000, 0x00000021, 0x2c100800); |
| 3275 | break; |
| 3276 | case 0x80000002: |
| 3277 | SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); |
| 3278 | break; |
| 3279 | case 0x80000003: |
| 3280 | SET_ABCD(0x37692029, 0x3139342d, 0x20514d30, 0x20555043); |
| 3281 | break; |
| 3282 | case 0x80000004: |
| 3283 | SET_ABCD(0x2e322040, 0x48473039, 0x0000007a, 0x00000000); |
| 3284 | break; |
| 3285 | case 0x80000005: |
| 3286 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 3287 | break; |
| 3288 | case 0x80000006: |
| 3289 | SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000); |
| 3290 | break; |
| 3291 | case 0x80000007: |
| 3292 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100); |
| 3293 | break; |
| 3294 | case 0x80000008: |
| 3295 | SET_ABCD(0x00003027, 0x00000000, 0x00000000, 0x00000000); |
| 3296 | break; |
| 3297 | default: |
| 3298 | SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000); |
| 3299 | break; |
| 3300 | } |
| 3301 | # undef SET_ABCD |
| 3302 | } |
| 3303 | |
| 3304 | |
| 3305 | /*---------------------------------------------------------------*/ |
| 3306 | /*--- Misc integer helpers, including rotates and crypto. ---*/ |
| 3307 | /*---------------------------------------------------------------*/ |
| 3308 | |
sewardj | 112b099 | 2005-07-23 13:19:32 +0000 | [diff] [blame] | 3309 | ULong amd64g_calculate_RCR ( ULong arg, |
| 3310 | ULong rot_amt, |
| 3311 | ULong rflags_in, |
| 3312 | Long szIN ) |
| 3313 | { |
| 3314 | Bool wantRflags = toBool(szIN < 0); |
| 3315 | ULong sz = wantRflags ? (-szIN) : szIN; |
| 3316 | ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F); |
| 3317 | ULong cf=0, of=0, tempcf; |
| 3318 | |
| 3319 | switch (sz) { |
| 3320 | case 8: |
| 3321 | cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| 3322 | of = ((arg >> 63) ^ cf) & 1; |
| 3323 | while (tempCOUNT > 0) { |
| 3324 | tempcf = arg & 1; |
| 3325 | arg = (arg >> 1) | (cf << 63); |
| 3326 | cf = tempcf; |
| 3327 | tempCOUNT--; |
| 3328 | } |
| 3329 | break; |
| 3330 | case 4: |
| 3331 | while (tempCOUNT >= 33) tempCOUNT -= 33; |
| 3332 | cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| 3333 | of = ((arg >> 31) ^ cf) & 1; |
| 3334 | while (tempCOUNT > 0) { |
| 3335 | tempcf = arg & 1; |
| 3336 | arg = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31); |
| 3337 | cf = tempcf; |
| 3338 | tempCOUNT--; |
| 3339 | } |
| 3340 | break; |
| 3341 | case 2: |
| 3342 | while (tempCOUNT >= 17) tempCOUNT -= 17; |
| 3343 | cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| 3344 | of = ((arg >> 15) ^ cf) & 1; |
| 3345 | while (tempCOUNT > 0) { |
| 3346 | tempcf = arg & 1; |
| 3347 | arg = ((arg >> 1) & 0x7FFFULL) | (cf << 15); |
| 3348 | cf = tempcf; |
| 3349 | tempCOUNT--; |
| 3350 | } |
| 3351 | break; |
| 3352 | case 1: |
| 3353 | while (tempCOUNT >= 9) tempCOUNT -= 9; |
| 3354 | cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| 3355 | of = ((arg >> 7) ^ cf) & 1; |
| 3356 | while (tempCOUNT > 0) { |
| 3357 | tempcf = arg & 1; |
| 3358 | arg = ((arg >> 1) & 0x7FULL) | (cf << 7); |
| 3359 | cf = tempcf; |
| 3360 | tempCOUNT--; |
| 3361 | } |
| 3362 | break; |
| 3363 | default: |
| 3364 | vpanic("calculate_RCR(amd64g): invalid size"); |
| 3365 | } |
| 3366 | |
| 3367 | cf &= 1; |
| 3368 | of &= 1; |
| 3369 | rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O); |
| 3370 | rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O); |
| 3371 | |
| 3372 | /* caller can ask to have back either the resulting flags or |
| 3373 | resulting value, but not both */ |
| 3374 | return wantRflags ? rflags_in : arg; |
| 3375 | } |
| 3376 | |
sewardj | b5e5c6d | 2007-01-12 20:29:01 +0000 | [diff] [blame] | 3377 | ULong amd64g_calculate_RCL ( ULong arg, |
| 3378 | ULong rot_amt, |
| 3379 | ULong rflags_in, |
| 3380 | Long szIN ) |
| 3381 | { |
| 3382 | Bool wantRflags = toBool(szIN < 0); |
| 3383 | ULong sz = wantRflags ? (-szIN) : szIN; |
| 3384 | ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F); |
| 3385 | ULong cf=0, of=0, tempcf; |
| 3386 | |
| 3387 | switch (sz) { |
| 3388 | case 8: |
| 3389 | cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| 3390 | while (tempCOUNT > 0) { |
| 3391 | tempcf = (arg >> 63) & 1; |
| 3392 | arg = (arg << 1) | (cf & 1); |
| 3393 | cf = tempcf; |
| 3394 | tempCOUNT--; |
| 3395 | } |
| 3396 | of = ((arg >> 63) ^ cf) & 1; |
| 3397 | break; |
| 3398 | case 4: |
| 3399 | while (tempCOUNT >= 33) tempCOUNT -= 33; |
| 3400 | cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| 3401 | while (tempCOUNT > 0) { |
| 3402 | tempcf = (arg >> 31) & 1; |
| 3403 | arg = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1)); |
| 3404 | cf = tempcf; |
| 3405 | tempCOUNT--; |
| 3406 | } |
| 3407 | of = ((arg >> 31) ^ cf) & 1; |
| 3408 | break; |
| 3409 | case 2: |
| 3410 | while (tempCOUNT >= 17) tempCOUNT -= 17; |
| 3411 | cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| 3412 | while (tempCOUNT > 0) { |
| 3413 | tempcf = (arg >> 15) & 1; |
| 3414 | arg = 0xFFFFULL & ((arg << 1) | (cf & 1)); |
| 3415 | cf = tempcf; |
| 3416 | tempCOUNT--; |
| 3417 | } |
| 3418 | of = ((arg >> 15) ^ cf) & 1; |
| 3419 | break; |
| 3420 | case 1: |
| 3421 | while (tempCOUNT >= 9) tempCOUNT -= 9; |
| 3422 | cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| 3423 | while (tempCOUNT > 0) { |
| 3424 | tempcf = (arg >> 7) & 1; |
| 3425 | arg = 0xFFULL & ((arg << 1) | (cf & 1)); |
| 3426 | cf = tempcf; |
| 3427 | tempCOUNT--; |
| 3428 | } |
| 3429 | of = ((arg >> 7) ^ cf) & 1; |
| 3430 | break; |
| 3431 | default: |
| 3432 | vpanic("calculate_RCL(amd64g): invalid size"); |
| 3433 | } |
| 3434 | |
| 3435 | cf &= 1; |
| 3436 | of &= 1; |
| 3437 | rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O); |
| 3438 | rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O); |
| 3439 | |
| 3440 | return wantRflags ? rflags_in : arg; |
| 3441 | } |
| 3442 | |
sewardj | 1a179b5 | 2010-09-28 19:56:32 +0000 | [diff] [blame] | 3443 | /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+) |
| 3444 | * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25 |
| 3445 | */ |
| 3446 | ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which) |
| 3447 | { |
| 3448 | ULong hi, lo, tmp, A[16]; |
| 3449 | |
| 3450 | A[0] = 0; A[1] = a; |
| 3451 | A[2] = A[1] << 1; A[3] = A[2] ^ a; |
| 3452 | A[4] = A[2] << 1; A[5] = A[4] ^ a; |
| 3453 | A[6] = A[3] << 1; A[7] = A[6] ^ a; |
| 3454 | A[8] = A[4] << 1; A[9] = A[8] ^ a; |
| 3455 | A[10] = A[5] << 1; A[11] = A[10] ^ a; |
| 3456 | A[12] = A[6] << 1; A[13] = A[12] ^ a; |
| 3457 | A[14] = A[7] << 1; A[15] = A[14] ^ a; |
| 3458 | |
| 3459 | lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15]; |
| 3460 | hi = lo >> 56; |
| 3461 | lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15]; |
| 3462 | hi = (hi << 8) | (lo >> 56); |
| 3463 | lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15]; |
| 3464 | hi = (hi << 8) | (lo >> 56); |
| 3465 | lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15]; |
| 3466 | hi = (hi << 8) | (lo >> 56); |
| 3467 | lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15]; |
| 3468 | hi = (hi << 8) | (lo >> 56); |
| 3469 | lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15]; |
| 3470 | hi = (hi << 8) | (lo >> 56); |
| 3471 | lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15]; |
| 3472 | hi = (hi << 8) | (lo >> 56); |
| 3473 | lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15]; |
| 3474 | |
| 3475 | ULong m0 = -1; |
| 3476 | m0 /= 255; |
| 3477 | tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp; |
| 3478 | tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp; |
| 3479 | tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp; |
| 3480 | tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp; |
| 3481 | tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp; |
| 3482 | tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp; |
| 3483 | tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp; |
| 3484 | |
| 3485 | return which ? hi : lo; |
| 3486 | } |
| 3487 | |
sewardj | 112b099 | 2005-07-23 13:19:32 +0000 | [diff] [blame] | 3488 | |
sewardj | bc6af53 | 2005-08-23 23:16:51 +0000 | [diff] [blame] | 3489 | /* CALLED FROM GENERATED CODE */ |
| 3490 | /* DIRTY HELPER (non-referentially-transparent) */ |
| 3491 | /* Horrible hack. On non-amd64 platforms, return 1. */ |
| 3492 | ULong amd64g_dirtyhelper_RDTSC ( void ) |
| 3493 | { |
| 3494 | # if defined(__x86_64__) |
| 3495 | UInt eax, edx; |
| 3496 | __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx)); |
| 3497 | return (((ULong)edx) << 32) | ((ULong)eax); |
| 3498 | # else |
| 3499 | return 1ULL; |
| 3500 | # endif |
| 3501 | } |
| 3502 | |
sewardj | 818c730 | 2013-03-26 13:53:18 +0000 | [diff] [blame] | 3503 | /* CALLED FROM GENERATED CODE */ |
| 3504 | /* DIRTY HELPER (non-referentially-transparent) */ |
| 3505 | /* Horrible hack. On non-amd64 platforms, return 1. */ |
| 3506 | /* This uses a different calling convention from _RDTSC just above |
| 3507 | only because of the difficulty of returning 96 bits from a C |
| 3508 | function -- RDTSC returns 64 bits and so is simple by comparison, |
| 3509 | on amd64. */ |
| 3510 | void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st ) |
| 3511 | { |
| 3512 | # if defined(__x86_64__) |
| 3513 | UInt eax, ecx, edx; |
| 3514 | __asm__ __volatile__("rdtscp" : "=a" (eax), "=d" (edx), "=c" (ecx)); |
| 3515 | st->guest_RAX = (ULong)eax; |
| 3516 | st->guest_RCX = (ULong)ecx; |
| 3517 | st->guest_RDX = (ULong)edx; |
| 3518 | # else |
| 3519 | /* Do nothing. */ |
| 3520 | # endif |
| 3521 | } |
sewardj | bc6af53 | 2005-08-23 23:16:51 +0000 | [diff] [blame] | 3522 | |
sewardj | bb4396c | 2007-11-20 17:29:08 +0000 | [diff] [blame] | 3523 | /* CALLED FROM GENERATED CODE */ |
| 3524 | /* DIRTY HELPER (non-referentially-transparent) */ |
| 3525 | /* Horrible hack. On non-amd64 platforms, return 0. */ |
| 3526 | ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ ) |
| 3527 | { |
| 3528 | # if defined(__x86_64__) |
| 3529 | ULong r = 0; |
| 3530 | portno &= 0xFFFF; |
| 3531 | switch (sz) { |
| 3532 | case 4: |
| 3533 | __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0" |
| 3534 | : "=a" (r) : "Nd" (portno)); |
| 3535 | break; |
| 3536 | case 2: |
| 3537 | __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0" |
| 3538 | : "=a" (r) : "Nd" (portno)); |
| 3539 | break; |
| 3540 | case 1: |
| 3541 | __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0" |
| 3542 | : "=a" (r) : "Nd" (portno)); |
| 3543 | break; |
| 3544 | default: |
| 3545 | break; /* note: no 64-bit version of insn exists */ |
| 3546 | } |
| 3547 | return r; |
| 3548 | # else |
| 3549 | return 0; |
| 3550 | # endif |
| 3551 | } |
| 3552 | |
| 3553 | |
| 3554 | /* CALLED FROM GENERATED CODE */ |
| 3555 | /* DIRTY HELPER (non-referentially-transparent) */ |
| 3556 | /* Horrible hack. On non-amd64 platforms, do nothing. */ |
| 3557 | void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ ) |
| 3558 | { |
| 3559 | # if defined(__x86_64__) |
| 3560 | portno &= 0xFFFF; |
| 3561 | switch (sz) { |
| 3562 | case 4: |
| 3563 | __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1" |
| 3564 | : : "a" (data), "Nd" (portno)); |
| 3565 | break; |
| 3566 | case 2: |
| 3567 | __asm__ __volatile__("outw %w0, %w1" |
| 3568 | : : "a" (data), "Nd" (portno)); |
| 3569 | break; |
| 3570 | case 1: |
| 3571 | __asm__ __volatile__("outb %b0, %w1" |
| 3572 | : : "a" (data), "Nd" (portno)); |
| 3573 | break; |
| 3574 | default: |
| 3575 | break; /* note: no 64-bit version of insn exists */ |
| 3576 | } |
| 3577 | # else |
| 3578 | /* do nothing */ |
| 3579 | # endif |
| 3580 | } |
| 3581 | |
sewardj | b9dc243 | 2010-06-07 16:22:22 +0000 | [diff] [blame] | 3582 | /* CALLED FROM GENERATED CODE */ |
| 3583 | /* DIRTY HELPER (non-referentially-transparent) */ |
| 3584 | /* Horrible hack. On non-amd64 platforms, do nothing. */ |
| 3585 | /* op = 0: call the native SGDT instruction. |
| 3586 | op = 1: call the native SIDT instruction. |
| 3587 | */ |
| 3588 | void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) { |
| 3589 | # if defined(__x86_64__) |
| 3590 | switch (op) { |
| 3591 | case 0: |
| 3592 | __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory"); |
| 3593 | break; |
| 3594 | case 1: |
| 3595 | __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory"); |
| 3596 | break; |
| 3597 | default: |
| 3598 | vpanic("amd64g_dirtyhelper_SxDT"); |
| 3599 | } |
| 3600 | # else |
| 3601 | /* do nothing */ |
| 3602 | UChar* p = (UChar*)address; |
| 3603 | p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0; |
| 3604 | p[6] = p[7] = p[8] = p[9] = 0; |
| 3605 | # endif |
| 3606 | } |
sewardj | bb4396c | 2007-11-20 17:29:08 +0000 | [diff] [blame] | 3607 | |
sewardj | 8711f66 | 2005-05-09 17:52:56 +0000 | [diff] [blame] | 3608 | /*---------------------------------------------------------------*/ |
| 3609 | /*--- Helpers for MMX/SSE/SSE2. ---*/ |
| 3610 | /*---------------------------------------------------------------*/ |
| 3611 | |
sewardj | a7ba8c4 | 2005-05-10 20:08:34 +0000 | [diff] [blame] | 3612 | static inline UChar abdU8 ( UChar xx, UChar yy ) { |
| 3613 | return toUChar(xx>yy ? xx-yy : yy-xx); |
| 3614 | } |
| 3615 | |
sewardj | 8711f66 | 2005-05-09 17:52:56 +0000 | [diff] [blame] | 3616 | static inline ULong mk32x2 ( UInt w1, UInt w0 ) { |
| 3617 | return (((ULong)w1) << 32) | ((ULong)w0); |
| 3618 | } |
| 3619 | |
| 3620 | static inline UShort sel16x4_3 ( ULong w64 ) { |
| 3621 | UInt hi32 = toUInt(w64 >> 32); |
| 3622 | return toUShort(hi32 >> 16); |
| 3623 | } |
| 3624 | static inline UShort sel16x4_2 ( ULong w64 ) { |
| 3625 | UInt hi32 = toUInt(w64 >> 32); |
| 3626 | return toUShort(hi32); |
| 3627 | } |
| 3628 | static inline UShort sel16x4_1 ( ULong w64 ) { |
| 3629 | UInt lo32 = toUInt(w64); |
| 3630 | return toUShort(lo32 >> 16); |
| 3631 | } |
| 3632 | static inline UShort sel16x4_0 ( ULong w64 ) { |
| 3633 | UInt lo32 = toUInt(w64); |
| 3634 | return toUShort(lo32); |
| 3635 | } |
| 3636 | |
sewardj | a7ba8c4 | 2005-05-10 20:08:34 +0000 | [diff] [blame] | 3637 | static inline UChar sel8x8_7 ( ULong w64 ) { |
| 3638 | UInt hi32 = toUInt(w64 >> 32); |
| 3639 | return toUChar(hi32 >> 24); |
| 3640 | } |
| 3641 | static inline UChar sel8x8_6 ( ULong w64 ) { |
| 3642 | UInt hi32 = toUInt(w64 >> 32); |
| 3643 | return toUChar(hi32 >> 16); |
| 3644 | } |
| 3645 | static inline UChar sel8x8_5 ( ULong w64 ) { |
| 3646 | UInt hi32 = toUInt(w64 >> 32); |
| 3647 | return toUChar(hi32 >> 8); |
| 3648 | } |
| 3649 | static inline UChar sel8x8_4 ( ULong w64 ) { |
| 3650 | UInt hi32 = toUInt(w64 >> 32); |
| 3651 | return toUChar(hi32 >> 0); |
| 3652 | } |
| 3653 | static inline UChar sel8x8_3 ( ULong w64 ) { |
| 3654 | UInt lo32 = toUInt(w64); |
| 3655 | return toUChar(lo32 >> 24); |
| 3656 | } |
| 3657 | static inline UChar sel8x8_2 ( ULong w64 ) { |
| 3658 | UInt lo32 = toUInt(w64); |
| 3659 | return toUChar(lo32 >> 16); |
| 3660 | } |
| 3661 | static inline UChar sel8x8_1 ( ULong w64 ) { |
| 3662 | UInt lo32 = toUInt(w64); |
| 3663 | return toUChar(lo32 >> 8); |
| 3664 | } |
| 3665 | static inline UChar sel8x8_0 ( ULong w64 ) { |
| 3666 | UInt lo32 = toUInt(w64); |
| 3667 | return toUChar(lo32 >> 0); |
| 3668 | } |
| 3669 | |
sewardj | 8711f66 | 2005-05-09 17:52:56 +0000 | [diff] [blame] | 3670 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 3671 | ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy ) |
| 3672 | { |
| 3673 | return |
| 3674 | mk32x2( |
| 3675 | (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy))) |
| 3676 | + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))), |
| 3677 | (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy))) |
| 3678 | + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy))) |
| 3679 | ); |
| 3680 | } |
| 3681 | |
sewardj | a7ba8c4 | 2005-05-10 20:08:34 +0000 | [diff] [blame] | 3682 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
sewardj | a7ba8c4 | 2005-05-10 20:08:34 +0000 | [diff] [blame] | 3683 | ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy ) |
| 3684 | { |
| 3685 | UInt t = 0; |
| 3686 | t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) ); |
| 3687 | t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) ); |
| 3688 | t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) ); |
| 3689 | t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) ); |
| 3690 | t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) ); |
| 3691 | t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) ); |
| 3692 | t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) ); |
| 3693 | t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) ); |
| 3694 | t &= 0xFFFF; |
| 3695 | return (ULong)t; |
| 3696 | } |
| 3697 | |
sewardj | adffcef | 2005-05-11 00:03:06 +0000 | [diff] [blame] | 3698 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
sewardj | 8cb931e | 2012-02-16 22:02:14 +0000 | [diff] [blame] | 3699 | ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi ) |
| 3700 | { |
| 3701 | UShort t, min; |
| 3702 | UInt idx; |
| 3703 | t = sel16x4_0(sLo); if (True) { min = t; idx = 0; } |
| 3704 | t = sel16x4_1(sLo); if (t < min) { min = t; idx = 1; } |
| 3705 | t = sel16x4_2(sLo); if (t < min) { min = t; idx = 2; } |
| 3706 | t = sel16x4_3(sLo); if (t < min) { min = t; idx = 3; } |
| 3707 | t = sel16x4_0(sHi); if (t < min) { min = t; idx = 4; } |
| 3708 | t = sel16x4_1(sHi); if (t < min) { min = t; idx = 5; } |
| 3709 | t = sel16x4_2(sHi); if (t < min) { min = t; idx = 6; } |
| 3710 | t = sel16x4_3(sHi); if (t < min) { min = t; idx = 7; } |
| 3711 | return ((ULong)(idx << 16)) | ((ULong)min); |
| 3712 | } |
| 3713 | |
| 3714 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
sewardj | 186f869 | 2011-01-21 17:51:44 +0000 | [diff] [blame] | 3715 | ULong amd64g_calc_crc32b ( ULong crcIn, ULong b ) |
| 3716 | { |
| 3717 | UInt i; |
| 3718 | ULong crc = (b & 0xFFULL) ^ crcIn; |
| 3719 | for (i = 0; i < 8; i++) |
| 3720 | crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); |
| 3721 | return crc; |
| 3722 | } |
| 3723 | |
| 3724 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 3725 | ULong amd64g_calc_crc32w ( ULong crcIn, ULong w ) |
| 3726 | { |
| 3727 | UInt i; |
| 3728 | ULong crc = (w & 0xFFFFULL) ^ crcIn; |
| 3729 | for (i = 0; i < 16; i++) |
| 3730 | crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); |
| 3731 | return crc; |
| 3732 | } |
| 3733 | |
| 3734 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 3735 | ULong amd64g_calc_crc32l ( ULong crcIn, ULong l ) |
| 3736 | { |
| 3737 | UInt i; |
| 3738 | ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn; |
| 3739 | for (i = 0; i < 32; i++) |
| 3740 | crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); |
| 3741 | return crc; |
| 3742 | } |
| 3743 | |
| 3744 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 3745 | ULong amd64g_calc_crc32q ( ULong crcIn, ULong q ) |
| 3746 | { |
| 3747 | ULong crc = amd64g_calc_crc32l(crcIn, q); |
| 3748 | return amd64g_calc_crc32l(crc, q >> 32); |
| 3749 | } |
| 3750 | |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 3751 | |
sewardj | 4d5bce2 | 2012-02-21 11:02:44 +0000 | [diff] [blame] | 3752 | /* .. helper for next fn .. */ |
| 3753 | static inline ULong sad_8x4 ( ULong xx, ULong yy ) |
| 3754 | { |
| 3755 | UInt t = 0; |
| 3756 | t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) ); |
| 3757 | t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) ); |
| 3758 | t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) ); |
| 3759 | t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) ); |
| 3760 | return (ULong)t; |
| 3761 | } |
| 3762 | |
| 3763 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 3764 | ULong amd64g_calc_mpsadbw ( ULong sHi, ULong sLo, |
| 3765 | ULong dHi, ULong dLo, |
| 3766 | ULong imm_and_return_control_bit ) |
| 3767 | { |
| 3768 | UInt imm8 = imm_and_return_control_bit & 7; |
| 3769 | Bool calcHi = (imm_and_return_control_bit >> 7) & 1; |
| 3770 | UInt srcOffsL = imm8 & 3; /* src offs in 32-bit (L) chunks */ |
| 3771 | UInt dstOffsL = (imm8 >> 2) & 1; /* dst offs in ditto chunks */ |
| 3772 | /* For src we only need 32 bits, so get them into the |
| 3773 | lower half of a 64 bit word. */ |
| 3774 | ULong src = ((srcOffsL & 2) ? sHi : sLo) >> (32 * (srcOffsL & 1)); |
| 3775 | /* For dst we need to get hold of 56 bits (7 bytes) from a total of |
| 3776 | 11 bytes. If calculating the low part of the result, need bytes |
| 3777 | dstOffsL * 4 + (0 .. 6); if calculating the high part, |
| 3778 | dstOffsL * 4 + (4 .. 10). */ |
| 3779 | ULong dst; |
| 3780 | /* dstOffL = 0, Lo -> 0 .. 6 |
| 3781 | dstOffL = 1, Lo -> 4 .. 10 |
| 3782 | dstOffL = 0, Hi -> 4 .. 10 |
| 3783 | dstOffL = 1, Hi -> 8 .. 14 |
| 3784 | */ |
| 3785 | if (calcHi && dstOffsL) { |
| 3786 | /* 8 .. 14 */ |
| 3787 | dst = dHi & 0x00FFFFFFFFFFFFFFULL; |
| 3788 | } |
| 3789 | else if (!calcHi && !dstOffsL) { |
| 3790 | /* 0 .. 6 */ |
| 3791 | dst = dLo & 0x00FFFFFFFFFFFFFFULL; |
| 3792 | } |
| 3793 | else { |
| 3794 | /* 4 .. 10 */ |
| 3795 | dst = (dLo >> 32) | ((dHi & 0x00FFFFFFULL) << 32); |
| 3796 | } |
| 3797 | ULong r0 = sad_8x4( dst >> 0, src ); |
| 3798 | ULong r1 = sad_8x4( dst >> 8, src ); |
| 3799 | ULong r2 = sad_8x4( dst >> 16, src ); |
| 3800 | ULong r3 = sad_8x4( dst >> 24, src ); |
| 3801 | ULong res = (r3 << 48) | (r2 << 32) | (r1 << 16) | r0; |
| 3802 | return res; |
| 3803 | } |
| 3804 | |
sewardj | cc3d219 | 2013-03-27 11:37:33 +0000 | [diff] [blame] | 3805 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 3806 | ULong amd64g_calculate_pext ( ULong src_masked, ULong mask ) |
| 3807 | { |
| 3808 | ULong dst = 0; |
| 3809 | ULong src_bit; |
| 3810 | ULong dst_bit = 1; |
| 3811 | for (src_bit = 1; src_bit; src_bit <<= 1) { |
| 3812 | if (mask & src_bit) { |
| 3813 | if (src_masked & src_bit) dst |= dst_bit; |
| 3814 | dst_bit <<= 1; |
| 3815 | } |
| 3816 | } |
| 3817 | return dst; |
| 3818 | } |
| 3819 | |
| 3820 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 3821 | ULong amd64g_calculate_pdep ( ULong src, ULong mask ) |
| 3822 | { |
| 3823 | ULong dst = 0; |
| 3824 | ULong dst_bit; |
| 3825 | ULong src_bit = 1; |
| 3826 | for (dst_bit = 1; dst_bit; dst_bit <<= 1) { |
| 3827 | if (mask & dst_bit) { |
| 3828 | if (src & src_bit) dst |= dst_bit; |
| 3829 | src_bit <<= 1; |
| 3830 | } |
| 3831 | } |
| 3832 | return dst; |
| 3833 | } |
| 3834 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 3835 | /*---------------------------------------------------------------*/ |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 3836 | /*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M} ---*/ |
| 3837 | /*---------------------------------------------------------------*/ |
| 3838 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3839 | static UInt zmask_from_V128 ( V128* arg ) |
| 3840 | { |
| 3841 | UInt i, res = 0; |
| 3842 | for (i = 0; i < 16; i++) { |
| 3843 | res |= ((arg->w8[i] == 0) ? 1 : 0) << i; |
| 3844 | } |
| 3845 | return res; |
| 3846 | } |
| 3847 | |
sewardj | 3c3d6d6 | 2012-02-16 15:21:08 +0000 | [diff] [blame] | 3848 | static UInt zmask_from_V128_wide ( V128* arg ) |
| 3849 | { |
| 3850 | UInt i, res = 0; |
| 3851 | for (i = 0; i < 8; i++) { |
| 3852 | res |= ((arg->w16[i] == 0) ? 1 : 0) << i; |
| 3853 | } |
| 3854 | return res; |
| 3855 | } |
| 3856 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3857 | /* Helps with PCMP{I,E}STR{I,M}. |
| 3858 | |
| 3859 | CALLED FROM GENERATED CODE: DIRTY HELPER(s). (But not really, |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 3860 | actually it could be a clean helper, but for the fact that we can't |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3861 | pass by value 2 x V128 to a clean helper, nor have one returned.) |
| 3862 | Reads guest state, writes to guest state for the xSTRM cases, no |
| 3863 | accesses of memory, is a pure function. |
| 3864 | |
| 3865 | opc_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so |
| 3866 | the callee knows which I/E and I/M variant it is dealing with and |
| 3867 | what the specific operation is. 4th byte of opcode is in the range |
| 3868 | 0x60 to 0x63: |
| 3869 | istri 66 0F 3A 63 |
| 3870 | istrm 66 0F 3A 62 |
| 3871 | estri 66 0F 3A 61 |
| 3872 | estrm 66 0F 3A 60 |
| 3873 | |
| 3874 | gstOffL and gstOffR are the guest state offsets for the two XMM |
| 3875 | register inputs. We never have to deal with the memory case since |
| 3876 | that is handled by pre-loading the relevant value into the fake |
| 3877 | XMM16 register. |
| 3878 | |
| 3879 | For ESTRx variants, edxIN and eaxIN hold the values of those two |
| 3880 | registers. |
| 3881 | |
| 3882 | In all cases, the bottom 16 bits of the result contain the new |
| 3883 | OSZACP %rflags values. For xSTRI variants, bits[31:16] of the |
| 3884 | result hold the new %ecx value. For xSTRM variants, the helper |
| 3885 | writes the result directly to the guest XMM0. |
| 3886 | |
| 3887 | Declarable side effects: in all cases, reads guest state at |
| 3888 | [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes |
| 3889 | guest_XMM0. |
| 3890 | |
| 3891 | Is expected to be called with opc_and_imm combinations which have |
| 3892 | actually been validated, and will assert if otherwise. The front |
| 3893 | end should ensure we're only called with verified values. |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 3894 | */ |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3895 | ULong amd64g_dirtyhelper_PCMPxSTRx ( |
| 3896 | VexGuestAMD64State* gst, |
| 3897 | HWord opc4_and_imm, |
| 3898 | HWord gstOffL, HWord gstOffR, |
| 3899 | HWord edxIN, HWord eaxIN |
| 3900 | ) |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 3901 | { |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3902 | HWord opc4 = (opc4_and_imm >> 8) & 0xFF; |
| 3903 | HWord imm8 = opc4_and_imm & 0xFF; |
| 3904 | HWord isISTRx = opc4 & 2; |
| 3905 | HWord isxSTRM = (opc4 & 1) ^ 1; |
| 3906 | vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */ |
sewardj | 3c3d6d6 | 2012-02-16 15:21:08 +0000 | [diff] [blame] | 3907 | HWord wide = (imm8 & 1); |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 3908 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3909 | // where the args are |
| 3910 | V128* argL = (V128*)( ((UChar*)gst) + gstOffL ); |
| 3911 | V128* argR = (V128*)( ((UChar*)gst) + gstOffR ); |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 3912 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3913 | /* Create the arg validity masks, either from the vectors |
| 3914 | themselves or from the supplied edx/eax values. */ |
| 3915 | // FIXME: this is only right for the 8-bit data cases. |
| 3916 | // At least that is asserted above. |
| 3917 | UInt zmaskL, zmaskR; |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 3918 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3919 | // temp spot for the resulting flags and vector. |
| 3920 | V128 resV; |
| 3921 | UInt resOSZACP; |
| 3922 | |
sewardj | 3c3d6d6 | 2012-02-16 15:21:08 +0000 | [diff] [blame] | 3923 | // for checking whether case was handled |
| 3924 | Bool ok = False; |
| 3925 | |
| 3926 | if (wide) { |
| 3927 | if (isISTRx) { |
| 3928 | zmaskL = zmask_from_V128_wide(argL); |
| 3929 | zmaskR = zmask_from_V128_wide(argR); |
| 3930 | } else { |
| 3931 | Int tmp; |
| 3932 | tmp = edxIN & 0xFFFFFFFF; |
| 3933 | if (tmp < -8) tmp = -8; |
| 3934 | if (tmp > 8) tmp = 8; |
| 3935 | if (tmp < 0) tmp = -tmp; |
| 3936 | vassert(tmp >= 0 && tmp <= 8); |
| 3937 | zmaskL = (1 << tmp) & 0xFF; |
| 3938 | tmp = eaxIN & 0xFFFFFFFF; |
| 3939 | if (tmp < -8) tmp = -8; |
| 3940 | if (tmp > 8) tmp = 8; |
| 3941 | if (tmp < 0) tmp = -tmp; |
| 3942 | vassert(tmp >= 0 && tmp <= 8); |
| 3943 | zmaskR = (1 << tmp) & 0xFF; |
| 3944 | } |
| 3945 | // do the meyaath |
| 3946 | ok = compute_PCMPxSTRx_wide ( |
| 3947 | &resV, &resOSZACP, argL, argR, |
| 3948 | zmaskL, zmaskR, imm8, (Bool)isxSTRM |
| 3949 | ); |
| 3950 | } else { |
| 3951 | if (isISTRx) { |
| 3952 | zmaskL = zmask_from_V128(argL); |
| 3953 | zmaskR = zmask_from_V128(argR); |
| 3954 | } else { |
| 3955 | Int tmp; |
| 3956 | tmp = edxIN & 0xFFFFFFFF; |
| 3957 | if (tmp < -16) tmp = -16; |
| 3958 | if (tmp > 16) tmp = 16; |
| 3959 | if (tmp < 0) tmp = -tmp; |
| 3960 | vassert(tmp >= 0 && tmp <= 16); |
| 3961 | zmaskL = (1 << tmp) & 0xFFFF; |
| 3962 | tmp = eaxIN & 0xFFFFFFFF; |
| 3963 | if (tmp < -16) tmp = -16; |
| 3964 | if (tmp > 16) tmp = 16; |
| 3965 | if (tmp < 0) tmp = -tmp; |
| 3966 | vassert(tmp >= 0 && tmp <= 16); |
| 3967 | zmaskR = (1 << tmp) & 0xFFFF; |
| 3968 | } |
| 3969 | // do the meyaath |
| 3970 | ok = compute_PCMPxSTRx ( |
| 3971 | &resV, &resOSZACP, argL, argR, |
| 3972 | zmaskL, zmaskR, imm8, (Bool)isxSTRM |
| 3973 | ); |
| 3974 | } |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3975 | |
| 3976 | // front end shouldn't pass us any imm8 variants we can't |
| 3977 | // handle. Hence: |
| 3978 | vassert(ok); |
| 3979 | |
| 3980 | // So, finally we need to get the results back to the caller. |
| 3981 | // In all cases, the new OSZACP value is the lowest 16 of |
| 3982 | // the return value. |
| 3983 | if (isxSTRM) { |
sewardj | c4530ae | 2012-05-21 10:18:49 +0000 | [diff] [blame] | 3984 | gst->guest_YMM0[0] = resV.w32[0]; |
| 3985 | gst->guest_YMM0[1] = resV.w32[1]; |
| 3986 | gst->guest_YMM0[2] = resV.w32[2]; |
| 3987 | gst->guest_YMM0[3] = resV.w32[3]; |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3988 | return resOSZACP & 0x8D5; |
| 3989 | } else { |
| 3990 | UInt newECX = resV.w32[0] & 0xFFFF; |
| 3991 | return (newECX << 16) | (resOSZACP & 0x8D5); |
| 3992 | } |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 3993 | } |
| 3994 | |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 3995 | /*---------------------------------------------------------------*/ |
| 3996 | /*--- AES primitives and helpers ---*/ |
| 3997 | /*---------------------------------------------------------------*/ |
| 3998 | /* a 16 x 16 matrix */ |
| 3999 | static const UChar sbox[256] = { // row nr |
| 4000 | 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, // 1 |
| 4001 | 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, |
| 4002 | 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, // 2 |
| 4003 | 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, |
| 4004 | 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, // 3 |
| 4005 | 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, |
| 4006 | 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, // 4 |
| 4007 | 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, |
| 4008 | 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, // 5 |
| 4009 | 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, |
| 4010 | 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, // 6 |
| 4011 | 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, |
| 4012 | 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, // 7 |
| 4013 | 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, |
| 4014 | 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, // 8 |
| 4015 | 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, |
| 4016 | 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, // 9 |
| 4017 | 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, |
| 4018 | 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, //10 |
| 4019 | 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, |
| 4020 | 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, //11 |
| 4021 | 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, |
| 4022 | 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, //12 |
| 4023 | 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, |
| 4024 | 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, //13 |
| 4025 | 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, |
| 4026 | 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, //14 |
| 4027 | 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, |
| 4028 | 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, //15 |
| 4029 | 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, |
| 4030 | 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, //16 |
| 4031 | 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 |
| 4032 | }; |
| 4033 | static void SubBytes (V128* v) |
| 4034 | { |
| 4035 | V128 r; |
| 4036 | UInt i; |
| 4037 | for (i = 0; i < 16; i++) |
| 4038 | r.w8[i] = sbox[v->w8[i]]; |
| 4039 | *v = r; |
| 4040 | } |
| 4041 | |
| 4042 | /* a 16 x 16 matrix */ |
| 4043 | static const UChar invsbox[256] = { // row nr |
| 4044 | 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, // 1 |
| 4045 | 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, |
| 4046 | 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, // 2 |
| 4047 | 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, |
| 4048 | 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, // 3 |
| 4049 | 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, |
| 4050 | 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, // 4 |
| 4051 | 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, |
| 4052 | 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, // 5 |
| 4053 | 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, |
| 4054 | 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, // 6 |
| 4055 | 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, |
| 4056 | 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, // 7 |
| 4057 | 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, |
| 4058 | 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, // 8 |
| 4059 | 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, |
| 4060 | 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, // 9 |
| 4061 | 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, |
| 4062 | 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, //10 |
| 4063 | 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, |
| 4064 | 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, //11 |
| 4065 | 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, |
| 4066 | 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, //12 |
| 4067 | 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, |
| 4068 | 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, //13 |
| 4069 | 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, |
| 4070 | 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, //14 |
| 4071 | 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, |
| 4072 | 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, //15 |
| 4073 | 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, |
| 4074 | 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, //16 |
| 4075 | 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d |
| 4076 | }; |
| 4077 | static void InvSubBytes (V128* v) |
| 4078 | { |
| 4079 | V128 r; |
| 4080 | UInt i; |
| 4081 | for (i = 0; i < 16; i++) |
| 4082 | r.w8[i] = invsbox[v->w8[i]]; |
| 4083 | *v = r; |
| 4084 | } |
| 4085 | |
| 4086 | static const UChar ShiftRows_op[16] = |
| 4087 | {11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0}; |
| 4088 | static void ShiftRows (V128* v) |
| 4089 | { |
| 4090 | V128 r; |
| 4091 | UInt i; |
| 4092 | for (i = 0; i < 16; i++) |
| 4093 | r.w8[i] = v->w8[ShiftRows_op[15-i]]; |
| 4094 | *v = r; |
| 4095 | } |
| 4096 | |
| 4097 | static const UChar InvShiftRows_op[16] = |
| 4098 | {3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0}; |
| 4099 | static void InvShiftRows (V128* v) |
| 4100 | { |
| 4101 | V128 r; |
| 4102 | UInt i; |
| 4103 | for (i = 0; i < 16; i++) |
| 4104 | r.w8[i] = v->w8[InvShiftRows_op[15-i]]; |
| 4105 | *v = r; |
| 4106 | } |
| 4107 | |
| 4108 | /* Multiplication of the finite fields elements of AES. |
| 4109 | See "A Specification for The AES Algorithm Rijndael |
| 4110 | (by Joan Daemen & Vincent Rijmen)" |
| 4111 | Dr. Brian Gladman, v3.1, 3rd March 2001. */ |
| 4112 | /* N values so that (hex) xy = 0x03^N. |
| 4113 | 0x00 cannot be used. We put 0xff for this value.*/ |
| 4114 | /* a 16 x 16 matrix */ |
| 4115 | static const UChar Nxy[256] = { // row nr |
| 4116 | 0xff, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, // 1 |
| 4117 | 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03, |
| 4118 | 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, // 2 |
| 4119 | 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1, |
| 4120 | 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, // 3 |
| 4121 | 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78, |
| 4122 | 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, // 4 |
| 4123 | 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e, |
| 4124 | 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, // 5 |
| 4125 | 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38, |
| 4126 | 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, // 6 |
| 4127 | 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10, |
| 4128 | 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, // 7 |
| 4129 | 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba, |
| 4130 | 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, // 8 |
| 4131 | 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57, |
| 4132 | 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, // 9 |
| 4133 | 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8, |
| 4134 | 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, //10 |
| 4135 | 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0, |
| 4136 | 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, //11 |
| 4137 | 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7, |
| 4138 | 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, //12 |
| 4139 | 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d, |
| 4140 | 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, //13 |
| 4141 | 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1, |
| 4142 | 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, //14 |
| 4143 | 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab, |
| 4144 | 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, //15 |
| 4145 | 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5, |
| 4146 | 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, //16 |
| 4147 | 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07 |
| 4148 | }; |
| 4149 | |
| 4150 | /* E values so that E = 0x03^xy. */ |
| 4151 | static const UChar Exy[256] = { // row nr |
| 4152 | 0x01, 0x03, 0x05, 0x0f, 0x11, 0x33, 0x55, 0xff, // 1 |
| 4153 | 0x1a, 0x2e, 0x72, 0x96, 0xa1, 0xf8, 0x13, 0x35, |
| 4154 | 0x5f, 0xe1, 0x38, 0x48, 0xd8, 0x73, 0x95, 0xa4, // 2 |
| 4155 | 0xf7, 0x02, 0x06, 0x0a, 0x1e, 0x22, 0x66, 0xaa, |
| 4156 | 0xe5, 0x34, 0x5c, 0xe4, 0x37, 0x59, 0xeb, 0x26, // 3 |
| 4157 | 0x6a, 0xbe, 0xd9, 0x70, 0x90, 0xab, 0xe6, 0x31, |
| 4158 | 0x53, 0xf5, 0x04, 0x0c, 0x14, 0x3c, 0x44, 0xcc, // 4 |
| 4159 | 0x4f, 0xd1, 0x68, 0xb8, 0xd3, 0x6e, 0xb2, 0xcd, |
| 4160 | 0x4c, 0xd4, 0x67, 0xa9, 0xe0, 0x3b, 0x4d, 0xd7, // 5 |
| 4161 | 0x62, 0xa6, 0xf1, 0x08, 0x18, 0x28, 0x78, 0x88, |
| 4162 | 0x83, 0x9e, 0xb9, 0xd0, 0x6b, 0xbd, 0xdc, 0x7f, // 6 |
| 4163 | 0x81, 0x98, 0xb3, 0xce, 0x49, 0xdb, 0x76, 0x9a, |
| 4164 | 0xb5, 0xc4, 0x57, 0xf9, 0x10, 0x30, 0x50, 0xf0, // 7 |
| 4165 | 0x0b, 0x1d, 0x27, 0x69, 0xbb, 0xd6, 0x61, 0xa3, |
| 4166 | 0xfe, 0x19, 0x2b, 0x7d, 0x87, 0x92, 0xad, 0xec, // 8 |
| 4167 | 0x2f, 0x71, 0x93, 0xae, 0xe9, 0x20, 0x60, 0xa0, |
| 4168 | 0xfb, 0x16, 0x3a, 0x4e, 0xd2, 0x6d, 0xb7, 0xc2, // 9 |
| 4169 | 0x5d, 0xe7, 0x32, 0x56, 0xfa, 0x15, 0x3f, 0x41, |
| 4170 | 0xc3, 0x5e, 0xe2, 0x3d, 0x47, 0xc9, 0x40, 0xc0, //10 |
| 4171 | 0x5b, 0xed, 0x2c, 0x74, 0x9c, 0xbf, 0xda, 0x75, |
| 4172 | 0x9f, 0xba, 0xd5, 0x64, 0xac, 0xef, 0x2a, 0x7e, //11 |
| 4173 | 0x82, 0x9d, 0xbc, 0xdf, 0x7a, 0x8e, 0x89, 0x80, |
| 4174 | 0x9b, 0xb6, 0xc1, 0x58, 0xe8, 0x23, 0x65, 0xaf, //12 |
| 4175 | 0xea, 0x25, 0x6f, 0xb1, 0xc8, 0x43, 0xc5, 0x54, |
| 4176 | 0xfc, 0x1f, 0x21, 0x63, 0xa5, 0xf4, 0x07, 0x09, //13 |
| 4177 | 0x1b, 0x2d, 0x77, 0x99, 0xb0, 0xcb, 0x46, 0xca, |
| 4178 | 0x45, 0xcf, 0x4a, 0xde, 0x79, 0x8b, 0x86, 0x91, //14 |
| 4179 | 0xa8, 0xe3, 0x3e, 0x42, 0xc6, 0x51, 0xf3, 0x0e, |
| 4180 | 0x12, 0x36, 0x5a, 0xee, 0x29, 0x7b, 0x8d, 0x8c, //15 |
| 4181 | 0x8f, 0x8a, 0x85, 0x94, 0xa7, 0xf2, 0x0d, 0x17, |
| 4182 | 0x39, 0x4b, 0xdd, 0x7c, 0x84, 0x97, 0xa2, 0xfd, //16 |
| 4183 | 0x1c, 0x24, 0x6c, 0xb4, 0xc7, 0x52, 0xf6, 0x01}; |
| 4184 | |
| 4185 | static inline UChar ff_mul(UChar u1, UChar u2) |
| 4186 | { |
| 4187 | if ((u1 > 0) && (u2 > 0)) { |
| 4188 | UInt ui = Nxy[u1] + Nxy[u2]; |
| 4189 | if (ui >= 255) |
| 4190 | ui = ui - 255; |
| 4191 | return Exy[ui]; |
| 4192 | } else { |
| 4193 | return 0; |
| 4194 | }; |
| 4195 | } |
| 4196 | |
| 4197 | static void MixColumns (V128* v) |
| 4198 | { |
| 4199 | V128 r; |
| 4200 | Int j; |
| 4201 | #define P(x,row,col) (x)->w8[((row)*4+(col))] |
| 4202 | for (j = 0; j < 4; j++) { |
| 4203 | P(&r,j,0) = ff_mul(0x02, P(v,j,0)) ^ ff_mul(0x03, P(v,j,1)) |
| 4204 | ^ P(v,j,2) ^ P(v,j,3); |
| 4205 | P(&r,j,1) = P(v,j,0) ^ ff_mul( 0x02, P(v,j,1) ) |
| 4206 | ^ ff_mul(0x03, P(v,j,2) ) ^ P(v,j,3); |
| 4207 | P(&r,j,2) = P(v,j,0) ^ P(v,j,1) ^ ff_mul( 0x02, P(v,j,2) ) |
| 4208 | ^ ff_mul(0x03, P(v,j,3) ); |
| 4209 | P(&r,j,3) = ff_mul(0x03, P(v,j,0) ) ^ P(v,j,1) ^ P(v,j,2) |
| 4210 | ^ ff_mul( 0x02, P(v,j,3) ); |
| 4211 | } |
| 4212 | *v = r; |
| 4213 | #undef P |
| 4214 | } |
| 4215 | |
| 4216 | static void InvMixColumns (V128* v) |
| 4217 | { |
| 4218 | V128 r; |
| 4219 | Int j; |
| 4220 | #define P(x,row,col) (x)->w8[((row)*4+(col))] |
| 4221 | for (j = 0; j < 4; j++) { |
| 4222 | P(&r,j,0) = ff_mul(0x0e, P(v,j,0) ) ^ ff_mul(0x0b, P(v,j,1) ) |
| 4223 | ^ ff_mul(0x0d,P(v,j,2) ) ^ ff_mul(0x09, P(v,j,3) ); |
| 4224 | P(&r,j,1) = ff_mul(0x09, P(v,j,0) ) ^ ff_mul(0x0e, P(v,j,1) ) |
| 4225 | ^ ff_mul(0x0b,P(v,j,2) ) ^ ff_mul(0x0d, P(v,j,3) ); |
| 4226 | P(&r,j,2) = ff_mul(0x0d, P(v,j,0) ) ^ ff_mul(0x09, P(v,j,1) ) |
| 4227 | ^ ff_mul(0x0e,P(v,j,2) ) ^ ff_mul(0x0b, P(v,j,3) ); |
| 4228 | P(&r,j,3) = ff_mul(0x0b, P(v,j,0) ) ^ ff_mul(0x0d, P(v,j,1) ) |
| 4229 | ^ ff_mul(0x09,P(v,j,2) ) ^ ff_mul(0x0e, P(v,j,3) ); |
| 4230 | } |
| 4231 | *v = r; |
| 4232 | #undef P |
| 4233 | |
| 4234 | } |
| 4235 | |
| 4236 | /* For description, see definition in guest_amd64_defs.h */ |
| 4237 | void amd64g_dirtyhelper_AES ( |
| 4238 | VexGuestAMD64State* gst, |
sewardj | 1407a36 | 2012-06-24 15:11:38 +0000 | [diff] [blame] | 4239 | HWord opc4, HWord gstOffD, |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 4240 | HWord gstOffL, HWord gstOffR |
| 4241 | ) |
| 4242 | { |
| 4243 | // where the args are |
sewardj | 1407a36 | 2012-06-24 15:11:38 +0000 | [diff] [blame] | 4244 | V128* argD = (V128*)( ((UChar*)gst) + gstOffD ); |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 4245 | V128* argL = (V128*)( ((UChar*)gst) + gstOffL ); |
| 4246 | V128* argR = (V128*)( ((UChar*)gst) + gstOffR ); |
sewardj | 1407a36 | 2012-06-24 15:11:38 +0000 | [diff] [blame] | 4247 | V128 r; |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 4248 | |
| 4249 | switch (opc4) { |
| 4250 | case 0xDC: /* AESENC */ |
| 4251 | case 0xDD: /* AESENCLAST */ |
sewardj | 1407a36 | 2012-06-24 15:11:38 +0000 | [diff] [blame] | 4252 | r = *argR; |
| 4253 | ShiftRows (&r); |
| 4254 | SubBytes (&r); |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 4255 | if (opc4 == 0xDC) |
sewardj | 1407a36 | 2012-06-24 15:11:38 +0000 | [diff] [blame] | 4256 | MixColumns (&r); |
| 4257 | argD->w64[0] = r.w64[0] ^ argL->w64[0]; |
| 4258 | argD->w64[1] = r.w64[1] ^ argL->w64[1]; |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 4259 | break; |
| 4260 | |
| 4261 | case 0xDE: /* AESDEC */ |
| 4262 | case 0xDF: /* AESDECLAST */ |
sewardj | 1407a36 | 2012-06-24 15:11:38 +0000 | [diff] [blame] | 4263 | r = *argR; |
| 4264 | InvShiftRows (&r); |
| 4265 | InvSubBytes (&r); |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 4266 | if (opc4 == 0xDE) |
sewardj | 1407a36 | 2012-06-24 15:11:38 +0000 | [diff] [blame] | 4267 | InvMixColumns (&r); |
| 4268 | argD->w64[0] = r.w64[0] ^ argL->w64[0]; |
| 4269 | argD->w64[1] = r.w64[1] ^ argL->w64[1]; |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 4270 | break; |
| 4271 | |
| 4272 | case 0xDB: /* AESIMC */ |
sewardj | 1407a36 | 2012-06-24 15:11:38 +0000 | [diff] [blame] | 4273 | *argD = *argL; |
| 4274 | InvMixColumns (argD); |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 4275 | break; |
| 4276 | default: vassert(0); |
| 4277 | } |
| 4278 | } |
| 4279 | |
| 4280 | static inline UInt RotWord (UInt w32) |
| 4281 | { |
| 4282 | return ((w32 >> 8) | (w32 << 24)); |
| 4283 | } |
| 4284 | |
| 4285 | static inline UInt SubWord (UInt w32) |
| 4286 | { |
| 4287 | UChar *w8; |
| 4288 | UChar *r8; |
| 4289 | UInt res; |
| 4290 | w8 = (UChar*) &w32; |
| 4291 | r8 = (UChar*) &res; |
| 4292 | r8[0] = sbox[w8[0]]; |
| 4293 | r8[1] = sbox[w8[1]]; |
| 4294 | r8[2] = sbox[w8[2]]; |
| 4295 | r8[3] = sbox[w8[3]]; |
| 4296 | return res; |
| 4297 | } |
| 4298 | |
| 4299 | /* For description, see definition in guest_amd64_defs.h */ |
| 4300 | extern void amd64g_dirtyhelper_AESKEYGENASSIST ( |
| 4301 | VexGuestAMD64State* gst, |
| 4302 | HWord imm8, |
| 4303 | HWord gstOffL, HWord gstOffR |
| 4304 | ) |
| 4305 | { |
| 4306 | // where the args are |
| 4307 | V128* argL = (V128*)( ((UChar*)gst) + gstOffL ); |
| 4308 | V128* argR = (V128*)( ((UChar*)gst) + gstOffR ); |
| 4309 | |
sewardj | a35a6db | 2014-12-09 21:01:28 +0000 | [diff] [blame] | 4310 | // We have to create the result in a temporary in the |
| 4311 | // case where the src and dst regs are the same. See #341698. |
| 4312 | V128 tmp; |
| 4313 | |
| 4314 | tmp.w32[3] = RotWord (SubWord (argL->w32[3])) ^ imm8; |
| 4315 | tmp.w32[2] = SubWord (argL->w32[3]); |
| 4316 | tmp.w32[1] = RotWord (SubWord (argL->w32[1])) ^ imm8; |
| 4317 | tmp.w32[0] = SubWord (argL->w32[1]); |
| 4318 | |
| 4319 | argR->w32[3] = tmp.w32[3]; |
| 4320 | argR->w32[2] = tmp.w32[2]; |
| 4321 | argR->w32[1] = tmp.w32[1]; |
| 4322 | argR->w32[0] = tmp.w32[0]; |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 4323 | } |
| 4324 | |
| 4325 | |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 4326 | |
| 4327 | /*---------------------------------------------------------------*/ |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 4328 | /*--- Helpers for dealing with, and describing, ---*/ |
| 4329 | /*--- guest state as a whole. ---*/ |
| 4330 | /*---------------------------------------------------------------*/ |
| 4331 | |
| 4332 | /* Initialise the entire amd64 guest state. */ |
| 4333 | /* VISIBLE TO LIBVEX CLIENT */ |
| 4334 | void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state ) |
| 4335 | { |
sewardj | c6f970f | 2012-04-02 21:54:49 +0000 | [diff] [blame] | 4336 | vex_state->host_EvC_FAILADDR = 0; |
| 4337 | vex_state->host_EvC_COUNTER = 0; |
| 4338 | vex_state->pad0 = 0; |
| 4339 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 4340 | vex_state->guest_RAX = 0; |
| 4341 | vex_state->guest_RCX = 0; |
| 4342 | vex_state->guest_RDX = 0; |
| 4343 | vex_state->guest_RBX = 0; |
| 4344 | vex_state->guest_RSP = 0; |
| 4345 | vex_state->guest_RBP = 0; |
| 4346 | vex_state->guest_RSI = 0; |
| 4347 | vex_state->guest_RDI = 0; |
| 4348 | vex_state->guest_R8 = 0; |
| 4349 | vex_state->guest_R9 = 0; |
| 4350 | vex_state->guest_R10 = 0; |
| 4351 | vex_state->guest_R11 = 0; |
| 4352 | vex_state->guest_R12 = 0; |
| 4353 | vex_state->guest_R13 = 0; |
| 4354 | vex_state->guest_R14 = 0; |
| 4355 | vex_state->guest_R15 = 0; |
| 4356 | |
| 4357 | vex_state->guest_CC_OP = AMD64G_CC_OP_COPY; |
| 4358 | vex_state->guest_CC_DEP1 = 0; |
| 4359 | vex_state->guest_CC_DEP2 = 0; |
| 4360 | vex_state->guest_CC_NDEP = 0; |
| 4361 | |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 4362 | vex_state->guest_DFLAG = 1; /* forwards */ |
sewardj | 85520e4 | 2005-02-19 15:22:38 +0000 | [diff] [blame] | 4363 | vex_state->guest_IDFLAG = 0; |
sewardj | 0e457fc | 2013-12-11 16:47:59 +0000 | [diff] [blame] | 4364 | vex_state->guest_ACFLAG = 0; |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 4365 | |
philippe | e2cc4de | 2014-12-16 23:57:51 +0000 | [diff] [blame] | 4366 | /* HACK: represent the offset associated with a constant %fs. |
| 4367 | Typically, on linux, this assumes that %fs is only ever zero (main |
| 4368 | thread) or 0x63. */ |
| 4369 | vex_state->guest_FS_CONST = 0; |
sewardj | a6b93d1 | 2005-02-17 09:28:28 +0000 | [diff] [blame] | 4370 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 4371 | vex_state->guest_RIP = 0; |
| 4372 | |
sewardj | 8d96531 | 2005-02-25 02:48:47 +0000 | [diff] [blame] | 4373 | /* Initialise the simulated FPU */ |
| 4374 | amd64g_dirtyhelper_FINIT( vex_state ); |
| 4375 | |
sewardj | c4530ae | 2012-05-21 10:18:49 +0000 | [diff] [blame] | 4376 | /* Initialise the AVX state. */ |
| 4377 | # define AVXZERO(_ymm) \ |
| 4378 | do { _ymm[0]=_ymm[1]=_ymm[2]=_ymm[3] = 0; \ |
| 4379 | _ymm[4]=_ymm[5]=_ymm[6]=_ymm[7] = 0; \ |
| 4380 | } while (0) |
sewardj | cb6091d | 2005-02-21 08:23:39 +0000 | [diff] [blame] | 4381 | vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST; |
sewardj | c4530ae | 2012-05-21 10:18:49 +0000 | [diff] [blame] | 4382 | AVXZERO(vex_state->guest_YMM0); |
| 4383 | AVXZERO(vex_state->guest_YMM1); |
| 4384 | AVXZERO(vex_state->guest_YMM2); |
| 4385 | AVXZERO(vex_state->guest_YMM3); |
| 4386 | AVXZERO(vex_state->guest_YMM4); |
| 4387 | AVXZERO(vex_state->guest_YMM5); |
| 4388 | AVXZERO(vex_state->guest_YMM6); |
| 4389 | AVXZERO(vex_state->guest_YMM7); |
| 4390 | AVXZERO(vex_state->guest_YMM8); |
| 4391 | AVXZERO(vex_state->guest_YMM9); |
| 4392 | AVXZERO(vex_state->guest_YMM10); |
| 4393 | AVXZERO(vex_state->guest_YMM11); |
| 4394 | AVXZERO(vex_state->guest_YMM12); |
| 4395 | AVXZERO(vex_state->guest_YMM13); |
| 4396 | AVXZERO(vex_state->guest_YMM14); |
| 4397 | AVXZERO(vex_state->guest_YMM15); |
| 4398 | AVXZERO(vex_state->guest_YMM16); |
sewardj | cb6091d | 2005-02-21 08:23:39 +0000 | [diff] [blame] | 4399 | |
sewardj | c4530ae | 2012-05-21 10:18:49 +0000 | [diff] [blame] | 4400 | # undef AVXZERO |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 4401 | |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 4402 | vex_state->guest_EMNOTE = EmNote_NONE; |
sewardj | 1f126c5 | 2005-03-16 13:57:58 +0000 | [diff] [blame] | 4403 | |
| 4404 | /* These should not ever be either read or written, but we |
| 4405 | initialise them anyway. */ |
sewardj | 05f5e01 | 2014-05-04 10:52:11 +0000 | [diff] [blame] | 4406 | vex_state->guest_CMSTART = 0; |
| 4407 | vex_state->guest_CMLEN = 0; |
sewardj | ce02aa7 | 2006-01-12 12:27:58 +0000 | [diff] [blame] | 4408 | |
sewardj | d660d41 | 2008-12-03 21:29:59 +0000 | [diff] [blame] | 4409 | vex_state->guest_NRADDR = 0; |
| 4410 | vex_state->guest_SC_CLASS = 0; |
philippe | e2cc4de | 2014-12-16 23:57:51 +0000 | [diff] [blame] | 4411 | vex_state->guest_GS_CONST = 0; |
sewardj | d660d41 | 2008-12-03 21:29:59 +0000 | [diff] [blame] | 4412 | |
sewardj | e86310f | 2009-03-19 22:21:40 +0000 | [diff] [blame] | 4413 | vex_state->guest_IP_AT_SYSCALL = 0; |
sewardj | c6f970f | 2012-04-02 21:54:49 +0000 | [diff] [blame] | 4414 | vex_state->pad1 = 0; |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 4415 | } |
| 4416 | |
| 4417 | |
sewardj | 2f959cc | 2005-01-26 01:19:35 +0000 | [diff] [blame] | 4418 | /* Figure out if any part of the guest state contained in minoff |
| 4419 | .. maxoff requires precise memory exceptions. If in doubt return |
philippe | 6c46bef | 2012-08-14 22:29:01 +0000 | [diff] [blame] | 4420 | True (but this generates significantly slower code). |
sewardj | 2f959cc | 2005-01-26 01:19:35 +0000 | [diff] [blame] | 4421 | |
sewardj | 4cca75c | 2005-03-16 11:52:25 +0000 | [diff] [blame] | 4422 | By default we enforce precise exns for guest %RSP, %RBP and %RIP |
| 4423 | only. These are the minimum needed to extract correct stack |
| 4424 | backtraces from amd64 code. |
philippe | 6c46bef | 2012-08-14 22:29:01 +0000 | [diff] [blame] | 4425 | |
| 4426 | Only %RSP is needed in mode VexRegUpdSpAtMemAccess. |
sewardj | 2f959cc | 2005-01-26 01:19:35 +0000 | [diff] [blame] | 4427 | */ |
sewardj | ca2c3c7 | 2015-02-05 12:53:20 +0000 | [diff] [blame] | 4428 | Bool guest_amd64_state_requires_precise_mem_exns ( |
| 4429 | Int minoff, Int maxoff, VexRegisterUpdates pxControl |
| 4430 | ) |
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 4431 | { |
sewardj | 4cca75c | 2005-03-16 11:52:25 +0000 | [diff] [blame] | 4432 | Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP); |
| 4433 | Int rbp_max = rbp_min + 8 - 1; |
sewardj | 2f959cc | 2005-01-26 01:19:35 +0000 | [diff] [blame] | 4434 | Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP); |
| 4435 | Int rsp_max = rsp_min + 8 - 1; |
| 4436 | Int rip_min = offsetof(VexGuestAMD64State, guest_RIP); |
| 4437 | Int rip_max = rip_min + 8 - 1; |
| 4438 | |
philippe | 6c46bef | 2012-08-14 22:29:01 +0000 | [diff] [blame] | 4439 | if (maxoff < rsp_min || minoff > rsp_max) { |
| 4440 | /* no overlap with rsp */ |
sewardj | ca2c3c7 | 2015-02-05 12:53:20 +0000 | [diff] [blame] | 4441 | if (pxControl == VexRegUpdSpAtMemAccess) |
philippe | 6c46bef | 2012-08-14 22:29:01 +0000 | [diff] [blame] | 4442 | return False; // We only need to check stack pointer. |
sewardj | 4cca75c | 2005-03-16 11:52:25 +0000 | [diff] [blame] | 4443 | } else { |
| 4444 | return True; |
| 4445 | } |
| 4446 | |
philippe | 6c46bef | 2012-08-14 22:29:01 +0000 | [diff] [blame] | 4447 | if (maxoff < rbp_min || minoff > rbp_max) { |
| 4448 | /* no overlap with rbp */ |
sewardj | 2f959cc | 2005-01-26 01:19:35 +0000 | [diff] [blame] | 4449 | } else { |
| 4450 | return True; |
| 4451 | } |
| 4452 | |
| 4453 | if (maxoff < rip_min || minoff > rip_max) { |
| 4454 | /* no overlap with eip */ |
| 4455 | } else { |
| 4456 | return True; |
| 4457 | } |
| 4458 | |
| 4459 | return False; |
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 4460 | } |
sewardj | 2f959cc | 2005-01-26 01:19:35 +0000 | [diff] [blame] | 4461 | |
| 4462 | |
sewardj | c85e91c | 2005-02-07 14:59:28 +0000 | [diff] [blame] | 4463 | #define ALWAYSDEFD(field) \ |
| 4464 | { offsetof(VexGuestAMD64State, field), \ |
| 4465 | (sizeof ((VexGuestAMD64State*)0)->field) } |
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 4466 | |
| 4467 | VexGuestLayout |
sewardj | c85e91c | 2005-02-07 14:59:28 +0000 | [diff] [blame] | 4468 | amd64guest_layout |
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 4469 | = { |
| 4470 | /* Total size of the guest state, in bytes. */ |
sewardj | c85e91c | 2005-02-07 14:59:28 +0000 | [diff] [blame] | 4471 | .total_sizeB = sizeof(VexGuestAMD64State), |
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 4472 | |
| 4473 | /* Describe the stack pointer. */ |
sewardj | c85e91c | 2005-02-07 14:59:28 +0000 | [diff] [blame] | 4474 | .offset_SP = offsetof(VexGuestAMD64State,guest_RSP), |
| 4475 | .sizeof_SP = 8, |
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 4476 | |
sewardj | a203330 | 2008-08-19 11:15:10 +0000 | [diff] [blame] | 4477 | /* Describe the frame pointer. */ |
| 4478 | .offset_FP = offsetof(VexGuestAMD64State,guest_RBP), |
| 4479 | .sizeof_FP = 8, |
| 4480 | |
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 4481 | /* Describe the instruction pointer. */ |
sewardj | c85e91c | 2005-02-07 14:59:28 +0000 | [diff] [blame] | 4482 | .offset_IP = offsetof(VexGuestAMD64State,guest_RIP), |
| 4483 | .sizeof_IP = 8, |
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 4484 | |
| 4485 | /* Describe any sections to be regarded by Memcheck as |
| 4486 | 'always-defined'. */ |
sewardj | e86310f | 2009-03-19 22:21:40 +0000 | [diff] [blame] | 4487 | .n_alwaysDefd = 16, |
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 4488 | |
| 4489 | /* flags thunk: OP and NDEP are always defd, whereas DEP1 |
| 4490 | and DEP2 have to be tracked. See detailed comment in |
| 4491 | gdefs.h on meaning of thunk fields. */ |
| 4492 | .alwaysDefd |
| 4493 | = { /* 0 */ ALWAYSDEFD(guest_CC_OP), |
| 4494 | /* 1 */ ALWAYSDEFD(guest_CC_NDEP), |
sewardj | 85520e4 | 2005-02-19 15:22:38 +0000 | [diff] [blame] | 4495 | /* 2 */ ALWAYSDEFD(guest_DFLAG), |
| 4496 | /* 3 */ ALWAYSDEFD(guest_IDFLAG), |
| 4497 | /* 4 */ ALWAYSDEFD(guest_RIP), |
philippe | e2cc4de | 2014-12-16 23:57:51 +0000 | [diff] [blame] | 4498 | /* 5 */ ALWAYSDEFD(guest_FS_CONST), |
sewardj | 8d96531 | 2005-02-25 02:48:47 +0000 | [diff] [blame] | 4499 | /* 6 */ ALWAYSDEFD(guest_FTOP), |
| 4500 | /* 7 */ ALWAYSDEFD(guest_FPTAG), |
| 4501 | /* 8 */ ALWAYSDEFD(guest_FPROUND), |
| 4502 | /* 9 */ ALWAYSDEFD(guest_FC3210), |
sewardj | 85520e4 | 2005-02-19 15:22:38 +0000 | [diff] [blame] | 4503 | // /* */ ALWAYSDEFD(guest_CS), |
| 4504 | // /* */ ALWAYSDEFD(guest_DS), |
| 4505 | // /* */ ALWAYSDEFD(guest_ES), |
| 4506 | // /* */ ALWAYSDEFD(guest_FS), |
| 4507 | // /* */ ALWAYSDEFD(guest_GS), |
| 4508 | // /* */ ALWAYSDEFD(guest_SS), |
| 4509 | // /* */ ALWAYSDEFD(guest_LDT), |
| 4510 | // /* */ ALWAYSDEFD(guest_GDT), |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 4511 | /* 10 */ ALWAYSDEFD(guest_EMNOTE), |
sewardj | 16a403b | 2005-07-07 12:26:36 +0000 | [diff] [blame] | 4512 | /* 11 */ ALWAYSDEFD(guest_SSEROUND), |
sewardj | 05f5e01 | 2014-05-04 10:52:11 +0000 | [diff] [blame] | 4513 | /* 12 */ ALWAYSDEFD(guest_CMSTART), |
| 4514 | /* 13 */ ALWAYSDEFD(guest_CMLEN), |
sewardj | e86310f | 2009-03-19 22:21:40 +0000 | [diff] [blame] | 4515 | /* 14 */ ALWAYSDEFD(guest_SC_CLASS), |
| 4516 | /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL) |
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 4517 | } |
| 4518 | }; |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 4519 | |
| 4520 | |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 4521 | /*---------------------------------------------------------------*/ |
sewardj | cef7d3e | 2009-07-02 12:21:59 +0000 | [diff] [blame] | 4522 | /*--- end guest_amd64_helpers.c ---*/ |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 4523 | /*---------------------------------------------------------------*/ |