njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 1 | |
| 2 | /*---------------------------------------------------------------*/ |
sewardj | 752f906 | 2010-05-03 21:38:49 +0000 | [diff] [blame] | 3 | /*--- begin guest_amd64_helpers.c ---*/ |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 4 | /*---------------------------------------------------------------*/ |
| 5 | |
| 6 | /* |
sewardj | 752f906 | 2010-05-03 21:38:49 +0000 | [diff] [blame] | 7 | This file is part of Valgrind, a dynamic binary instrumentation |
| 8 | framework. |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 9 | |
sewardj | 89ae847 | 2013-10-18 14:12:58 +0000 | [diff] [blame] | 10 | Copyright (C) 2004-2013 OpenWorks LLP |
sewardj | 752f906 | 2010-05-03 21:38:49 +0000 | [diff] [blame] | 11 | info@open-works.net |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 12 | |
sewardj | 752f906 | 2010-05-03 21:38:49 +0000 | [diff] [blame] | 13 | This program is free software; you can redistribute it and/or |
| 14 | modify it under the terms of the GNU General Public License as |
| 15 | published by the Free Software Foundation; either version 2 of the |
| 16 | License, or (at your option) any later version. |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 17 | |
sewardj | 752f906 | 2010-05-03 21:38:49 +0000 | [diff] [blame] | 18 | This program is distributed in the hope that it will be useful, but |
| 19 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 21 | General Public License for more details. |
| 22 | |
| 23 | You should have received a copy of the GNU General Public License |
| 24 | along with this program; if not, write to the Free Software |
| 25 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
sewardj | 7bd6ffe | 2005-08-03 16:07:36 +0000 | [diff] [blame] | 26 | 02110-1301, USA. |
| 27 | |
sewardj | 752f906 | 2010-05-03 21:38:49 +0000 | [diff] [blame] | 28 | The GNU General Public License is contained in the file COPYING. |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 29 | |
| 30 | Neither the names of the U.S. Department of Energy nor the |
| 31 | University of California nor the names of its contributors may be |
| 32 | used to endorse or promote products derived from this software |
| 33 | without prior written permission. |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 34 | */ |
| 35 | |
| 36 | #include "libvex_basictypes.h" |
florian | 33b0243 | 2012-08-25 21:48:04 +0000 | [diff] [blame] | 37 | #include "libvex_emnote.h" |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 38 | #include "libvex_guest_amd64.h" |
| 39 | #include "libvex_ir.h" |
| 40 | #include "libvex.h" |
| 41 | |
sewardj | cef7d3e | 2009-07-02 12:21:59 +0000 | [diff] [blame] | 42 | #include "main_util.h" |
philippe | 6c46bef | 2012-08-14 22:29:01 +0000 | [diff] [blame] | 43 | #include "main_globals.h" |
sewardj | cef7d3e | 2009-07-02 12:21:59 +0000 | [diff] [blame] | 44 | #include "guest_generic_bb_to_IR.h" |
| 45 | #include "guest_amd64_defs.h" |
| 46 | #include "guest_generic_x87.h" |
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 47 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 48 | |
| 49 | /* This file contains helper functions for amd64 guest code. |
| 50 | Calls to these functions are generated by the back end. |
| 51 | These calls are of course in the host machine code and |
| 52 | this file will be compiled to host machine code, so that |
| 53 | all makes sense. |
| 54 | |
| 55 | Only change the signatures of these helper functions very |
| 56 | carefully. If you change the signature here, you'll have to change |
| 57 | the parameters passed to it in the IR calls constructed by |
| 58 | guest-amd64/toIR.c. |
| 59 | |
| 60 | The convention used is that all functions called from generated |
| 61 | code are named amd64g_<something>, and any function whose name lacks |
| 62 | that prefix is not called from generated code. Note that some |
| 63 | LibVEX_* functions can however be called by VEX's client, but that |
| 64 | is not the same as calling them from VEX-generated code. |
| 65 | */ |
| 66 | |
| 67 | |
| 68 | /* Set to 1 to get detailed profiling info about use of the flag |
| 69 | machinery. */ |
| 70 | #define PROFILE_RFLAGS 0 |
| 71 | |
| 72 | |
| 73 | /*---------------------------------------------------------------*/ |
| 74 | /*--- %rflags run-time helpers. ---*/ |
| 75 | /*---------------------------------------------------------------*/ |
| 76 | |
sewardj | 1a01e65 | 2005-02-23 11:39:21 +0000 | [diff] [blame] | 77 | /* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags |
| 78 | after imulq/mulq. */ |
| 79 | |
/* Signed 64x64 -> 128 bit multiply.

   Writes the upper 64 bits of the product u*v to *rHi and the lower
   64 bits to *rLo.  The upper half is assembled from 32-bit partial
   products: the low halves of the operands (u0, v0) are held as
   unsigned values, the high halves (u1, v1) keep their sign.

   The lower half is computed in unsigned arithmetic.  Multiplying the
   two Longs directly overflows for large operands, which is undefined
   behaviour in C; the unsigned product wraps modulo 2^64 and has the
   same bit pattern as the low half of the signed product. */
static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
{
   ULong u0, v0, w0;
   Long  u1, v1, w1, w2, t;
   u0   = u & 0xFFFFFFFFULL;
   u1   = u >> 32;
   v0   = v & 0xFFFFFFFFULL;
   v1   = v >> 32;
   w0   = u0 * v0;
   t    = u1 * v0 + (w0 >> 32);
   w1   = t & 0xFFFFFFFFULL;
   w2   = t >> 32;
   w1   = u0 * v1 + w1;
   *rHi = u1 * v1 + w2 + (w1 >> 32);
   /* Unsigned multiply: well defined for every pair of inputs. */
   *rLo = (Long)((ULong)u * (ULong)v);
}
| 96 | |
/* Unsigned 64x64 -> 128 bit multiply.

   Splits each operand into 32-bit halves, forms the partial products
   and propagates the carries upward.  The upper 64 bits of the product
   go to *rHi, the lower 64 bits to *rLo. */
static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
{
   const ULong lo32 = 0xFFFFFFFFULL;

   /* 32-bit halves of the two operands. */
   ULong uLo = u & lo32;
   ULong uHi = u >> 32;
   ULong vLo = v & lo32;
   ULong vHi = v >> 32;

   /* Partial products, each absorbing the carry from the one below. */
   ULong loLo  = uLo * vLo;
   ULong cross = uHi * vLo + (loLo >> 32);
   ULong mid   = uLo * vHi + (cross & lo32);

   *rHi = uHi * vHi + (cross >> 32) + (mid >> 32);
   *rLo = u * v;
}
| 113 | |
| 114 | |
/* Parity-flag lookup, indexed by a byte value.  An entry holds
   AMD64G_CC_MASK_P when the index has an even number of set bits and
   0 otherwise, so the looked-up value can be ORed straight into a
   flags word.  The flag macros index it with the result cast to
   UChar, i.e. only the low 8 bits of the result are considered. */
static const UChar parity_table[256] = {
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
};
| 149 | |
/* Generalised left shift: a non-negative count n shifts x left by n
   bits, a negative count shifts x right by -n bits.  The left shift
   is carried out on the unsigned representation of x; the right
   shift is applied to the signed value.  The magnitude of n must be
   smaller than the width of Long. */
static inline Long lshift ( Long x, Int n )
{
   if (n < 0) {
      /* Negative count: move bits towards the least significant end. */
      return x >> (-n);
   }
   return (ULong)x << n;
}
| 158 | |
/* Identity on ULong: hands its argument back unchanged. */
static inline ULong idULong ( ULong x )
{
   ULong same = x;
   return same;
}
| 164 | |
sewardj | 118b23e | 2005-01-29 02:14:44 +0000 | [diff] [blame] | 165 | |
/* Common prologue for the ACTIONS_* flag-computation macros.

   Must be expanded where variables named cc_dep1_formal,
   cc_dep2_formal and cc_ndep_formal are in scope.  It declares:

     DATA_MASK   all-ones mask for an 8, 16 or 32 bit operand; any
                 other width gets the full 64-bit mask
     SIGN_MASK   a single bit at position data_bits-1
     CC_DEP1, CC_DEP2, CC_NDEP
                 local copies of the three formals

   Not every ACTIONS_* macro uses every one of these, so four of them
   are discarded with a void cast to stop the compiler complaining
   about unused variables.  A void cast is used rather than a
   self-assignment because self-assignments themselves draw warnings
   from some compilers.

   The macro parameter is parenthesised at each use so that an
   expression argument is evaluated as a unit, and its name avoids the
   double-underscore prefix, which is reserved for the implementation. */
#define PREAMBLE(data_bits)                                     \
   /* const */ ULong DATA_MASK                                  \
      = (data_bits)==8                                          \
           ? 0xFFULL                                            \
           : ((data_bits)==16                                   \
                ? 0xFFFFULL                                     \
                : ((data_bits)==32                              \
                     ? 0xFFFFFFFFULL                            \
                     : 0xFFFFFFFFFFFFFFFFULL));                 \
   /* const */ ULong SIGN_MASK = 1ULL << ((data_bits) - 1);     \
   /* const */ ULong CC_DEP1 = cc_dep1_formal;                  \
   /* const */ ULong CC_DEP2 = cc_dep2_formal;                  \
   /* const */ ULong CC_NDEP = cc_ndep_formal;                  \
   /* Four void casts, which stop the compiler complaining */   \
   /* about unused variables. */                                \
   (void)SIGN_MASK;                                             \
   (void)DATA_MASK;                                             \
   (void)CC_DEP2;                                               \
   (void)CC_NDEP;
| 186 | |
| 187 | |
| 188 | /*-------------------------------------------------------------*/ |
| 189 | |
/* Flag computation for ADD.  Expands to a block that returns the
   flags word.  As read below: CC_DEP1 is the left operand and CC_DEP2
   the right operand.  The sum is formed in 64 bits; casts to
   DATA_UTYPE narrow values to the operand width where the width
   matters.

     cf  narrowed result is below the narrowed left operand
     pf  parity_table lookup on the low byte of the result
     af  bit 4 of (res ^ argL ^ argR)
     zf  narrowed result is zero, placed at bit 6
     sf  bit DATA_BITS-1 of the result, moved to bit 7
     of  operands agree in sign but the result's sign differs from
         argL; bit DATA_BITS-1 of that test is moved to bit 11 and
         masked with AMD64G_CC_MASK_O */
#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL + argR;                                        \
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                   \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 208 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 209 | /*-------------------------------------------------------------*/ |
| 210 | |
/* Flag computation for SUB.  Expands to a block that returns the
   flags word.  As read below: CC_DEP1 is the left operand and CC_DEP2
   the right operand; the difference is formed in 64 bits and narrowed
   with DATA_UTYPE casts where the operand width matters.

     cf  narrowed left operand is below the narrowed right operand
     pf  parity_table lookup on the low byte of the result
     af  bit 4 of (res ^ argL ^ argR)
     zf  narrowed result is zero, placed at bit 6
     sf  bit DATA_BITS-1 of the result, moved to bit 7
     of  operands differ in sign and the result's sign differs from
         argL; bit DATA_BITS-1 of that test is moved to bit 11 and
         masked with AMD64G_CC_MASK_O */
#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL - argR;                                        \
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;                  \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
| 229 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 230 | /*-------------------------------------------------------------*/ |
| 231 | |
/* Flag computation for ADC (add with carry).  Expands to a block that
   returns the flags word.  The incoming carry is taken from CC_NDEP
   (masked with AMD64G_CC_MASK_C).  CC_DEP1 is the left operand; the
   right operand is recovered as CC_DEP2 ^ oldC -- presumably the
   thunk stores it XORed with the old carry; confirm against the code
   that builds the thunk.

   The carry-out test depends on the carry-in: with a carry-in the
   narrowed result being equal to argL also signals a carry, hence
   <= rather than <.  The remaining flags are derived exactly as for
   a plain add. */
#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, oldC, res;                               \
     oldC = CC_NDEP & AMD64G_CC_MASK_C;                         \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL + argR) + oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;               \
     else                                                       \
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
| 254 | |
| 255 | /*-------------------------------------------------------------*/ |
| 256 | |
/* Flag computation for SBB (subtract with borrow).  Expands to a
   block that returns the flags word.  The incoming carry is taken
   from CC_NDEP (masked with AMD64G_CC_MASK_C).  CC_DEP1 is the left
   operand; the right operand is recovered as CC_DEP2 ^ oldC --
   presumably the thunk stores it XORed with the old carry; confirm
   against the code that builds the thunk.

   The borrow-out test depends on the borrow-in: with a borrow-in,
   equal narrowed operands also produce a borrow, hence <= rather
   than <.  The remaining flags are derived exactly as for a plain
   subtract. */
#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, oldC, res;                               \
     oldC = CC_NDEP & AMD64G_CC_MASK_C;                         \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL - argR) - oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;              \
     else                                                       \
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;               \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
| 279 | |
| 280 | /*-------------------------------------------------------------*/ |
| 281 | |
/* Flag computation for the logic operations.  Expands to a block that
   returns the flags word.  Only CC_DEP1 is read; pf, zf and sf are
   derived from it (low-byte parity, narrowed value is zero, bit
   DATA_BITS-1 moved to bit 7).  cf, af and of are always zero. */
#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = 0;                                                    \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
| 295 | |
| 296 | /*-------------------------------------------------------------*/ |
| 297 | |
/* Flag computation for INC.  Expands to a block that returns the
   flags word.  CC_DEP1 is the result; the operands are reconstructed
   as argL = res - 1 and argR = 1.  The carry flag is not recomputed:
   it is copied from CC_NDEP (masked with AMD64G_CC_MASK_C).  Overflow
   is flagged (bit 11) exactly when the result, masked to the operand
   width, equals SIGN_MASK. */
#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     res  = CC_DEP1;                                            \
     argL = res - 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & AMD64G_CC_MASK_C;                           \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;               \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
| 315 | |
| 316 | /*-------------------------------------------------------------*/ |
| 317 | |
/* Flag computation for DEC.  Expands to a block that returns the
   flags word.  CC_DEP1 is the result; the operands are reconstructed
   as argL = res + 1 and argR = 1.  The carry flag is not recomputed:
   it is copied from CC_NDEP (masked with AMD64G_CC_MASK_C).  Overflow
   is flagged (bit 11) exactly when the result, masked to the operand
   width, equals SIGN_MASK - 1. */
#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     res  = CC_DEP1;                                            \
     argL = res + 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & AMD64G_CC_MASK_C;                           \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK)                                    \
           == ((ULong)SIGN_MASK - 1)) << 11;                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
| 336 | |
| 337 | /*-------------------------------------------------------------*/ |
| 338 | |
/* Flag computation for SHL.  Expands to a block that returns the
   flags word.  pf, zf and sf are derived from CC_DEP1.  The carry is
   bit DATA_BITS-1 of CC_DEP2, masked with AMD64G_CC_MASK_C, and the
   overflow flag is bit DATA_BITS-1 of (CC_DEP2 ^ CC_DEP1) moved to
   bit 11.  NOTE(review): this reads as CC_DEP1 = result and CC_DEP2 =
   the value shifted by one place less -- confirm against the code
   that builds the thunk.  af is forced to zero. */
#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C;      \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & AMD64G_CC_MASK_O;                                   \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
| 354 | |
| 355 | /*-------------------------------------------------------------*/ |
| 356 | |
/* Flag computation for SHR.  Expands to a block that returns the
   flags word.  pf, zf and sf are derived from CC_DEP1.  The carry is
   bit 0 of CC_DEP2, and the overflow flag is bit DATA_BITS-1 of
   (CC_DEP2 ^ CC_DEP1) moved to bit 11.  NOTE(review): this reads as
   CC_DEP1 = result and CC_DEP2 = the value shifted by one place less
   -- confirm against the code that builds the thunk.  af is forced to
   zero. */
#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = CC_DEP2 & 1;                                          \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & AMD64G_CC_MASK_O;                                   \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
| 372 | |
| 373 | /*-------------------------------------------------------------*/ |
| 374 | |
/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
/* Expands to a block that returns the flags word.  All flags other
   than O and C are carried over unchanged from CC_NDEP.  The two
   lshift calls bring bit DATA_BITS-1 and bit 0 of the result to
   bit 11, where they are XORed and masked with AMD64G_CC_MASK_O. */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong fl                                                   \
        = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))    \
          | (AMD64G_CC_MASK_C & CC_DEP1)                        \
          | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,                \
                                        11-(DATA_BITS-1))       \
                                 ^ lshift(CC_DEP1, 11)));       \
     return fl;                                                 \
   }                                                            \
}
| 389 | |
| 390 | /*-------------------------------------------------------------*/ |
| 391 | |
/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
/* Expands to a block that returns the flags word.  All flags other
   than O and C are carried over unchanged from CC_NDEP.  The two
   lshift calls bring bit DATA_BITS-1 and bit DATA_BITS-2 of the
   result to bit 11, where they are XORed and masked with
   AMD64G_CC_MASK_O. */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong fl                                                   \
        = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))    \
          | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))     \
          | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,                \
                                        11-(DATA_BITS-1))       \
                                 ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \
     return fl;                                                 \
   }                                                            \
}
| 406 | |
| 407 | /*-------------------------------------------------------------*/ |
| 408 | |
/* Flag computation for unsigned multiply on operands narrower than
   64 bits.  Expands to a block that returns the flags word.

   DATA_UTYPE is the operand type and DATA_U2TYPE the type in which
   the full product is formed; NARROWtoU / NARROWto2U convert to those
   two types.  Both operands are narrowed to DATA_UTYPE and then
   widened to DATA_U2TYPE before multiplying.  'lo' is the product
   narrowed back to the operand width and 'hi' is the product shifted
   right by DATA_BITS.

   The multiplication for 'lo' is done on the widened values, the same
   way as for 'rr'.  Multiplying two DATA_UTYPE values directly lets a
   type narrower than int be promoted to signed int, and the product
   of two large values can then overflow, which is undefined
   behaviour.  This assumes DATA_U2TYPE is wide enough to hold the
   product, as the existing 'rr' computation already does.

   cf and of are both set exactly when 'hi' is non-zero; pf, zf and sf
   are derived from 'lo'; af is forced to zero. */
#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU,          \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_U2TYPE)(DATA_UTYPE)CC_DEP1)         \
                     * ((DATA_U2TYPE)(DATA_UTYPE)CC_DEP2) );    \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
| 432 | |
| 433 | /*-------------------------------------------------------------*/ |
| 434 | |
/* Flag computation for signed multiply on operands narrower than
   64 bits.  Expands to a block that returns the flags word.

   DATA_STYPE is the operand type and DATA_S2TYPE the type in which
   the full product is formed; NARROWtoS / NARROWto2S convert to those
   two types.  Both operands are narrowed to DATA_STYPE and then
   widened to DATA_S2TYPE before multiplying.  'lo' is the product
   narrowed back to the operand width and 'hi' is the product shifted
   right by DATA_BITS.

   cf and of are both set exactly when 'hi' differs from 'lo' shifted
   right by DATA_BITS-1, i.e. when 'hi' is not simply the sign
   extension of 'lo'.  pf, zf and sf are derived from 'lo'; af is
   forced to zero. */
#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS,          \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
| 458 | |
sewardj | 1a01e65 | 2005-02-23 11:39:21 +0000 | [diff] [blame] | 459 | /*-------------------------------------------------------------*/ |
| 460 | |
/* Flag computation for 64-bit unsigned multiply.  Expands to a block
   that returns the flags word.  The 128-bit product of CC_DEP1 and
   CC_DEP2 is obtained from mullU64 as two 64-bit halves.  cf and of
   are both set exactly when the upper half is non-zero; pf, zf and sf
   are derived from the lower half; af is forced to zero. */
#define ACTIONS_UMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong lo, hi;                                              \
     mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo );       \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
| 476 | |
| 477 | /*-------------------------------------------------------------*/ |
| 478 | |
/* Flag computation for 64-bit signed multiply.  Expands to a block
   that returns the flags word.  The 128-bit product of CC_DEP1 and
   CC_DEP2 is obtained from mullS64 as two 64-bit halves.  cf and of
   are both set exactly when the upper half differs from the lower
   half shifted right by 63, i.e. when the upper half is not simply
   the sign extension of the lower half.  pf, zf and sf are derived
   from the lower half; af is forced to zero. */
#define ACTIONS_SMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { ULong cf, pf, af, zf, sf, of;                              \
     Long lo, hi;                                               \
     mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo );         \
     cf = (hi != (lo >>/*s*/ (64-1)));                          \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
| 494 | |
sewardj | cc3d219 | 2013-03-27 11:37:33 +0000 | [diff] [blame] | 495 | /*-------------------------------------------------------------*/ |
| 496 | |
/* Flag computation for ANDN.  Expands to a block that returns the
   flags word.  Only CC_DEP1 is read: zf is set when its narrowed
   value is zero and sf is its bit DATA_BITS-1 moved to bit 7.
   cf, pf, af and of are always zero. */
#define ACTIONS_ANDN(DATA_BITS,DATA_UTYPE)                      \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = 0;                                                    \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
| 510 | |
| 511 | /*-------------------------------------------------------------*/ |
| 512 | |
/* Flag computation for BLSI.  Expands to a block that returns the
   flags word.  cf is set when the narrowed CC_DEP2 is non-zero.  zf
   and sf come from CC_DEP1 (narrowed value is zero; bit DATA_BITS-1
   moved to bit 7).  pf, af and of are always zero. */
#define ACTIONS_BLSI(DATA_BITS,DATA_UTYPE)                      \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = ((DATA_UTYPE)CC_DEP2 != 0);                           \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
| 526 | |
| 527 | /*-------------------------------------------------------------*/ |
| 528 | |
/* Flag computation for BLSMSK.  Expands to a block that returns the
   flags word.  cf is set when the narrowed CC_DEP2 is zero, and sf is
   bit DATA_BITS-1 of CC_DEP1 moved to bit 7.  pf, af, zf and of are
   always zero.

   The flag temporaries are ULong, matching every other ACTIONS_*
   macro and the unsigned flags word they are combined into. */
#define ACTIONS_BLSMSK(DATA_BITS,DATA_UTYPE)                    \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = ((DATA_UTYPE)CC_DEP2 == 0);                           \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = 0;                                                    \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
| 542 | |
| 543 | /*-------------------------------------------------------------*/ |
| 544 | |
/* Flag computation for BLSR.  Expands to a block that returns the
   flags word.  cf is set when the narrowed CC_DEP2 is zero.  zf and
   sf come from CC_DEP1 (narrowed value is zero; bit DATA_BITS-1 moved
   to bit 7).  pf, af and of are always zero. */
#define ACTIONS_BLSR(DATA_BITS,DATA_UTYPE)                      \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = ((DATA_UTYPE)CC_DEP2 == 0);                           \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
| 558 | |
| 559 | /*-------------------------------------------------------------*/ |
| 560 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 561 | |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 562 | #if PROFILE_RFLAGS |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 563 | |
/* Profiling state, compiled in only when PROFILE_RFLAGS is non-zero. */

/* Becomes True once initCounts() has zeroed the tables below. */
static Bool initted = False;

/* C flag, fast route */
/* One counter per cc_op value. */
static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
/* C flag, slow route */
/* One counter per cc_op value. */
static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
/* table for calculate_cond */
/* Indexed by cc_op value and then by one of 16 condition codes. */
static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

/* True whenever the low 22 bits of the combined entry count are all
   zero, i.e. the combined count is a multiple of 0x400000. */
#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
| 578 | |
| 579 | |
| 580 | static void showCounts ( void ) |
| 581 | { |
| 582 | Int op, co; |
florian | 5df8ab0 | 2012-10-13 19:34:19 +0000 | [diff] [blame] | 583 | HChar ch; |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 584 | vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n", |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 585 | n_calc_all, n_calc_cond, n_calc_c); |
| 586 | |
| 587 | vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE" |
| 588 | " S NS P NP L NL LE NLE\n"); |
| 589 | vex_printf(" -----------------------------------------------------" |
| 590 | "----------------------------------------\n"); |
| 591 | for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) { |
| 592 | |
| 593 | ch = ' '; |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 594 | if (op > 0 && (op-1) % 4 == 0) |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 595 | ch = 'B'; |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 596 | if (op > 0 && (op-1) % 4 == 1) |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 597 | ch = 'W'; |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 598 | if (op > 0 && (op-1) % 4 == 2) |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 599 | ch = 'L'; |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 600 | if (op > 0 && (op-1) % 4 == 3) |
| 601 | ch = 'Q'; |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 602 | |
| 603 | vex_printf("%2d%c: ", op, ch); |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 604 | vex_printf("%6u ", tabc_slow[op]); |
| 605 | vex_printf("%6u ", tabc_fast[op]); |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 606 | for (co = 0; co < 16; co++) { |
| 607 | Int n = tab_cond[op][co]; |
| 608 | if (n >= 1000) { |
| 609 | vex_printf(" %3dK", n / 1000); |
| 610 | } else |
| 611 | if (n >= 0) { |
| 612 | vex_printf(" %3d ", n ); |
| 613 | } else { |
| 614 | vex_printf(" "); |
| 615 | } |
| 616 | } |
| 617 | vex_printf("\n"); |
| 618 | } |
| 619 | vex_printf("\n"); |
| 620 | } |
| 621 | |
| 622 | static void initCounts ( void ) |
| 623 | { |
| 624 | Int op, co; |
| 625 | initted = True; |
| 626 | for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) { |
| 627 | tabc_fast[op] = tabc_slow[op] = 0; |
| 628 | for (co = 0; co < 16; co++) |
| 629 | tab_cond[op][co] = 0; |
| 630 | } |
| 631 | } |
| 632 | |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 633 | #endif /* PROFILE_RFLAGS */ |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 634 | |
| 635 | |
/* WORKER FOR THE CLEAN HELPERS BELOW; not itself called directly from
   generated code. */
/* Calculate all the 6 flags (O, S, Z, A, C, P) from the supplied thunk
   parameters.

   cc_op selects how the remaining three values are interpreted:

   - AMD64G_CC_OP_COPY: cc_dep1_formal already holds the flags; they
     are returned masked down to the six O/S/Z/A/C/P bits.

   - Every other recognised op expands an ACTIONS_* macro, instantiated
     with the operand width in bits and the matching integer type(s).
     The macro bodies are defined elsewhere in this file; presumably
     each one computes the flags from the *_formal parameters and
     returns, since no case here has an explicit break or return --
     confirm against the macro definitions.

   - Anything else is a caller error: the arguments are printed and the
     helper panics.

   The *_formal parameter names must not be changed without checking
   the ACTIONS_* macros, which presumably refer to them by name. */
static
ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op, 
                                        ULong cc_dep1_formal, 
                                        ULong cc_dep2_formal,
                                        ULong cc_ndep_formal )
{
   switch (cc_op) {
      /* Flags are stored verbatim in DEP1; just strip unrelated bits. */
      case AMD64G_CC_OP_COPY:
         return cc_dep1_formal
                & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z 
                   | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);

      /* Arithmetic, logic, inc/dec, shift and rotate ops: one case per
         operand width (B=8, W=16, L=32, Q=64 bits). */
      case AMD64G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
      case AMD64G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case AMD64G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );
      case AMD64G_CC_OP_ADDQ:   ACTIONS_ADD( 64, ULong  );

      case AMD64G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
      case AMD64G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case AMD64G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );
      case AMD64G_CC_OP_ADCQ:   ACTIONS_ADC( 64, ULong  );

      case AMD64G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case AMD64G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case AMD64G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );
      case AMD64G_CC_OP_SUBQ:   ACTIONS_SUB( 64, ULong  );

      case AMD64G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case AMD64G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case AMD64G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );
      case AMD64G_CC_OP_SBBQ:   ACTIONS_SBB( 64, ULong  );

      case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );
      case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong  );

      case AMD64G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case AMD64G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case AMD64G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );
      case AMD64G_CC_OP_INCQ:   ACTIONS_INC( 64, ULong  );

      case AMD64G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case AMD64G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case AMD64G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );
      case AMD64G_CC_OP_DECQ:   ACTIONS_DEC( 64, ULong  );

      case AMD64G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case AMD64G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case AMD64G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );
      case AMD64G_CC_OP_SHLQ:   ACTIONS_SHL( 64, ULong  );

      case AMD64G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case AMD64G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case AMD64G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );
      case AMD64G_CC_OP_SHRQ:   ACTIONS_SHR( 64, ULong  );

      case AMD64G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case AMD64G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case AMD64G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );
      case AMD64G_CC_OP_ROLQ:   ACTIONS_ROL( 64, ULong  );

      case AMD64G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case AMD64G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case AMD64G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );
      case AMD64G_CC_OP_RORQ:   ACTIONS_ROR( 64, ULong  );

      /* Multiplies up to 32 bits take a narrow type plus a type of
         twice the width (and the matching narrowing helpers); the
         64-bit variants use dedicated macros. */
      case AMD64G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                  UShort, toUShort );
      case AMD64G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                  UInt,   toUInt );
      case AMD64G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                  ULong,  idULong );

      case AMD64G_CC_OP_UMULQ:  ACTIONS_UMULQ;

      case AMD64G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                  Short,  toUShort );
      case AMD64G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort, 
                                                  Int,    toUInt   );
      case AMD64G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                  Long,   idULong );

      case AMD64G_CC_OP_SMULQ:  ACTIONS_SMULQ;

      /* ANDN / BLSI / BLSMSK / BLSR: only 32- and 64-bit forms. */
      case AMD64G_CC_OP_ANDN32: ACTIONS_ANDN( 32, UInt   );
      case AMD64G_CC_OP_ANDN64: ACTIONS_ANDN( 64, ULong  );

      case AMD64G_CC_OP_BLSI32: ACTIONS_BLSI( 32, UInt   );
      case AMD64G_CC_OP_BLSI64: ACTIONS_BLSI( 64, ULong  );

      case AMD64G_CC_OP_BLSMSK32: ACTIONS_BLSMSK( 32, UInt   );
      case AMD64G_CC_OP_BLSMSK64: ACTIONS_BLSMSK( 64, ULong  );

      case AMD64G_CC_OP_BLSR32: ACTIONS_BLSR( 32, UInt   );
      case AMD64G_CC_OP_BLSR64: ACTIONS_BLSR( 64, ULong  );

      default:
         /* shouldn't really make these calls from generated code */
         /* Unknown cc_op: dump the whole thunk for diagnosis, then
            panic. */
         vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
                    "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
   }
}
| 744 | |
| 745 | |
| 746 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 747 | /* Calculate all the 6 flags from the supplied thunk parameters. */ |
| 748 | ULong amd64g_calculate_rflags_all ( ULong cc_op, |
| 749 | ULong cc_dep1, |
| 750 | ULong cc_dep2, |
| 751 | ULong cc_ndep ) |
| 752 | { |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 753 | # if PROFILE_RFLAGS |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 754 | if (!initted) initCounts(); |
| 755 | n_calc_all++; |
| 756 | if (SHOW_COUNTS_NOW) showCounts(); |
| 757 | # endif |
| 758 | return |
| 759 | amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep ); |
| 760 | } |
| 761 | |
| 762 | |
| 763 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 764 | /* Calculate just the carry flag from the supplied thunk parameters. */ |
| 765 | ULong amd64g_calculate_rflags_c ( ULong cc_op, |
| 766 | ULong cc_dep1, |
| 767 | ULong cc_dep2, |
| 768 | ULong cc_ndep ) |
| 769 | { |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 770 | # if PROFILE_RFLAGS |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 771 | if (!initted) initCounts(); |
| 772 | n_calc_c++; |
| 773 | tabc_fast[cc_op]++; |
| 774 | if (SHOW_COUNTS_NOW) showCounts(); |
| 775 | # endif |
| 776 | |
| 777 | /* Fast-case some common ones. */ |
| 778 | switch (cc_op) { |
sewardj | 7fc494b | 2005-05-05 12:05:11 +0000 | [diff] [blame] | 779 | case AMD64G_CC_OP_COPY: |
| 780 | return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1; |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 781 | case AMD64G_CC_OP_LOGICQ: |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 782 | case AMD64G_CC_OP_LOGICL: |
| 783 | case AMD64G_CC_OP_LOGICW: |
| 784 | case AMD64G_CC_OP_LOGICB: |
| 785 | return 0; |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 786 | // case AMD64G_CC_OP_SUBL: |
| 787 | // return ((UInt)cc_dep1) < ((UInt)cc_dep2) |
| 788 | // ? AMD64G_CC_MASK_C : 0; |
| 789 | // case AMD64G_CC_OP_SUBW: |
| 790 | // return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF)) |
| 791 | // ? AMD64G_CC_MASK_C : 0; |
| 792 | // case AMD64G_CC_OP_SUBB: |
| 793 | // return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF)) |
| 794 | // ? AMD64G_CC_MASK_C : 0; |
| 795 | // case AMD64G_CC_OP_INCL: |
| 796 | // case AMD64G_CC_OP_DECL: |
| 797 | // return cc_ndep & AMD64G_CC_MASK_C; |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 798 | default: |
| 799 | break; |
| 800 | } |
| 801 | |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 802 | # if PROFILE_RFLAGS |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 803 | tabc_fast[cc_op]--; |
| 804 | tabc_slow[cc_op]++; |
| 805 | # endif |
| 806 | |
| 807 | return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep) |
| 808 | & AMD64G_CC_MASK_C; |
| 809 | } |
| 810 | |
| 811 | |
| 812 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 813 | /* returns 1 or 0 */ |
| 814 | ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond, |
| 815 | ULong cc_op, |
| 816 | ULong cc_dep1, |
| 817 | ULong cc_dep2, |
| 818 | ULong cc_ndep ) |
| 819 | { |
| 820 | ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1, |
| 821 | cc_dep2, cc_ndep); |
| 822 | ULong of,sf,zf,cf,pf; |
| 823 | ULong inv = cond & 1; |
| 824 | |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 825 | # if PROFILE_RFLAGS |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 826 | if (!initted) initCounts(); |
| 827 | tab_cond[cc_op][cond]++; |
| 828 | n_calc_cond++; |
| 829 | if (SHOW_COUNTS_NOW) showCounts(); |
| 830 | # endif |
| 831 | |
| 832 | switch (cond) { |
| 833 | case AMD64CondNO: |
| 834 | case AMD64CondO: /* OF == 1 */ |
| 835 | of = rflags >> AMD64G_CC_SHIFT_O; |
| 836 | return 1 & (inv ^ of); |
| 837 | |
| 838 | case AMD64CondNZ: |
| 839 | case AMD64CondZ: /* ZF == 1 */ |
| 840 | zf = rflags >> AMD64G_CC_SHIFT_Z; |
| 841 | return 1 & (inv ^ zf); |
| 842 | |
| 843 | case AMD64CondNB: |
| 844 | case AMD64CondB: /* CF == 1 */ |
| 845 | cf = rflags >> AMD64G_CC_SHIFT_C; |
| 846 | return 1 & (inv ^ cf); |
| 847 | break; |
| 848 | |
| 849 | case AMD64CondNBE: |
| 850 | case AMD64CondBE: /* (CF or ZF) == 1 */ |
| 851 | cf = rflags >> AMD64G_CC_SHIFT_C; |
| 852 | zf = rflags >> AMD64G_CC_SHIFT_Z; |
| 853 | return 1 & (inv ^ (cf | zf)); |
| 854 | break; |
| 855 | |
| 856 | case AMD64CondNS: |
| 857 | case AMD64CondS: /* SF == 1 */ |
| 858 | sf = rflags >> AMD64G_CC_SHIFT_S; |
| 859 | return 1 & (inv ^ sf); |
| 860 | |
| 861 | case AMD64CondNP: |
| 862 | case AMD64CondP: /* PF == 1 */ |
| 863 | pf = rflags >> AMD64G_CC_SHIFT_P; |
| 864 | return 1 & (inv ^ pf); |
| 865 | |
| 866 | case AMD64CondNL: |
| 867 | case AMD64CondL: /* (SF xor OF) == 1 */ |
| 868 | sf = rflags >> AMD64G_CC_SHIFT_S; |
| 869 | of = rflags >> AMD64G_CC_SHIFT_O; |
| 870 | return 1 & (inv ^ (sf ^ of)); |
| 871 | break; |
| 872 | |
| 873 | case AMD64CondNLE: |
| 874 | case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */ |
| 875 | sf = rflags >> AMD64G_CC_SHIFT_S; |
| 876 | of = rflags >> AMD64G_CC_SHIFT_O; |
| 877 | zf = rflags >> AMD64G_CC_SHIFT_Z; |
| 878 | return 1 & (inv ^ ((sf ^ of) | zf)); |
| 879 | break; |
| 880 | |
| 881 | default: |
| 882 | /* shouldn't really make these calls from generated code */ |
| 883 | vex_printf("amd64g_calculate_condition" |
sewardj | 1fa7b80 | 2005-03-25 14:39:37 +0000 | [diff] [blame] | 884 | "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n", |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 885 | cond, cc_op, cc_dep1, cc_dep2, cc_ndep ); |
| 886 | vpanic("amd64g_calculate_condition"); |
| 887 | } |
| 888 | } |
| 889 | |
| 890 | |
| 891 | /* VISIBLE TO LIBVEX CLIENT */ |
florian | efa834a | 2012-11-24 21:07:14 +0000 | [diff] [blame] | 892 | ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/const VexGuestAMD64State* vex_state ) |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 893 | { |
| 894 | ULong rflags = amd64g_calculate_rflags_all_WRK( |
| 895 | vex_state->guest_CC_OP, |
| 896 | vex_state->guest_CC_DEP1, |
| 897 | vex_state->guest_CC_DEP2, |
| 898 | vex_state->guest_CC_NDEP |
| 899 | ); |
sewardj | 7de0d3c | 2005-02-13 02:26:41 +0000 | [diff] [blame] | 900 | Long dflag = vex_state->guest_DFLAG; |
| 901 | vassert(dflag == 1 || dflag == -1); |
| 902 | if (dflag == -1) |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 903 | rflags |= (1<<10); |
sewardj | 85520e4 | 2005-02-19 15:22:38 +0000 | [diff] [blame] | 904 | if (vex_state->guest_IDFLAG == 1) |
| 905 | rflags |= (1<<21); |
sewardj | 5e120aa | 2010-09-28 15:59:04 +0000 | [diff] [blame] | 906 | if (vex_state->guest_ACFLAG == 1) |
| 907 | rflags |= (1<<18); |
| 908 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 909 | return rflags; |
| 910 | } |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 911 | |
sewardj | d660d41 | 2008-12-03 21:29:59 +0000 | [diff] [blame] | 912 | /* VISIBLE TO LIBVEX CLIENT */ |
| 913 | void |
| 914 | LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag, |
| 915 | /*MOD*/VexGuestAMD64State* vex_state ) |
| 916 | { |
| 917 | ULong oszacp = amd64g_calculate_rflags_all_WRK( |
| 918 | vex_state->guest_CC_OP, |
| 919 | vex_state->guest_CC_DEP1, |
| 920 | vex_state->guest_CC_DEP2, |
| 921 | vex_state->guest_CC_NDEP |
| 922 | ); |
| 923 | if (new_carry_flag & 1) { |
| 924 | oszacp |= AMD64G_CC_MASK_C; |
| 925 | } else { |
| 926 | oszacp &= ~AMD64G_CC_MASK_C; |
| 927 | } |
| 928 | vex_state->guest_CC_OP = AMD64G_CC_OP_COPY; |
| 929 | vex_state->guest_CC_DEP1 = oszacp; |
| 930 | vex_state->guest_CC_DEP2 = 0; |
| 931 | vex_state->guest_CC_NDEP = 0; |
| 932 | } |
| 933 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 934 | |
| 935 | /*---------------------------------------------------------------*/ |
| 936 | /*--- %rflags translation-time function specialisers. ---*/ |
| 937 | /*--- These help iropt specialise calls the above run-time ---*/ |
| 938 | /*--- %rflags functions. ---*/ |
| 939 | /*---------------------------------------------------------------*/ |
| 940 | |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 941 | /* Used by the optimiser to try specialisations. Returns an |
| 942 | equivalent expression, or NULL if none. */ |
| 943 | |
| 944 | static Bool isU64 ( IRExpr* e, ULong n ) |
| 945 | { |
sewardj | 65b17c6 | 2005-05-02 15:52:44 +0000 | [diff] [blame] | 946 | return toBool( e->tag == Iex_Const |
| 947 | && e->Iex.Const.con->tag == Ico_U64 |
| 948 | && e->Iex.Const.con->Ico.U64 == n ); |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 949 | } |
sewardj | 354e5c6 | 2005-01-27 20:12:52 +0000 | [diff] [blame] | 950 | |
florian | 1ff4756 | 2012-10-21 02:09:51 +0000 | [diff] [blame] | 951 | IRExpr* guest_amd64_spechelper ( const HChar* function_name, |
sewardj | be91791 | 2010-08-22 12:38:53 +0000 | [diff] [blame] | 952 | IRExpr** args, |
| 953 | IRStmt** precedingStmts, |
| 954 | Int n_precedingStmts ) |
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 955 | { |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 956 | # define unop(_op,_a1) IRExpr_Unop((_op),(_a1)) |
| 957 | # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2)) |
| 958 | # define mkU64(_n) IRExpr_Const(IRConst_U64(_n)) |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 959 | # define mkU32(_n) IRExpr_Const(IRConst_U32(_n)) |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 960 | # define mkU8(_n) IRExpr_Const(IRConst_U8(_n)) |
| 961 | |
| 962 | Int i, arity = 0; |
| 963 | for (i = 0; args[i]; i++) |
| 964 | arity++; |
| 965 | # if 0 |
| 966 | vex_printf("spec request:\n"); |
| 967 | vex_printf(" %s ", function_name); |
| 968 | for (i = 0; i < arity; i++) { |
| 969 | vex_printf(" "); |
| 970 | ppIRExpr(args[i]); |
| 971 | } |
| 972 | vex_printf("\n"); |
| 973 | # endif |
| 974 | |
| 975 | /* --------- specialising "amd64g_calculate_condition" --------- */ |
| 976 | |
| 977 | if (vex_streq(function_name, "amd64g_calculate_condition")) { |
| 978 | /* specialise calls to above "calculate condition" function */ |
| 979 | IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2; |
| 980 | vassert(arity == 5); |
| 981 | cond = args[0]; |
| 982 | cc_op = args[1]; |
| 983 | cc_dep1 = args[2]; |
| 984 | cc_dep2 = args[3]; |
| 985 | |
sewardj | db261e4 | 2005-05-11 23:16:43 +0000 | [diff] [blame] | 986 | /*---------------- ADDQ ----------------*/ |
| 987 | |
| 988 | if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) { |
| 989 | /* long long add, then Z --> test (dst+src == 0) */ |
| 990 | return unop(Iop_1Uto64, |
| 991 | binop(Iop_CmpEQ64, |
| 992 | binop(Iop_Add64, cc_dep1, cc_dep2), |
| 993 | mkU64(0))); |
| 994 | } |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 995 | |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 996 | /*---------------- ADDL ----------------*/ |
| 997 | |
| 998 | if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondO)) { |
| 999 | /* This is very commonly generated by Javascript JITs, for |
| 1000 | the idiom "do a 32-bit add and jump to out-of-line code if |
| 1001 | an overflow occurs". */ |
| 1002 | /* long add, then O (overflow) |
| 1003 | --> ((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 + dep2)))[31] |
| 1004 | --> (((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1 |
| 1005 | --> (((not(dep1 ^ dep2)) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1 |
| 1006 | */ |
| 1007 | vassert(isIRAtom(cc_dep1)); |
| 1008 | vassert(isIRAtom(cc_dep2)); |
| 1009 | return |
| 1010 | binop(Iop_And64, |
| 1011 | binop(Iop_Shr64, |
| 1012 | binop(Iop_And64, |
| 1013 | unop(Iop_Not64, |
| 1014 | binop(Iop_Xor64, cc_dep1, cc_dep2)), |
| 1015 | binop(Iop_Xor64, |
| 1016 | cc_dep1, |
| 1017 | binop(Iop_Add64, cc_dep1, cc_dep2))), |
| 1018 | mkU8(31)), |
| 1019 | mkU64(1)); |
| 1020 | |
| 1021 | } |
| 1022 | |
sewardj | 4b06a0b | 2005-11-13 19:51:04 +0000 | [diff] [blame] | 1023 | /*---------------- SUBQ ----------------*/ |
| 1024 | |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1025 | /* 0, */ |
| 1026 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondO)) { |
| 1027 | /* long long sub/cmp, then O (overflow) |
| 1028 | --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[63] |
| 1029 | --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2))) >>u 63 |
| 1030 | */ |
| 1031 | vassert(isIRAtom(cc_dep1)); |
| 1032 | vassert(isIRAtom(cc_dep2)); |
| 1033 | return binop(Iop_Shr64, |
| 1034 | binop(Iop_And64, |
| 1035 | binop(Iop_Xor64, cc_dep1, cc_dep2), |
| 1036 | binop(Iop_Xor64, |
| 1037 | cc_dep1, |
| 1038 | binop(Iop_Sub64, cc_dep1, cc_dep2))), |
| 1039 | mkU8(64)); |
| 1040 | } |
| 1041 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNO)) { |
| 1042 | /* No action. Never yet found a test case. */ |
| 1043 | } |
| 1044 | |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1045 | /* 2, 3 */ |
sewardj | 4b06a0b | 2005-11-13 19:51:04 +0000 | [diff] [blame] | 1046 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) { |
| 1047 | /* long long sub/cmp, then B (unsigned less than) |
| 1048 | --> test dst <u src */ |
| 1049 | return unop(Iop_1Uto64, |
| 1050 | binop(Iop_CmpLT64U, cc_dep1, cc_dep2)); |
| 1051 | } |
sewardj | a9e4a80 | 2005-12-26 19:33:55 +0000 | [diff] [blame] | 1052 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) { |
| 1053 | /* long long sub/cmp, then NB (unsigned greater than or equal) |
| 1054 | --> test src <=u dst */ |
| 1055 | /* Note, args are opposite way round from the usual */ |
| 1056 | return unop(Iop_1Uto64, |
| 1057 | binop(Iop_CmpLE64U, cc_dep2, cc_dep1)); |
| 1058 | } |
| 1059 | |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1060 | /* 4, 5 */ |
| 1061 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) { |
| 1062 | /* long long sub/cmp, then Z --> test dst==src */ |
sewardj | 3cfd1f0 | 2013-08-07 09:45:08 +0000 | [diff] [blame] | 1063 | return unop(Iop_1Uto64, |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1064 | binop(Iop_CmpEQ64,cc_dep1,cc_dep2)); |
| 1065 | } |
| 1066 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) { |
| 1067 | /* long long sub/cmp, then NZ --> test dst!=src */ |
| 1068 | return unop(Iop_1Uto64, |
| 1069 | binop(Iop_CmpNE64,cc_dep1,cc_dep2)); |
sewardj | 3cfd1f0 | 2013-08-07 09:45:08 +0000 | [diff] [blame] | 1070 | } |
| 1071 | |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1072 | /* 6, 7 */ |
sewardj | a9e4a80 | 2005-12-26 19:33:55 +0000 | [diff] [blame] | 1073 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) { |
| 1074 | /* long long sub/cmp, then BE (unsigned less than or equal) |
| 1075 | --> test dst <=u src */ |
| 1076 | return unop(Iop_1Uto64, |
| 1077 | binop(Iop_CmpLE64U, cc_dep1, cc_dep2)); |
| 1078 | } |
sewardj | 3a05a15 | 2012-02-23 07:36:43 +0000 | [diff] [blame] | 1079 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNBE)) { |
| 1080 | /* long long sub/cmp, then NBE (unsigned greater than) |
| 1081 | --> test !(dst <=u src) */ |
| 1082 | return binop(Iop_Xor64, |
| 1083 | unop(Iop_1Uto64, |
| 1084 | binop(Iop_CmpLE64U, cc_dep1, cc_dep2)), |
| 1085 | mkU64(1)); |
| 1086 | } |
sewardj | a9e4a80 | 2005-12-26 19:33:55 +0000 | [diff] [blame] | 1087 | |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1088 | /* 8, 9 */ |
| 1089 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondS)) { |
| 1090 | /* long long sub/cmp, then S (negative) |
| 1091 | --> (dst-src)[63] |
| 1092 | --> (dst-src) >>u 63 */ |
| 1093 | return binop(Iop_Shr64, |
| 1094 | binop(Iop_Sub64, cc_dep1, cc_dep2), |
| 1095 | mkU8(63)); |
| 1096 | } |
| 1097 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNS)) { |
| 1098 | /* long long sub/cmp, then NS (not negative) |
| 1099 | --> (dst-src)[63] ^ 1 |
| 1100 | --> ((dst-src) >>u 63) ^ 1 */ |
| 1101 | return binop(Iop_Xor64, |
| 1102 | binop(Iop_Shr64, |
| 1103 | binop(Iop_Sub64, cc_dep1, cc_dep2), |
| 1104 | mkU8(63)), |
| 1105 | mkU64(1)); |
| 1106 | } |
| 1107 | |
| 1108 | /* 12, 13 */ |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1109 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) { |
| 1110 | /* long long sub/cmp, then L (signed less than) |
| 1111 | --> test dst <s src */ |
| 1112 | return unop(Iop_1Uto64, |
| 1113 | binop(Iop_CmpLT64S, cc_dep1, cc_dep2)); |
| 1114 | } |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1115 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNL)) { |
| 1116 | /* long long sub/cmp, then NL (signed greater than or equal) |
| 1117 | --> test dst >=s src |
| 1118 | --> test src <=s dst */ |
| 1119 | return unop(Iop_1Uto64, |
| 1120 | binop(Iop_CmpLE64S, cc_dep2, cc_dep1)); |
| 1121 | } |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1122 | |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1123 | /* 14, 15 */ |
| 1124 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondLE)) { |
| 1125 | /* long long sub/cmp, then LE (signed less than or equal) |
| 1126 | --> test dst <=s src */ |
| 1127 | return unop(Iop_1Uto64, |
| 1128 | binop(Iop_CmpLE64S, cc_dep1, cc_dep2)); |
| 1129 | } |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1130 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNLE)) { |
| 1131 | /* long sub/cmp, then NLE (signed greater than) |
| 1132 | --> test !(dst <=s src) |
| 1133 | --> test (dst >s src) |
| 1134 | --> test (src <s dst) */ |
| 1135 | return unop(Iop_1Uto64, |
| 1136 | binop(Iop_CmpLT64S, cc_dep2, cc_dep1)); |
| 1137 | |
| 1138 | } |
| 1139 | |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1140 | /*---------------- SUBL ----------------*/ |
| 1141 | |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1142 | /* 0, */ |
| 1143 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondO)) { |
| 1144 | /* This is very commonly generated by Javascript JITs, for |
| 1145 | the idiom "do a 32-bit subtract and jump to out-of-line |
| 1146 | code if an overflow occurs". */ |
| 1147 | /* long sub/cmp, then O (overflow) |
| 1148 | --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[31] |
| 1149 | --> (((dep1 ^ dep2) & (dep1 ^ (dep1 -64 dep2))) >>u 31) & 1 |
| 1150 | */ |
| 1151 | vassert(isIRAtom(cc_dep1)); |
| 1152 | vassert(isIRAtom(cc_dep2)); |
| 1153 | return |
| 1154 | binop(Iop_And64, |
| 1155 | binop(Iop_Shr64, |
| 1156 | binop(Iop_And64, |
| 1157 | binop(Iop_Xor64, cc_dep1, cc_dep2), |
| 1158 | binop(Iop_Xor64, |
| 1159 | cc_dep1, |
| 1160 | binop(Iop_Sub64, cc_dep1, cc_dep2))), |
| 1161 | mkU8(31)), |
| 1162 | mkU64(1)); |
| 1163 | } |
| 1164 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNO)) { |
| 1165 | /* No action. Never yet found a test case. */ |
| 1166 | } |
| 1167 | |
| 1168 | /* 2, 3 */ |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1169 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) { |
| 1170 | /* long sub/cmp, then B (unsigned less than) |
| 1171 | --> test dst <u src */ |
| 1172 | return unop(Iop_1Uto64, |
| 1173 | binop(Iop_CmpLT32U, |
| 1174 | unop(Iop_64to32, cc_dep1), |
| 1175 | unop(Iop_64to32, cc_dep2))); |
| 1176 | } |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1177 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNB)) { |
| 1178 | /* long sub/cmp, then NB (unsigned greater than or equal) |
| 1179 | --> test src <=u dst */ |
| 1180 | /* Note, args are opposite way round from the usual */ |
| 1181 | return unop(Iop_1Uto64, |
| 1182 | binop(Iop_CmpLE32U, |
| 1183 | unop(Iop_64to32, cc_dep2), |
| 1184 | unop(Iop_64to32, cc_dep1))); |
| 1185 | } |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1186 | |
| 1187 | /* 4, 5 */ |
sewardj | db261e4 | 2005-05-11 23:16:43 +0000 | [diff] [blame] | 1188 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) { |
| 1189 | /* long sub/cmp, then Z --> test dst==src */ |
| 1190 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1191 | binop(Iop_CmpEQ32, |
| 1192 | unop(Iop_64to32, cc_dep1), |
| 1193 | unop(Iop_64to32, cc_dep2))); |
sewardj | a9e4a80 | 2005-12-26 19:33:55 +0000 | [diff] [blame] | 1194 | } |
sewardj | a9e4a80 | 2005-12-26 19:33:55 +0000 | [diff] [blame] | 1195 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) { |
| 1196 | /* long sub/cmp, then NZ --> test dst!=src */ |
| 1197 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1198 | binop(Iop_CmpNE32, |
| 1199 | unop(Iop_64to32, cc_dep1), |
| 1200 | unop(Iop_64to32, cc_dep2))); |
sewardj | db261e4 | 2005-05-11 23:16:43 +0000 | [diff] [blame] | 1201 | } |
| 1202 | |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1203 | /* 6, 7 */ |
| 1204 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) { |
| 1205 | /* long sub/cmp, then BE (unsigned less than or equal) |
| 1206 | --> test dst <=u src */ |
| 1207 | return unop(Iop_1Uto64, |
| 1208 | binop(Iop_CmpLE32U, |
| 1209 | unop(Iop_64to32, cc_dep1), |
| 1210 | unop(Iop_64to32, cc_dep2))); |
| 1211 | } |
| 1212 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) { |
| 1213 | /* long sub/cmp, then NBE (unsigned greater than) |
| 1214 | --> test src <u dst */ |
| 1215 | /* Note, args are opposite way round from the usual */ |
| 1216 | return unop(Iop_1Uto64, |
| 1217 | binop(Iop_CmpLT32U, |
| 1218 | unop(Iop_64to32, cc_dep2), |
| 1219 | unop(Iop_64to32, cc_dep1))); |
| 1220 | } |
| 1221 | |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1222 | /* 8, 9 */ |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1223 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) { |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1224 | /* long sub/cmp, then S (negative) |
| 1225 | --> (dst-src)[31] |
| 1226 | --> ((dst -64 src) >>u 31) & 1 |
| 1227 | Pointless to narrow the args to 32 bit before the subtract. */ |
| 1228 | return binop(Iop_And64, |
| 1229 | binop(Iop_Shr64, |
| 1230 | binop(Iop_Sub64, cc_dep1, cc_dep2), |
| 1231 | mkU8(31)), |
| 1232 | mkU64(1)); |
| 1233 | } |
| 1234 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNS)) { |
| 1235 | /* long sub/cmp, then NS (not negative) |
| 1236 | --> (dst-src)[31] ^ 1 |
| 1237 | --> (((dst -64 src) >>u 31) & 1) ^ 1 |
| 1238 | Pointless to narrow the args to 32 bit before the subtract. */ |
| 1239 | return binop(Iop_Xor64, |
| 1240 | binop(Iop_And64, |
| 1241 | binop(Iop_Shr64, |
| 1242 | binop(Iop_Sub64, cc_dep1, cc_dep2), |
| 1243 | mkU8(31)), |
| 1244 | mkU64(1)), |
| 1245 | mkU64(1)); |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1246 | } |
| 1247 | |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1248 | /* 12, 13 */ |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1249 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) { |
| 1250 | /* long sub/cmp, then L (signed less than) |
| 1251 | --> test dst <s src */ |
sewardj | 6d709a9 | 2005-04-27 11:52:40 +0000 | [diff] [blame] | 1252 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1253 | binop(Iop_CmpLT32S, |
| 1254 | unop(Iop_64to32, cc_dep1), |
| 1255 | unop(Iop_64to32, cc_dep2))); |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1256 | } |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1257 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNL)) { |
| 1258 | /* long sub/cmp, then NL (signed greater than or equal) |
| 1259 | --> test dst >=s src |
| 1260 | --> test src <=s dst */ |
| 1261 | return unop(Iop_1Uto64, |
| 1262 | binop(Iop_CmpLE32S, |
| 1263 | unop(Iop_64to32, cc_dep2), |
| 1264 | unop(Iop_64to32, cc_dep1))); |
| 1265 | } |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1266 | |
sewardj | edccb44 | 2014-10-02 11:32:39 +0000 | [diff] [blame] | 1267 | /* 14, 15 */ |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1268 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) { |
sewardj | 3f81c4e | 2005-07-20 00:30:37 +0000 | [diff] [blame] | 1269 | /* long sub/cmp, then LE (signed less than or equal) |
| 1270 | --> test dst <=s src */ |
sewardj | 6d709a9 | 2005-04-27 11:52:40 +0000 | [diff] [blame] | 1271 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1272 | binop(Iop_CmpLE32S, |
| 1273 | unop(Iop_64to32, cc_dep1), |
| 1274 | unop(Iop_64to32, cc_dep2))); |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1275 | |
| 1276 | } |
sewardj | ff6b34a | 2010-01-15 09:54:55 +0000 | [diff] [blame] | 1277 | if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) { |
| 1278 | /* long sub/cmp, then NLE (signed greater than) |
| 1279 | --> test !(dst <=s src) |
| 1280 | --> test (dst >s src) |
| 1281 | --> test (src <s dst) */ |
| 1282 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1283 | binop(Iop_CmpLT32S, |
| 1284 | unop(Iop_64to32, cc_dep2), |
| 1285 | unop(Iop_64to32, cc_dep1))); |
sewardj | ff6b34a | 2010-01-15 09:54:55 +0000 | [diff] [blame] | 1286 | |
| 1287 | } |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1288 | |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1289 | /*---------------- SUBW ----------------*/ |
| 1290 | |
sewardj | 66a5e81 | 2015-02-04 19:05:13 +0000 | [diff] [blame] | 1291 | /* 4, 5 */ |
sewardj | a82b476 | 2005-05-06 16:30:21 +0000 | [diff] [blame] | 1292 | if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) { |
| 1293 | /* word sub/cmp, then Z --> test dst==src */ |
| 1294 | return unop(Iop_1Uto64, |
| 1295 | binop(Iop_CmpEQ16, |
| 1296 | unop(Iop_64to16,cc_dep1), |
| 1297 | unop(Iop_64to16,cc_dep2))); |
| 1298 | } |
sewardj | beb5291 | 2008-05-02 22:15:12 +0000 | [diff] [blame] | 1299 | if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) { |
| 1300 | /* word sub/cmp, then NZ --> test dst!=src */ |
| 1301 | return unop(Iop_1Uto64, |
| 1302 | binop(Iop_CmpNE16, |
| 1303 | unop(Iop_64to16,cc_dep1), |
| 1304 | unop(Iop_64to16,cc_dep2))); |
| 1305 | } |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1306 | |
sewardj | 66a5e81 | 2015-02-04 19:05:13 +0000 | [diff] [blame] | 1307 | /* 6, */ |
sewardj | aedb859 | 2014-10-02 16:15:30 +0000 | [diff] [blame] | 1308 | if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondBE)) { |
| 1309 | /* word sub/cmp, then BE (unsigned less than or equal) |
| 1310 | --> test dst <=u src */ |
| 1311 | return unop(Iop_1Uto64, |
| 1312 | binop(Iop_CmpLE64U, |
| 1313 | binop(Iop_Shl64, cc_dep1, mkU8(48)), |
| 1314 | binop(Iop_Shl64, cc_dep2, mkU8(48)))); |
| 1315 | } |
| 1316 | |
sewardj | 66a5e81 | 2015-02-04 19:05:13 +0000 | [diff] [blame] | 1317 | /* 14, */ |
sewardj | 3f81c4e | 2005-07-20 00:30:37 +0000 | [diff] [blame] | 1318 | if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) { |
sewardj | 3be608d | 2006-05-25 18:48:12 +0000 | [diff] [blame] | 1319 | /* word sub/cmp, then LE (signed less than or equal) |
sewardj | 3f81c4e | 2005-07-20 00:30:37 +0000 | [diff] [blame] | 1320 | --> test dst <=s src */ |
| 1321 | return unop(Iop_1Uto64, |
| 1322 | binop(Iop_CmpLE64S, |
| 1323 | binop(Iop_Shl64,cc_dep1,mkU8(48)), |
| 1324 | binop(Iop_Shl64,cc_dep2,mkU8(48)))); |
| 1325 | |
| 1326 | } |
| 1327 | |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1328 | /*---------------- SUBB ----------------*/ |
| 1329 | |
sewardj | 66a5e81 | 2015-02-04 19:05:13 +0000 | [diff] [blame] | 1330 | /* 2, 3 */ |
| 1331 | if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondB)) { |
| 1332 | /* byte sub/cmp, then B (unsigned less than) |
| 1333 | --> test dst <u src */ |
| 1334 | return unop(Iop_1Uto64, |
| 1335 | binop(Iop_CmpLT64U, |
| 1336 | binop(Iop_And64, cc_dep1, mkU64(0xFF)), |
| 1337 | binop(Iop_And64, cc_dep2, mkU64(0xFF)))); |
| 1338 | } |
| 1339 | if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNB)) { |
| 1340 | /* byte sub/cmp, then NB (unsigned greater than or equal) |
| 1341 | --> test src <=u dst */ |
| 1342 | /* Note, args are opposite way round from the usual */ |
| 1343 | return unop(Iop_1Uto64, |
| 1344 | binop(Iop_CmpLE64U, |
| 1345 | binop(Iop_And64, cc_dep2, mkU64(0xFF)), |
| 1346 | binop(Iop_And64, cc_dep1, mkU64(0xFF)))); |
| 1347 | } |
| 1348 | |
| 1349 | /* 4, 5 */ |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1350 | if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) { |
| 1351 | /* byte sub/cmp, then Z --> test dst==src */ |
sewardj | 6d709a9 | 2005-04-27 11:52:40 +0000 | [diff] [blame] | 1352 | return unop(Iop_1Uto64, |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1353 | binop(Iop_CmpEQ8, |
sewardj | 6d709a9 | 2005-04-27 11:52:40 +0000 | [diff] [blame] | 1354 | unop(Iop_64to8,cc_dep1), |
| 1355 | unop(Iop_64to8,cc_dep2))); |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1356 | } |
sewardj | 32d615b | 2006-08-25 12:52:19 +0000 | [diff] [blame] | 1357 | if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) { |
| 1358 | /* byte sub/cmp, then NZ --> test dst!=src */ |
| 1359 | return unop(Iop_1Uto64, |
| 1360 | binop(Iop_CmpNE8, |
| 1361 | unop(Iop_64to8,cc_dep1), |
| 1362 | unop(Iop_64to8,cc_dep2))); |
| 1363 | } |
| 1364 | |
sewardj | 66a5e81 | 2015-02-04 19:05:13 +0000 | [diff] [blame] | 1365 | /* 6, */ |
sewardj | e430418 | 2011-06-06 10:17:46 +0000 | [diff] [blame] | 1366 | if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) { |
| 1367 | /* byte sub/cmp, then BE (unsigned less than or equal) |
| 1368 | --> test dst <=u src */ |
| 1369 | return unop(Iop_1Uto64, |
| 1370 | binop(Iop_CmpLE64U, |
| 1371 | binop(Iop_And64, cc_dep1, mkU64(0xFF)), |
| 1372 | binop(Iop_And64, cc_dep2, mkU64(0xFF)))); |
| 1373 | } |
| 1374 | |
sewardj | 66a5e81 | 2015-02-04 19:05:13 +0000 | [diff] [blame] | 1375 | /* 8, 9 */ |
sewardj | 3be608d | 2006-05-25 18:48:12 +0000 | [diff] [blame] | 1376 | if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS) |
| 1377 | && isU64(cc_dep2, 0)) { |
| 1378 | /* byte sub/cmp of zero, then S --> test (dst-0 <s 0) |
| 1379 | --> test dst <s 0 |
| 1380 | --> (ULong)dst[7] |
| 1381 | This is yet another scheme by which gcc figures out if the |
| 1382 | top bit of a byte is 1 or 0. See also LOGICB/CondS below. */ |
| 1383 | /* Note: isU64(cc_dep2, 0) is correct, even though this is |
| 1384 | for an 8-bit comparison, since the args to the helper |
| 1385 | function are always U64s. */ |
| 1386 | return binop(Iop_And64, |
| 1387 | binop(Iop_Shr64,cc_dep1,mkU8(7)), |
| 1388 | mkU64(1)); |
| 1389 | } |
sewardj | cd538b4 | 2008-03-31 21:57:17 +0000 | [diff] [blame] | 1390 | if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS) |
| 1391 | && isU64(cc_dep2, 0)) { |
| 1392 | /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0) |
| 1393 | --> test !(dst <s 0) |
| 1394 | --> (ULong) !dst[7] |
| 1395 | */ |
| 1396 | return binop(Iop_Xor64, |
| 1397 | binop(Iop_And64, |
| 1398 | binop(Iop_Shr64,cc_dep1,mkU8(7)), |
| 1399 | mkU64(1)), |
| 1400 | mkU64(1)); |
| 1401 | } |
sewardj | 3be608d | 2006-05-25 18:48:12 +0000 | [diff] [blame] | 1402 | |
sewardj | 4b06a0b | 2005-11-13 19:51:04 +0000 | [diff] [blame] | 1403 | /*---------------- LOGICQ ----------------*/ |
| 1404 | |
| 1405 | if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) { |
| 1406 | /* long long and/or/xor, then Z --> test dst==0 */ |
| 1407 | return unop(Iop_1Uto64, |
| 1408 | binop(Iop_CmpEQ64, cc_dep1, mkU64(0))); |
| 1409 | } |
sewardj | 0cd7473 | 2011-07-07 13:58:10 +0000 | [diff] [blame] | 1410 | if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) { |
| 1411 | /* long long and/or/xor, then NZ --> test dst!=0 */ |
| 1412 | return unop(Iop_1Uto64, |
| 1413 | binop(Iop_CmpNE64, cc_dep1, mkU64(0))); |
| 1414 | } |
sewardj | 4b06a0b | 2005-11-13 19:51:04 +0000 | [diff] [blame] | 1415 | |
sewardj | 77fd846 | 2005-11-13 20:30:24 +0000 | [diff] [blame] | 1416 | if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) { |
| 1417 | /* long long and/or/xor, then L |
| 1418 | LOGIC sets SF and ZF according to the |
| 1419 | result and makes OF be zero. L computes SF ^ OF, but |
| 1420 | OF is zero, so this reduces to SF -- which will be 1 iff |
| 1421 | the result is < signed 0. Hence ... |
| 1422 | */ |
| 1423 | return unop(Iop_1Uto64, |
| 1424 | binop(Iop_CmpLT64S, |
| 1425 | cc_dep1, |
| 1426 | mkU64(0))); |
| 1427 | } |
| 1428 | |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1429 | /*---------------- LOGICL ----------------*/ |
| 1430 | |
| 1431 | if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) { |
| 1432 | /* long and/or/xor, then Z --> test dst==0 */ |
sewardj | 6d709a9 | 2005-04-27 11:52:40 +0000 | [diff] [blame] | 1433 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1434 | binop(Iop_CmpEQ32, |
| 1435 | unop(Iop_64to32, cc_dep1), |
| 1436 | mkU32(0))); |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1437 | } |
sewardj | 005b4ef | 2005-07-20 01:12:48 +0000 | [diff] [blame] | 1438 | if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) { |
| 1439 | /* long and/or/xor, then NZ --> test dst!=0 */ |
| 1440 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1441 | binop(Iop_CmpNE32, |
| 1442 | unop(Iop_64to32, cc_dep1), |
| 1443 | mkU32(0))); |
sewardj | 005b4ef | 2005-07-20 01:12:48 +0000 | [diff] [blame] | 1444 | } |
| 1445 | |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1446 | if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) { |
| 1447 | /* long and/or/xor, then LE |
| 1448 | This is pretty subtle. LOGIC sets SF and ZF according to the |
sewardj | 77fd846 | 2005-11-13 20:30:24 +0000 | [diff] [blame] | 1449 | result and makes OF be zero. LE computes (SF ^ OF) | ZF, but |
| 1450 | OF is zero, so this reduces to SF | ZF -- which will be 1 iff |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1451 | the result is <=signed 0. Hence ... |
| 1452 | */ |
sewardj | 6d709a9 | 2005-04-27 11:52:40 +0000 | [diff] [blame] | 1453 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1454 | binop(Iop_CmpLE32S, |
| 1455 | unop(Iop_64to32, cc_dep1), |
| 1456 | mkU32(0))); |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1457 | } |
| 1458 | |
sewardj | e430418 | 2011-06-06 10:17:46 +0000 | [diff] [blame] | 1459 | if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) { |
| 1460 | /* long and/or/xor, then S --> (ULong)result[31] */ |
| 1461 | return binop(Iop_And64, |
| 1462 | binop(Iop_Shr64, cc_dep1, mkU8(31)), |
| 1463 | mkU64(1)); |
| 1464 | } |
| 1465 | if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) { |
| 1466 | /* long and/or/xor, then NS --> (ULong) ~ result[31] */
| 1467 | return binop(Iop_Xor64, |
| 1468 | binop(Iop_And64, |
| 1469 | binop(Iop_Shr64, cc_dep1, mkU8(31)), |
| 1470 | mkU64(1)), |
| 1471 | mkU64(1)); |
| 1472 | } |
| 1473 | |
sewardj | 61acf4c | 2012-04-25 14:33:03 +0000 | [diff] [blame] | 1474 | /*---------------- LOGICW ----------------*/ |
| 1475 | |
| 1476 | if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondZ)) { |
| 1477 | /* word and/or/xor, then Z --> test dst==0 */ |
| 1478 | return unop(Iop_1Uto64, |
| 1479 | binop(Iop_CmpEQ64, |
| 1480 | binop(Iop_And64, cc_dep1, mkU64(0xFFFF)), |
| 1481 | mkU64(0))); |
| 1482 | } |
| 1483 | if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondNZ)) { |
| 1484 | /* word and/or/xor, then NZ --> test dst!=0 */ |
| 1485 | return unop(Iop_1Uto64, |
| 1486 | binop(Iop_CmpNE64, |
| 1487 | binop(Iop_And64, cc_dep1, mkU64(0xFFFF)), |
| 1488 | mkU64(0))); |
| 1489 | } |
| 1490 | |
sewardj | 4b06a0b | 2005-11-13 19:51:04 +0000 | [diff] [blame] | 1491 | /*---------------- LOGICB ----------------*/ |
| 1492 | |
| 1493 | if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) { |
| 1494 | /* byte and/or/xor, then Z --> test dst==0 */ |
| 1495 | return unop(Iop_1Uto64, |
| 1496 | binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)), |
| 1497 | mkU64(0))); |
| 1498 | } |
sewardj | ff6b34a | 2010-01-15 09:54:55 +0000 | [diff] [blame] | 1499 | if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) { |
| 1500 | /* byte and/or/xor, then NZ --> test dst!=0 */ |
| 1501 | return unop(Iop_1Uto64, |
| 1502 | binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)), |
| 1503 | mkU64(0))); |
| 1504 | } |
sewardj | 3f81c4e | 2005-07-20 00:30:37 +0000 | [diff] [blame] | 1505 | |
sewardj | 346d9a1 | 2006-05-21 01:02:31 +0000 | [diff] [blame] | 1506 | if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) { |
| 1507 | /* this is an idiom gcc sometimes uses to find out if the top |
| 1508 | bit of a byte register is set: eg testb %al,%al; js .. |
| 1509 | Since it just depends on the top bit of the byte, extract |
| 1510 | that bit and explicitly get rid of all the rest. This |
| 1511 | helps memcheck avoid false positives in the case where any |
| 1512 | of the other bits in the byte are undefined. */ |
| 1513 | /* byte and/or/xor, then S --> (UInt)result[7] */ |
| 1514 | return binop(Iop_And64, |
| 1515 | binop(Iop_Shr64,cc_dep1,mkU8(7)), |
| 1516 | mkU64(1)); |
| 1517 | } |
sewardj | a6d0809 | 2011-03-27 22:16:08 +0000 | [diff] [blame] | 1518 | if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) { |
| 1519 | /* byte and/or/xor, then NS --> (UInt)!result[7] */ |
| 1520 | return binop(Iop_Xor64, |
| 1521 | binop(Iop_And64, |
| 1522 | binop(Iop_Shr64,cc_dep1,mkU8(7)), |
| 1523 | mkU64(1)), |
| 1524 | mkU64(1)); |
| 1525 | } |
sewardj | 346d9a1 | 2006-05-21 01:02:31 +0000 | [diff] [blame] | 1526 | |
sewardj | 3f81c4e | 2005-07-20 00:30:37 +0000 | [diff] [blame] | 1527 | /*---------------- INCB ----------------*/ |
| 1528 | |
| 1529 | if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) { |
sewardj | 4df975f | 2010-02-28 04:51:02 +0000 | [diff] [blame] | 1530 | /* 8-bit inc, then LE --> sign bit of the arg */ |
| 1531 | return binop(Iop_And64, |
| 1532 | binop(Iop_Shr64, |
| 1533 | binop(Iop_Sub64, cc_dep1, mkU64(1)), |
| 1534 | mkU8(7)), |
| 1535 | mkU64(1)); |
sewardj | 3f81c4e | 2005-07-20 00:30:37 +0000 | [diff] [blame] | 1536 | } |
| 1537 | |
sewardj | 7784bd2 | 2006-12-29 01:54:36 +0000 | [diff] [blame] | 1538 | /*---------------- INCW ----------------*/ |
| 1539 | |
| 1540 | if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) { |
| 1541 | /* 16-bit inc, then Z --> test dst == 0 */ |
| 1542 | return unop(Iop_1Uto64, |
| 1543 | binop(Iop_CmpEQ64, |
| 1544 | binop(Iop_Shl64,cc_dep1,mkU8(48)), |
| 1545 | mkU64(0))); |
| 1546 | } |
| 1547 | |
sewardj | 77fd846 | 2005-11-13 20:30:24 +0000 | [diff] [blame] | 1548 | /*---------------- DECL ----------------*/ |
| 1549 | |
| 1550 | if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) { |
| 1551 | /* dec L, then Z --> test dst == 0 */ |
| 1552 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1553 | binop(Iop_CmpEQ32, |
| 1554 | unop(Iop_64to32, cc_dep1), |
| 1555 | mkU32(0))); |
sewardj | 77fd846 | 2005-11-13 20:30:24 +0000 | [diff] [blame] | 1556 | } |
| 1557 | |
sewardj | b6d02ea | 2005-08-01 13:35:18 +0000 | [diff] [blame] | 1558 | /*---------------- DECW ----------------*/ |
| 1559 | |
| 1560 | if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) { |
| 1561 | /* 16-bit dec, then NZ --> test dst != 0 */ |
| 1562 | return unop(Iop_1Uto64, |
| 1563 | binop(Iop_CmpNE64, |
| 1564 | binop(Iop_Shl64,cc_dep1,mkU8(48)), |
| 1565 | mkU64(0))); |
| 1566 | } |
| 1567 | |
sewardj | 7fc494b | 2005-05-05 12:05:11 +0000 | [diff] [blame] | 1568 | /*---------------- COPY ----------------*/ |
| 1569 | /* This can happen, as a result of amd64 FP compares: "comisd ... ; |
| 1570 | jbe" for example. */ |
| 1571 | |
| 1572 | if (isU64(cc_op, AMD64G_CC_OP_COPY) && |
| 1573 | (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) { |
| 1574 | /* COPY, then BE --> extract C and Z from dep1, and test (C |
| 1575 | or Z == 1). */ |
| 1576 | /* COPY, then NBE --> extract C and Z from dep1, and test (C |
| 1577 | or Z == 0). */ |
| 1578 | ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0; |
| 1579 | return |
| 1580 | unop( |
| 1581 | Iop_1Uto64, |
| 1582 | binop( |
| 1583 | Iop_CmpEQ64, |
| 1584 | binop( |
| 1585 | Iop_And64, |
| 1586 | binop( |
| 1587 | Iop_Or64, |
| 1588 | binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)), |
| 1589 | binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)) |
| 1590 | ), |
| 1591 | mkU64(1) |
| 1592 | ), |
| 1593 | mkU64(nnn) |
| 1594 | ) |
| 1595 | ); |
| 1596 | } |
| 1597 | |
sewardj | 9f05a64 | 2005-05-12 02:14:52 +0000 | [diff] [blame] | 1598 | if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) { |
| 1599 | /* COPY, then B --> extract C dep1, and test (C == 1). */ |
| 1600 | return |
| 1601 | unop( |
| 1602 | Iop_1Uto64, |
| 1603 | binop( |
| 1604 | Iop_CmpNE64, |
| 1605 | binop( |
| 1606 | Iop_And64, |
| 1607 | binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)), |
| 1608 | mkU64(1) |
| 1609 | ), |
| 1610 | mkU64(0) |
| 1611 | ) |
| 1612 | ); |
| 1613 | } |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1614 | |
sewardj | b235e5b | 2006-11-27 04:09:52 +0000 | [diff] [blame] | 1615 | if (isU64(cc_op, AMD64G_CC_OP_COPY) |
| 1616 | && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) { |
| 1617 | /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */ |
| 1618 | /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */ |
| 1619 | UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0; |
| 1620 | return |
| 1621 | unop( |
| 1622 | Iop_1Uto64, |
| 1623 | binop( |
| 1624 | Iop_CmpEQ64, |
| 1625 | binop( |
| 1626 | Iop_And64, |
| 1627 | binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)), |
| 1628 | mkU64(1) |
| 1629 | ), |
| 1630 | mkU64(nnn) |
| 1631 | ) |
| 1632 | ); |
| 1633 | } |
| 1634 | |
| 1635 | if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) { |
| 1636 | /* COPY, then P --> extract P from dep1, and test (P == 1). */ |
| 1637 | return |
| 1638 | unop( |
| 1639 | Iop_1Uto64, |
| 1640 | binop( |
| 1641 | Iop_CmpNE64, |
| 1642 | binop( |
| 1643 | Iop_And64, |
| 1644 | binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)), |
| 1645 | mkU64(1) |
| 1646 | ), |
| 1647 | mkU64(0) |
| 1648 | ) |
| 1649 | ); |
| 1650 | } |
| 1651 | |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1652 | return NULL; |
| 1653 | } |
| 1654 | |
| 1655 | /* --------- specialising "amd64g_calculate_rflags_c" --------- */ |
| 1656 | |
| 1657 | if (vex_streq(function_name, "amd64g_calculate_rflags_c")) { |
| 1658 | /* specialise calls to above "calculate_rflags_c" function */ |
| 1659 | IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep; |
| 1660 | vassert(arity == 4); |
| 1661 | cc_op = args[0]; |
| 1662 | cc_dep1 = args[1]; |
| 1663 | cc_dep2 = args[2]; |
| 1664 | cc_ndep = args[3]; |
| 1665 | |
sewardj | 77fd846 | 2005-11-13 20:30:24 +0000 | [diff] [blame] | 1666 | if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) { |
| 1667 | /* C after sub denotes unsigned less than */ |
| 1668 | return unop(Iop_1Uto64, |
| 1669 | binop(Iop_CmpLT64U, |
| 1670 | cc_dep1, |
| 1671 | cc_dep2)); |
| 1672 | } |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1673 | if (isU64(cc_op, AMD64G_CC_OP_SUBL)) { |
| 1674 | /* C after sub denotes unsigned less than */ |
sewardj | 6d709a9 | 2005-04-27 11:52:40 +0000 | [diff] [blame] | 1675 | return unop(Iop_1Uto64, |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1676 | binop(Iop_CmpLT32U, |
| 1677 | unop(Iop_64to32, cc_dep1), |
| 1678 | unop(Iop_64to32, cc_dep2))); |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1679 | } |
| 1680 | if (isU64(cc_op, AMD64G_CC_OP_SUBB)) { |
| 1681 | /* C after sub denotes unsigned less than */ |
sewardj | 6d709a9 | 2005-04-27 11:52:40 +0000 | [diff] [blame] | 1682 | return unop(Iop_1Uto64, |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1683 | binop(Iop_CmpLT64U, |
| 1684 | binop(Iop_And64,cc_dep1,mkU64(0xFF)), |
sewardj | 6d709a9 | 2005-04-27 11:52:40 +0000 | [diff] [blame] | 1685 | binop(Iop_And64,cc_dep2,mkU64(0xFF)))); |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1686 | } |
| 1687 | if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) |
| 1688 | || isU64(cc_op, AMD64G_CC_OP_LOGICL) |
| 1689 | || isU64(cc_op, AMD64G_CC_OP_LOGICW) |
| 1690 | || isU64(cc_op, AMD64G_CC_OP_LOGICB)) { |
| 1691 | /* cflag after logic is zero */ |
| 1692 | return mkU64(0); |
| 1693 | } |
| 1694 | if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL) |
| 1695 | || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) { |
| 1696 | /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */ |
| 1697 | return cc_ndep; |
| 1698 | } |
sewardj | 7784bd2 | 2006-12-29 01:54:36 +0000 | [diff] [blame] | 1699 | |
| 1700 | # if 0 |
| 1701 | if (cc_op->tag == Iex_Const) { |
| 1702 | vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n"); |
| 1703 | } |
| 1704 | # endif |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1705 | |
| 1706 | return NULL; |
| 1707 | } |
| 1708 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 1709 | # undef unop |
| 1710 | # undef binop |
sewardj | 0354035 | 2005-04-26 01:53:48 +0000 | [diff] [blame] | 1711 | # undef mkU64 |
sewardj | 9cc2bbf | 2011-06-05 17:56:03 +0000 | [diff] [blame] | 1712 | # undef mkU32 |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 1713 | # undef mkU8 |
| 1714 | |
| 1715 | return NULL; |
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 1716 | } |
| 1717 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 1718 | |
sewardj | 8d96531 | 2005-02-25 02:48:47 +0000 | [diff] [blame] | 1719 | /*---------------------------------------------------------------*/ |
| 1720 | /*--- Supporting functions for x87 FPU activities. ---*/ |
| 1721 | /*---------------------------------------------------------------*/ |
| 1722 | |
sewardj | 4f9847d | 2005-07-25 11:58:34 +0000 | [diff] [blame] | 1723 | static inline Bool host_is_little_endian ( void ) |
| 1724 | { |
| 1725 | UInt x = 0x76543210; |
| 1726 | UChar* p = (UChar*)(&x); |
| 1727 | return toBool(*p == 0x10); |
| 1728 | } |
| 1729 | |
| 1730 | /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */ |
| 1731 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 1732 | ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl ) |
| 1733 | { |
| 1734 | Bool mantissaIsZero; |
| 1735 | Int bexp; |
| 1736 | UChar sign; |
| 1737 | UChar* f64; |
| 1738 | |
| 1739 | vassert(host_is_little_endian()); |
| 1740 | |
| 1741 | /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */ |
| 1742 | |
| 1743 | f64 = (UChar*)(&dbl); |
| 1744 | sign = toUChar( (f64[7] >> 7) & 1 ); |
| 1745 | |
| 1746 | /* First off, if the tag indicates the register was empty, |
| 1747 | return 1,0,sign,1 */ |
| 1748 | if (tag == 0) { |
| 1749 | /* vex_printf("Empty\n"); */ |
| 1750 | return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1) |
| 1751 | | AMD64G_FC_MASK_C0; |
| 1752 | } |
| 1753 | |
| 1754 | bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F); |
| 1755 | bexp &= 0x7FF; |
| 1756 | |
| 1757 | mantissaIsZero |
| 1758 | = toBool( |
| 1759 | (f64[6] & 0x0F) == 0 |
| 1760 | && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0 |
| 1761 | ); |
| 1762 | |
| 1763 | /* If both exponent and mantissa are zero, the value is zero. |
| 1764 | Return 1,0,sign,0. */ |
| 1765 | if (bexp == 0 && mantissaIsZero) { |
| 1766 | /* vex_printf("Zero\n"); */ |
| 1767 | return AMD64G_FC_MASK_C3 | 0 |
| 1768 | | (sign << AMD64G_FC_SHIFT_C1) | 0; |
| 1769 | } |
| 1770 | |
| 1771 | /* If exponent is zero but mantissa isn't, it's a denormal. |
| 1772 | Return 1,1,sign,0. */ |
| 1773 | if (bexp == 0 && !mantissaIsZero) { |
| 1774 | /* vex_printf("Denormal\n"); */ |
| 1775 | return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2 |
| 1776 | | (sign << AMD64G_FC_SHIFT_C1) | 0; |
| 1777 | } |
| 1778 | |
| 1779 | /* If the exponent is 7FF and the mantissa is zero, this is an infinity. |
| 1780 | Return 0,1,sign,1. */ |
| 1781 | if (bexp == 0x7FF && mantissaIsZero) { |
| 1782 | /* vex_printf("Inf\n"); */ |
| 1783 | return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) |
| 1784 | | AMD64G_FC_MASK_C0; |
| 1785 | } |
| 1786 | |
| 1787 | /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN. |
| 1788 | Return 0,0,sign,1. */ |
| 1789 | if (bexp == 0x7FF && !mantissaIsZero) { |
| 1790 | /* vex_printf("NaN\n"); */ |
| 1791 | return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0; |
| 1792 | } |
| 1793 | |
| 1794 | /* Uh, ok, we give up. It must be a normal finite number. |
| 1795 | Return 0,1,sign,0. |
| 1796 | */ |
| 1797 | /* vex_printf("normal\n"); */ |
| 1798 | return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0; |
| 1799 | } |
| 1800 | |
| 1801 | |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 1802 | /* This is used to implement both 'frstor' and 'fldenv'. The latter |
| 1803 | appears to differ from the former only in that the 8 FP registers |
| 1804 | themselves are not transferred into the guest state. */ |
| 1805 | static |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 1806 | VexEmNote do_put_x87 ( Bool moveRegs, |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 1807 | /*IN*/UChar* x87_state, |
| 1808 | /*OUT*/VexGuestAMD64State* vex_state ) |
| 1809 | { |
| 1810 | Int stno, preg; |
| 1811 | UInt tag; |
| 1812 | ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); |
| 1813 | UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); |
| 1814 | Fpu_State* x87 = (Fpu_State*)x87_state; |
| 1815 | UInt ftop = (x87->env[FP_ENV_STAT] >> 11) & 7; |
| 1816 | UInt tagw = x87->env[FP_ENV_TAG]; |
| 1817 | UInt fpucw = x87->env[FP_ENV_CTRL]; |
| 1818 | UInt c3210 = x87->env[FP_ENV_STAT] & 0x4700; |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 1819 | VexEmNote ew; |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 1820 | UInt fpround; |
| 1821 | ULong pair; |
| 1822 | |
| 1823 | /* Copy registers and tags */ |
| 1824 | for (stno = 0; stno < 8; stno++) { |
| 1825 | preg = (stno + ftop) & 7; |
| 1826 | tag = (tagw >> (2*preg)) & 3; |
| 1827 | if (tag == 3) { |
| 1828 | /* register is empty */ |
| 1829 | /* hmm, if it's empty, does it still get written? Probably |
| 1830 | safer to say it does. If we don't, memcheck could get out |
| 1831 | of sync, in that it thinks all FP registers are defined by |
| 1832 | this helper, but in reality some have not been updated. */ |
| 1833 | if (moveRegs) |
| 1834 | vexRegs[preg] = 0; /* IEEE754 64-bit zero */ |
| 1835 | vexTags[preg] = 0; |
| 1836 | } else { |
| 1837 | /* register is non-empty */ |
| 1838 | if (moveRegs) |
| 1839 | convert_f80le_to_f64le( &x87->reg[10*stno], |
| 1840 | (UChar*)&vexRegs[preg] ); |
| 1841 | vexTags[preg] = 1; |
| 1842 | } |
| 1843 | } |
| 1844 | |
| 1845 | /* stack pointer */ |
| 1846 | vex_state->guest_FTOP = ftop; |
| 1847 | |
| 1848 | /* status word */ |
| 1849 | vex_state->guest_FC3210 = c3210; |
| 1850 | |
| 1851 | /* handle the control word, setting FPROUND and detecting any |
| 1852 | emulation warnings. */ |
| 1853 | pair = amd64g_check_fldcw ( (ULong)fpucw ); |
sewardj | 9ae42a7 | 2012-02-16 14:18:56 +0000 | [diff] [blame] | 1854 | fpround = (UInt)pair & 0xFFFFFFFFULL; |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 1855 | ew = (VexEmNote)(pair >> 32); |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 1856 | |
| 1857 | vex_state->guest_FPROUND = fpround & 3; |
| 1858 | |
| 1859 | /* emulation warnings --> caller */ |
| 1860 | return ew; |
| 1861 | } |
| 1862 | |
| 1863 | |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 1864 | /* Create an x87 FPU state from the guest state, as close as |
| 1865 | we can approximate it. */ |
| 1866 | static |
| 1867 | void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state, |
| 1868 | /*OUT*/UChar* x87_state ) |
| 1869 | { |
| 1870 | Int i, stno, preg; |
| 1871 | UInt tagw; |
| 1872 | ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); |
| 1873 | UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); |
| 1874 | Fpu_State* x87 = (Fpu_State*)x87_state; |
| 1875 | UInt ftop = vex_state->guest_FTOP; |
| 1876 | UInt c3210 = vex_state->guest_FC3210; |
| 1877 | |
| 1878 | for (i = 0; i < 14; i++) |
| 1879 | x87->env[i] = 0; |
| 1880 | |
| 1881 | x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF; |
| 1882 | x87->env[FP_ENV_STAT] |
| 1883 | = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700)); |
| 1884 | x87->env[FP_ENV_CTRL] |
| 1885 | = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND )); |
| 1886 | |
| 1887 | /* Dump the register stack in ST order. */ |
| 1888 | tagw = 0; |
| 1889 | for (stno = 0; stno < 8; stno++) { |
| 1890 | preg = (stno + ftop) & 7; |
| 1891 | if (vexTags[preg] == 0) { |
| 1892 | /* register is empty */ |
| 1893 | tagw |= (3 << (2*preg)); |
| 1894 | convert_f64le_to_f80le( (UChar*)&vexRegs[preg], |
| 1895 | &x87->reg[10*stno] ); |
| 1896 | } else { |
| 1897 | /* register is full. */ |
| 1898 | tagw |= (0 << (2*preg)); |
| 1899 | convert_f64le_to_f80le( (UChar*)&vexRegs[preg], |
| 1900 | &x87->reg[10*stno] ); |
| 1901 | } |
| 1902 | } |
| 1903 | x87->env[FP_ENV_TAG] = toUShort(tagw); |
| 1904 | } |
| 1905 | |
| 1906 | |
| 1907 | /* CALLED FROM GENERATED CODE */ |
| 1908 | /* DIRTY HELPER (reads guest state, writes guest mem) */ |
| 1909 | /* NOTE: only handles 32-bit format (no REX.W on the insn) */ |
sewardj | 28d71ed | 2014-09-07 23:23:17 +0000 | [diff] [blame] | 1910 | void amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM ( VexGuestAMD64State* gst, |
| 1911 | HWord addr ) |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 1912 | { |
| 1913 | /* Derived from values obtained from |
| 1914 | vendor_id : AuthenticAMD |
| 1915 | cpu family : 15 |
| 1916 | model : 12 |
| 1917 | model name : AMD Athlon(tm) 64 Processor 3200+ |
| 1918 | stepping : 0 |
| 1919 | cpu MHz : 2200.000 |
| 1920 | cache size : 512 KB |
| 1921 | */ |
| 1922 | /* Somewhat roundabout, but at least it's simple. */ |
| 1923 | Fpu_State tmp; |
| 1924 | UShort* addrS = (UShort*)addr; |
| 1925 | UChar* addrC = (UChar*)addr; |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 1926 | UInt mxcsr; |
| 1927 | UShort fp_tags; |
| 1928 | UInt summary_tags; |
| 1929 | Int r, stno; |
| 1930 | UShort *srcS, *dstS; |
| 1931 | |
| 1932 | do_get_x87( gst, (UChar*)&tmp ); |
| 1933 | mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND ); |
| 1934 | |
| 1935 | /* Now build the proper fxsave image from the x87 image we just |
| 1936 | made. */ |
| 1937 | |
| 1938 | addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */ |
| 1939 | addrS[1] = tmp.env[FP_ENV_STAT]; /* FCW: fpu status word */ |
| 1940 | |
| 1941 | /* set addrS[2] in an endian-independent way */ |
| 1942 | summary_tags = 0; |
| 1943 | fp_tags = tmp.env[FP_ENV_TAG]; |
| 1944 | for (r = 0; r < 8; r++) { |
| 1945 | if ( ((fp_tags >> (2*r)) & 3) != 3 ) |
| 1946 | summary_tags |= (1 << r); |
| 1947 | } |
| 1948 | addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */ |
| 1949 | addrC[5] = 0; /* pad */ |
| 1950 | |
| 1951 | /* FOP: faulting fpu opcode. From experimentation, the real CPU |
| 1952 | does not write this field. (?!) */ |
| 1953 | addrS[3] = 0; /* BOGUS */ |
| 1954 | |
| 1955 | /* RIP (Last x87 instruction pointer). From experimentation, the |
| 1956 | real CPU does not write this field. (?!) */ |
| 1957 | addrS[4] = 0; /* BOGUS */ |
| 1958 | addrS[5] = 0; /* BOGUS */ |
| 1959 | addrS[6] = 0; /* BOGUS */ |
| 1960 | addrS[7] = 0; /* BOGUS */ |
| 1961 | |
| 1962 | /* RDP (Last x87 data pointer). From experimentation, the real CPU |
| 1963 | does not write this field. (?!) */ |
| 1964 | addrS[8] = 0; /* BOGUS */ |
| 1965 | addrS[9] = 0; /* BOGUS */ |
| 1966 | addrS[10] = 0; /* BOGUS */ |
| 1967 | addrS[11] = 0; /* BOGUS */ |
| 1968 | |
| 1969 | addrS[12] = toUShort(mxcsr); /* MXCSR */ |
| 1970 | addrS[13] = toUShort(mxcsr >> 16); |
| 1971 | |
| 1972 | addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */ |
| 1973 | addrS[15] = 0x0000; /* MXCSR mask (hi16) */ |
| 1974 | |
| 1975 | /* Copy in the FP registers, in ST order. */ |
| 1976 | for (stno = 0; stno < 8; stno++) { |
| 1977 | srcS = (UShort*)(&tmp.reg[10*stno]); |
| 1978 | dstS = (UShort*)(&addrS[16 + 8*stno]); |
| 1979 | dstS[0] = srcS[0]; |
| 1980 | dstS[1] = srcS[1]; |
| 1981 | dstS[2] = srcS[2]; |
| 1982 | dstS[3] = srcS[3]; |
| 1983 | dstS[4] = srcS[4]; |
| 1984 | dstS[5] = 0; |
| 1985 | dstS[6] = 0; |
| 1986 | dstS[7] = 0; |
| 1987 | } |
| 1988 | |
| 1989 | /* That's the first 160 bytes of the image done. Now only %xmm0 |
sewardj | 28d71ed | 2014-09-07 23:23:17 +0000 | [diff] [blame] | 1990 | .. %xmm15 remain to be copied, and we let the generated IR do |
| 1991 | that, so as to make Memcheck's definedness flow for the non-XMM |
| 1992 | parts independant from that of the all the other control and |
| 1993 | status words in the structure. This avoids the false positives |
| 1994 | shown in #291310. */ |
sewardj | 5abcfe6 | 2007-01-10 04:59:33 +0000 | [diff] [blame] | 1995 | } |
| 1996 | |
| 1997 | |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 1998 | /* CALLED FROM GENERATED CODE */ |
| 1999 | /* DIRTY HELPER (writes guest state, reads guest mem) */ |
sewardj | 28d71ed | 2014-09-07 23:23:17 +0000 | [diff] [blame] | 2000 | VexEmNote amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM ( VexGuestAMD64State* gst, |
| 2001 | HWord addr ) |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 2002 | { |
| 2003 | Fpu_State tmp; |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 2004 | VexEmNote warnX87 = EmNote_NONE; |
| 2005 | VexEmNote warnXMM = EmNote_NONE; |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 2006 | UShort* addrS = (UShort*)addr; |
| 2007 | UChar* addrC = (UChar*)addr; |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 2008 | UShort fp_tags; |
| 2009 | Int r, stno, i; |
| 2010 | |
sewardj | 28d71ed | 2014-09-07 23:23:17 +0000 | [diff] [blame] | 2011 | /* Don't restore %xmm0 .. %xmm15, for the same reasons that |
| 2012 | amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM doesn't save them. See |
| 2013 | comment in that function for details. */ |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 2014 | |
| 2015 | /* Copy the x87 registers out of the image, into a temporary |
| 2016 | Fpu_State struct. */ |
| 2017 | for (i = 0; i < 14; i++) tmp.env[i] = 0; |
| 2018 | for (i = 0; i < 80; i++) tmp.reg[i] = 0; |
| 2019 | /* fill in tmp.reg[0..7] */ |
| 2020 | for (stno = 0; stno < 8; stno++) { |
| 2021 | UShort* dstS = (UShort*)(&tmp.reg[10*stno]); |
| 2022 | UShort* srcS = (UShort*)(&addrS[16 + 8*stno]); |
| 2023 | dstS[0] = srcS[0]; |
| 2024 | dstS[1] = srcS[1]; |
| 2025 | dstS[2] = srcS[2]; |
| 2026 | dstS[3] = srcS[3]; |
| 2027 | dstS[4] = srcS[4]; |
| 2028 | } |
| 2029 | /* fill in tmp.env[0..13] */ |
| 2030 | tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */ |
| 2031 | tmp.env[FP_ENV_STAT] = addrS[1]; /* FCW: fpu status word */ |
| 2032 | |
| 2033 | fp_tags = 0; |
| 2034 | for (r = 0; r < 8; r++) { |
| 2035 | if (addrC[4] & (1<<r)) |
| 2036 | fp_tags |= (0 << (2*r)); /* EMPTY */ |
| 2037 | else |
| 2038 | fp_tags |= (3 << (2*r)); /* VALID -- not really precise enough. */ |
| 2039 | } |
| 2040 | tmp.env[FP_ENV_TAG] = fp_tags; |
| 2041 | |
| 2042 | /* Now write 'tmp' into the guest state. */ |
| 2043 | warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst ); |
| 2044 | |
| 2045 | { UInt w32 = (((UInt)addrS[12]) & 0xFFFF) |
| 2046 | | ((((UInt)addrS[13]) & 0xFFFF) << 16); |
| 2047 | ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 ); |
| 2048 | |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 2049 | warnXMM = (VexEmNote)(w64 >> 32); |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 2050 | |
| 2051 | gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL; |
| 2052 | } |
| 2053 | |
| 2054 | /* Prefer an X87 emwarn over an XMM one, if both exist. */ |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 2055 | if (warnX87 != EmNote_NONE) |
sewardj | 5556e5e | 2011-01-21 18:05:19 +0000 | [diff] [blame] | 2056 | return warnX87; |
| 2057 | else |
| 2058 | return warnXMM; |
| 2059 | } |
| 2060 | |
| 2061 | |
sewardj | 0585a03 | 2005-11-05 02:55:06 +0000 | [diff] [blame] | 2062 | /* DIRTY HELPER (writes guest state) */ |
sewardj | 8d96531 | 2005-02-25 02:48:47 +0000 | [diff] [blame] | 2063 | /* Initialise the x87 FPU state as per 'finit'. */ |
sewardj | 8d96531 | 2005-02-25 02:48:47 +0000 | [diff] [blame] | 2064 | void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst ) |
| 2065 | { |
| 2066 | Int i; |
| 2067 | gst->guest_FTOP = 0; |
| 2068 | for (i = 0; i < 8; i++) { |
| 2069 | gst->guest_FPTAG[i] = 0; /* empty */ |
| 2070 | gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */ |
| 2071 | } |
| 2072 | gst->guest_FPROUND = (ULong)Irrm_NEAREST; |
| 2073 | gst->guest_FC3210 = 0; |
| 2074 | } |
| 2075 | |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2076 | |
sewardj | 924215b | 2005-03-26 21:50:31 +0000 | [diff] [blame] | 2077 | /* CALLED FROM GENERATED CODE */ |
| 2078 | /* DIRTY HELPER (reads guest memory) */ |
florian | bdf99f0 | 2015-01-04 17:20:19 +0000 | [diff] [blame] | 2079 | ULong amd64g_dirtyhelper_loadF80le ( Addr addrU ) |
sewardj | 924215b | 2005-03-26 21:50:31 +0000 | [diff] [blame] | 2080 | { |
| 2081 | ULong f64; |
florian | bdf99f0 | 2015-01-04 17:20:19 +0000 | [diff] [blame] | 2082 | convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 ); |
sewardj | 924215b | 2005-03-26 21:50:31 +0000 | [diff] [blame] | 2083 | return f64; |
| 2084 | } |
| 2085 | |
| 2086 | /* CALLED FROM GENERATED CODE */ |
| 2087 | /* DIRTY HELPER (writes guest memory) */ |
florian | bdf99f0 | 2015-01-04 17:20:19 +0000 | [diff] [blame] | 2088 | void amd64g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 ) |
sewardj | 924215b | 2005-03-26 21:50:31 +0000 | [diff] [blame] | 2089 | { |
florian | bdf99f0 | 2015-01-04 17:20:19 +0000 | [diff] [blame] | 2090 | convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU ); |
sewardj | 924215b | 2005-03-26 21:50:31 +0000 | [diff] [blame] | 2091 | } |
| 2092 | |
| 2093 | |
sewardj | bcbb9de | 2005-03-27 02:22:32 +0000 | [diff] [blame] | 2094 | /* CALLED FROM GENERATED CODE */ |
| 2095 | /* CLEAN HELPER */ |
| 2096 | /* mxcsr[15:0] contains a SSE native format MXCSR value. |
| 2097 | Extract from it the required SSEROUND value and any resulting |
| 2098 | emulation warning, and return (warn << 32) | sseround value. |
| 2099 | */ |
| 2100 | ULong amd64g_check_ldmxcsr ( ULong mxcsr ) |
| 2101 | { |
| 2102 | /* Decide on a rounding mode. mxcsr[14:13] holds it. */ |
| 2103 | /* NOTE, encoded exactly as per enum IRRoundingMode. */ |
| 2104 | ULong rmode = (mxcsr >> 13) & 3; |
| 2105 | |
| 2106 | /* Detect any required emulation warnings. */ |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 2107 | VexEmNote ew = EmNote_NONE; |
sewardj | bcbb9de | 2005-03-27 02:22:32 +0000 | [diff] [blame] | 2108 | |
| 2109 | if ((mxcsr & 0x1F80) != 0x1F80) { |
| 2110 | /* unmasked exceptions! */ |
| 2111 | ew = EmWarn_X86_sseExns; |
| 2112 | } |
| 2113 | else |
| 2114 | if (mxcsr & (1<<15)) { |
| 2115 | /* FZ is set */ |
| 2116 | ew = EmWarn_X86_fz; |
| 2117 | } |
| 2118 | else |
| 2119 | if (mxcsr & (1<<6)) { |
| 2120 | /* DAZ is set */ |
| 2121 | ew = EmWarn_X86_daz; |
| 2122 | } |
| 2123 | |
| 2124 | return (((ULong)ew) << 32) | ((ULong)rmode); |
| 2125 | } |
| 2126 | |
| 2127 | |
| 2128 | /* CALLED FROM GENERATED CODE */ |
| 2129 | /* CLEAN HELPER */ |
| 2130 | /* Given sseround as an IRRoundingMode value, create a suitable SSE |
| 2131 | native format MXCSR value. */ |
| 2132 | ULong amd64g_create_mxcsr ( ULong sseround ) |
| 2133 | { |
| 2134 | sseround &= 3; |
| 2135 | return 0x1F80 | (sseround << 13); |
| 2136 | } |
| 2137 | |
| 2138 | |
sewardj | 5e20537 | 2005-05-09 02:57:08 +0000 | [diff] [blame] | 2139 | /* CLEAN HELPER */ |
| 2140 | /* fpucw[15:0] contains a x87 native format FPU control word. |
| 2141 | Extract from it the required FPROUND value and any resulting |
| 2142 | emulation warning, and return (warn << 32) | fpround value. |
| 2143 | */ |
| 2144 | ULong amd64g_check_fldcw ( ULong fpucw ) |
| 2145 | { |
| 2146 | /* Decide on a rounding mode. fpucw[11:10] holds it. */ |
| 2147 | /* NOTE, encoded exactly as per enum IRRoundingMode. */ |
| 2148 | ULong rmode = (fpucw >> 10) & 3; |
| 2149 | |
| 2150 | /* Detect any required emulation warnings. */ |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 2151 | VexEmNote ew = EmNote_NONE; |
sewardj | 5e20537 | 2005-05-09 02:57:08 +0000 | [diff] [blame] | 2152 | |
| 2153 | if ((fpucw & 0x3F) != 0x3F) { |
| 2154 | /* unmasked exceptions! */ |
| 2155 | ew = EmWarn_X86_x87exns; |
| 2156 | } |
| 2157 | else |
| 2158 | if (((fpucw >> 8) & 3) != 3) { |
| 2159 | /* unsupported precision */ |
| 2160 | ew = EmWarn_X86_x87precision; |
| 2161 | } |
| 2162 | |
| 2163 | return (((ULong)ew) << 32) | ((ULong)rmode); |
| 2164 | } |
| 2165 | |
| 2166 | |
| 2167 | /* CLEAN HELPER */ |
| 2168 | /* Given fpround as an IRRoundingMode value, create a suitable x87 |
| 2169 | native format FPU control word. */ |
| 2170 | ULong amd64g_create_fpucw ( ULong fpround ) |
| 2171 | { |
| 2172 | fpround &= 3; |
| 2173 | return 0x037F | (fpround << 10); |
| 2174 | } |
| 2175 | |
sewardj | bcbb9de | 2005-03-27 02:22:32 +0000 | [diff] [blame] | 2176 | |
sewardj | 4017a3b | 2005-06-13 12:17:27 +0000 | [diff] [blame] | 2177 | /* This is used to implement 'fldenv'. |
| 2178 | Reads 28 bytes at x87_state[0 .. 27]. */ |
| 2179 | /* CALLED FROM GENERATED CODE */ |
| 2180 | /* DIRTY HELPER */ |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 2181 | VexEmNote amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state, |
sewardj | 4017a3b | 2005-06-13 12:17:27 +0000 | [diff] [blame] | 2182 | /*IN*/HWord x87_state) |
| 2183 | { |
sewardj | 9ae42a7 | 2012-02-16 14:18:56 +0000 | [diff] [blame] | 2184 | return do_put_x87( False, (UChar*)x87_state, vex_state ); |
sewardj | 4017a3b | 2005-06-13 12:17:27 +0000 | [diff] [blame] | 2185 | } |
| 2186 | |
| 2187 | |
| 2188 | /* CALLED FROM GENERATED CODE */ |
| 2189 | /* DIRTY HELPER */ |
| 2190 | /* Create an x87 FPU env from the guest state, as close as we can |
| 2191 | approximate it. Writes 28 bytes at x87_state[0..27]. */ |
| 2192 | void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state, |
| 2193 | /*OUT*/HWord x87_state ) |
| 2194 | { |
| 2195 | Int i, stno, preg; |
| 2196 | UInt tagw; |
| 2197 | UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); |
| 2198 | Fpu_State* x87 = (Fpu_State*)x87_state; |
| 2199 | UInt ftop = vex_state->guest_FTOP; |
| 2200 | ULong c3210 = vex_state->guest_FC3210; |
| 2201 | |
| 2202 | for (i = 0; i < 14; i++) |
| 2203 | x87->env[i] = 0; |
| 2204 | |
| 2205 | x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF; |
| 2206 | x87->env[FP_ENV_STAT] |
sewardj | 81d72ea | 2005-06-14 21:59:16 +0000 | [diff] [blame] | 2207 | = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) )); |
sewardj | 4017a3b | 2005-06-13 12:17:27 +0000 | [diff] [blame] | 2208 | x87->env[FP_ENV_CTRL] |
sewardj | 81d72ea | 2005-06-14 21:59:16 +0000 | [diff] [blame] | 2209 | = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) )); |
sewardj | 4017a3b | 2005-06-13 12:17:27 +0000 | [diff] [blame] | 2210 | |
| 2211 | /* Compute the x87 tag word. */ |
| 2212 | tagw = 0; |
| 2213 | for (stno = 0; stno < 8; stno++) { |
| 2214 | preg = (stno + ftop) & 7; |
| 2215 | if (vexTags[preg] == 0) { |
| 2216 | /* register is empty */ |
| 2217 | tagw |= (3 << (2*preg)); |
| 2218 | } else { |
| 2219 | /* register is full. */ |
| 2220 | tagw |= (0 << (2*preg)); |
| 2221 | } |
| 2222 | } |
| 2223 | x87->env[FP_ENV_TAG] = toUShort(tagw); |
| 2224 | |
| 2225 | /* We don't dump the x87 registers, tho. */ |
| 2226 | } |
| 2227 | |
| 2228 | |
sewardj | 9ae42a7 | 2012-02-16 14:18:56 +0000 | [diff] [blame] | 2229 | /* This is used to implement 'fnsave'. |
| 2230 | Writes 108 bytes at x87_state[0 .. 107]. */ |
| 2231 | /* CALLED FROM GENERATED CODE */ |
| 2232 | /* DIRTY HELPER */ |
| 2233 | void amd64g_dirtyhelper_FNSAVE ( /*IN*/VexGuestAMD64State* vex_state, |
| 2234 | /*OUT*/HWord x87_state) |
| 2235 | { |
| 2236 | do_get_x87( vex_state, (UChar*)x87_state ); |
| 2237 | } |
| 2238 | |
| 2239 | |
| 2240 | /* This is used to implement 'fnsaves'. |
| 2241 | Writes 94 bytes at x87_state[0 .. 93]. */ |
| 2242 | /* CALLED FROM GENERATED CODE */ |
| 2243 | /* DIRTY HELPER */ |
| 2244 | void amd64g_dirtyhelper_FNSAVES ( /*IN*/VexGuestAMD64State* vex_state, |
| 2245 | /*OUT*/HWord x87_state) |
| 2246 | { |
| 2247 | Int i, stno, preg; |
| 2248 | UInt tagw; |
| 2249 | ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); |
| 2250 | UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); |
| 2251 | Fpu_State_16* x87 = (Fpu_State_16*)x87_state; |
| 2252 | UInt ftop = vex_state->guest_FTOP; |
| 2253 | UInt c3210 = vex_state->guest_FC3210; |
| 2254 | |
| 2255 | for (i = 0; i < 7; i++) |
| 2256 | x87->env[i] = 0; |
| 2257 | |
| 2258 | x87->env[FPS_ENV_STAT] |
| 2259 | = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700)); |
| 2260 | x87->env[FPS_ENV_CTRL] |
| 2261 | = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND )); |
| 2262 | |
| 2263 | /* Dump the register stack in ST order. */ |
| 2264 | tagw = 0; |
| 2265 | for (stno = 0; stno < 8; stno++) { |
| 2266 | preg = (stno + ftop) & 7; |
| 2267 | if (vexTags[preg] == 0) { |
| 2268 | /* register is empty */ |
| 2269 | tagw |= (3 << (2*preg)); |
| 2270 | convert_f64le_to_f80le( (UChar*)&vexRegs[preg], |
| 2271 | &x87->reg[10*stno] ); |
| 2272 | } else { |
| 2273 | /* register is full. */ |
| 2274 | tagw |= (0 << (2*preg)); |
| 2275 | convert_f64le_to_f80le( (UChar*)&vexRegs[preg], |
| 2276 | &x87->reg[10*stno] ); |
| 2277 | } |
| 2278 | } |
| 2279 | x87->env[FPS_ENV_TAG] = toUShort(tagw); |
| 2280 | } |
| 2281 | |
| 2282 | |
| 2283 | /* This is used to implement 'frstor'. |
| 2284 | Reads 108 bytes at x87_state[0 .. 107]. */ |
| 2285 | /* CALLED FROM GENERATED CODE */ |
| 2286 | /* DIRTY HELPER */ |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 2287 | VexEmNote amd64g_dirtyhelper_FRSTOR ( /*OUT*/VexGuestAMD64State* vex_state, |
sewardj | 9ae42a7 | 2012-02-16 14:18:56 +0000 | [diff] [blame] | 2288 | /*IN*/HWord x87_state) |
| 2289 | { |
| 2290 | return do_put_x87( True, (UChar*)x87_state, vex_state ); |
| 2291 | } |
| 2292 | |
| 2293 | |
| 2294 | /* This is used to implement 'frstors'. |
| 2295 | Reads 94 bytes at x87_state[0 .. 93]. */ |
| 2296 | /* CALLED FROM GENERATED CODE */ |
| 2297 | /* DIRTY HELPER */ |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 2298 | VexEmNote amd64g_dirtyhelper_FRSTORS ( /*OUT*/VexGuestAMD64State* vex_state, |
sewardj | 9ae42a7 | 2012-02-16 14:18:56 +0000 | [diff] [blame] | 2299 | /*IN*/HWord x87_state) |
| 2300 | { |
| 2301 | Int stno, preg; |
| 2302 | UInt tag; |
| 2303 | ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); |
| 2304 | UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); |
| 2305 | Fpu_State_16* x87 = (Fpu_State_16*)x87_state; |
| 2306 | UInt ftop = (x87->env[FPS_ENV_STAT] >> 11) & 7; |
| 2307 | UInt tagw = x87->env[FPS_ENV_TAG]; |
| 2308 | UInt fpucw = x87->env[FPS_ENV_CTRL]; |
| 2309 | UInt c3210 = x87->env[FPS_ENV_STAT] & 0x4700; |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 2310 | VexEmNote ew; |
sewardj | 9ae42a7 | 2012-02-16 14:18:56 +0000 | [diff] [blame] | 2311 | UInt fpround; |
| 2312 | ULong pair; |
| 2313 | |
| 2314 | /* Copy registers and tags */ |
| 2315 | for (stno = 0; stno < 8; stno++) { |
| 2316 | preg = (stno + ftop) & 7; |
| 2317 | tag = (tagw >> (2*preg)) & 3; |
| 2318 | if (tag == 3) { |
| 2319 | /* register is empty */ |
| 2320 | /* hmm, if it's empty, does it still get written? Probably |
| 2321 | safer to say it does. If we don't, memcheck could get out |
| 2322 | of sync, in that it thinks all FP registers are defined by |
| 2323 | this helper, but in reality some have not been updated. */ |
| 2324 | vexRegs[preg] = 0; /* IEEE754 64-bit zero */ |
| 2325 | vexTags[preg] = 0; |
| 2326 | } else { |
| 2327 | /* register is non-empty */ |
| 2328 | convert_f80le_to_f64le( &x87->reg[10*stno], |
| 2329 | (UChar*)&vexRegs[preg] ); |
| 2330 | vexTags[preg] = 1; |
| 2331 | } |
| 2332 | } |
| 2333 | |
| 2334 | /* stack pointer */ |
| 2335 | vex_state->guest_FTOP = ftop; |
| 2336 | |
| 2337 | /* status word */ |
| 2338 | vex_state->guest_FC3210 = c3210; |
| 2339 | |
| 2340 | /* handle the control word, setting FPROUND and detecting any |
| 2341 | emulation warnings. */ |
| 2342 | pair = amd64g_check_fldcw ( (ULong)fpucw ); |
| 2343 | fpround = (UInt)pair & 0xFFFFFFFFULL; |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 2344 | ew = (VexEmNote)(pair >> 32); |
sewardj | 9ae42a7 | 2012-02-16 14:18:56 +0000 | [diff] [blame] | 2345 | |
| 2346 | vex_state->guest_FPROUND = fpround & 3; |
| 2347 | |
| 2348 | /* emulation warnings --> caller */ |
| 2349 | return ew; |
| 2350 | } |
| 2351 | |
| 2352 | |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2353 | /*---------------------------------------------------------------*/ |
| 2354 | /*--- Misc integer helpers, including rotates and CPUID. ---*/ |
| 2355 | /*---------------------------------------------------------------*/ |
| 2356 | |
sewardj | e9d8a26 | 2009-07-01 08:06:34 +0000 | [diff] [blame] | 2357 | /* Claim to be the following CPU, which is probably representative of |
| 2358 | the lowliest (earliest) amd64 offerings. It can do neither sse3 |
| 2359 | nor cx16. |
| 2360 | |
| 2361 | vendor_id : AuthenticAMD |
| 2362 | cpu family : 15 |
| 2363 | model : 5 |
| 2364 | model name : AMD Opteron (tm) Processor 848 |
| 2365 | stepping : 10 |
| 2366 | cpu MHz : 1797.682 |
| 2367 | cache size : 1024 KB |
| 2368 | fpu : yes |
| 2369 | fpu_exception : yes |
| 2370 | cpuid level : 1 |
| 2371 | wp : yes |
| 2372 | flags : fpu vme de pse tsc msr pae mce cx8 apic sep |
| 2373 | mtrr pge mca cmov pat pse36 clflush mmx fxsr |
| 2374 | sse sse2 syscall nx mmxext lm 3dnowext 3dnow |
| 2375 | bogomips : 3600.62 |
| 2376 | TLB size : 1088 4K pages |
| 2377 | clflush size : 64 |
| 2378 | cache_alignment : 64 |
| 2379 | address sizes : 40 bits physical, 48 bits virtual |
sewardj | 1aa3aef | 2012-02-21 08:53:54 +0000 | [diff] [blame] | 2380 | power management: ts fid vid ttp |
| 2381 | |
| 2382 | 2012-Feb-21: don't claim 3dnow or 3dnowext, since in fact |
| 2383 | we don't support them. See #291568. 3dnow is 80000001.EDX.31 |
| 2384 | and 3dnowext is 80000001.EDX.30. |
sewardj | e9d8a26 | 2009-07-01 08:06:34 +0000 | [diff] [blame] | 2385 | */ |
| 2386 | void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st ) |
| 2387 | { |
| 2388 | # define SET_ABCD(_a,_b,_c,_d) \ |
| 2389 | do { st->guest_RAX = (ULong)(_a); \ |
| 2390 | st->guest_RBX = (ULong)(_b); \ |
| 2391 | st->guest_RCX = (ULong)(_c); \ |
| 2392 | st->guest_RDX = (ULong)(_d); \ |
| 2393 | } while (0) |
| 2394 | |
| 2395 | switch (0xFFFFFFFF & st->guest_RAX) { |
| 2396 | case 0x00000000: |
| 2397 | SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65); |
| 2398 | break; |
| 2399 | case 0x00000001: |
| 2400 | SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff); |
| 2401 | break; |
| 2402 | case 0x80000000: |
| 2403 | SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65); |
| 2404 | break; |
| 2405 | case 0x80000001: |
sewardj | 1aa3aef | 2012-02-21 08:53:54 +0000 | [diff] [blame] | 2406 | /* Don't claim to support 3dnow or 3dnowext. 0xe1d3fbff is |
| 2407 | the original it-is-supported value that the h/w provides. |
| 2408 | See #291568. */ |
| 2409 | SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, /*0xe1d3fbff*/ |
| 2410 | 0x21d3fbff); |
sewardj | e9d8a26 | 2009-07-01 08:06:34 +0000 | [diff] [blame] | 2411 | break; |
| 2412 | case 0x80000002: |
| 2413 | SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428); |
| 2414 | break; |
| 2415 | case 0x80000003: |
| 2416 | SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834); |
| 2417 | break; |
| 2418 | case 0x80000004: |
| 2419 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2420 | break; |
| 2421 | case 0x80000005: |
| 2422 | SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140); |
| 2423 | break; |
| 2424 | case 0x80000006: |
| 2425 | SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000); |
| 2426 | break; |
| 2427 | case 0x80000007: |
| 2428 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f); |
| 2429 | break; |
| 2430 | case 0x80000008: |
| 2431 | SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000); |
| 2432 | break; |
| 2433 | default: |
| 2434 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2435 | break; |
| 2436 | } |
| 2437 | # undef SET_ABCD |
| 2438 | } |
| 2439 | |
| 2440 | |
| 2441 | /* Claim to be the following CPU (2 x ...), which is sse3 and cx16 |
| 2442 | capable. |
| 2443 | |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2444 | vendor_id : GenuineIntel |
| 2445 | cpu family : 6 |
| 2446 | model : 15 |
| 2447 | model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz |
| 2448 | stepping : 6 |
| 2449 | cpu MHz : 2394.000 |
| 2450 | cache size : 4096 KB |
| 2451 | physical id : 0 |
| 2452 | siblings : 2 |
| 2453 | core id : 0 |
| 2454 | cpu cores : 2 |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2455 | fpu : yes |
| 2456 | fpu_exception : yes |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2457 | cpuid level : 10 |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2458 | wp : yes |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2459 | flags : fpu vme de pse tsc msr pae mce cx8 apic sep |
| 2460 | mtrr pge mca cmov pat pse36 clflush dts acpi |
| 2461 | mmx fxsr sse sse2 ss ht tm syscall nx lm |
| 2462 | constant_tsc pni monitor ds_cpl vmx est tm2 |
| 2463 | cx16 xtpr lahf_lm |
| 2464 | bogomips : 4798.78 |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2465 | clflush size : 64 |
| 2466 | cache_alignment : 64 |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2467 | address sizes : 36 bits physical, 48 bits virtual |
| 2468 | power management: |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2469 | */ |
sewardj | e9d8a26 | 2009-07-01 08:06:34 +0000 | [diff] [blame] | 2470 | void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st ) |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2471 | { |
| 2472 | # define SET_ABCD(_a,_b,_c,_d) \ |
| 2473 | do { st->guest_RAX = (ULong)(_a); \ |
| 2474 | st->guest_RBX = (ULong)(_b); \ |
| 2475 | st->guest_RCX = (ULong)(_c); \ |
| 2476 | st->guest_RDX = (ULong)(_d); \ |
| 2477 | } while (0) |
| 2478 | |
| 2479 | switch (0xFFFFFFFF & st->guest_RAX) { |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2480 | case 0x00000000: |
| 2481 | SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69); |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2482 | break; |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2483 | case 0x00000001: |
| 2484 | SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff); |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2485 | break; |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2486 | case 0x00000002: |
| 2487 | SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049); |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2488 | break; |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2489 | case 0x00000003: |
| 2490 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2491 | break; |
sewardj | 32bfd3e | 2008-02-10 13:29:19 +0000 | [diff] [blame] | 2492 | case 0x00000004: { |
| 2493 | switch (0xFFFFFFFF & st->guest_RCX) { |
| 2494 | case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f, |
| 2495 | 0x0000003f, 0x00000001); break; |
| 2496 | case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f, |
| 2497 | 0x0000003f, 0x00000001); break; |
| 2498 | case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f, |
| 2499 | 0x00000fff, 0x00000001); break; |
| 2500 | default: SET_ABCD(0x00000000, 0x00000000, |
| 2501 | 0x00000000, 0x00000000); break; |
| 2502 | } |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2503 | break; |
sewardj | 32bfd3e | 2008-02-10 13:29:19 +0000 | [diff] [blame] | 2504 | } |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2505 | case 0x00000005: |
| 2506 | SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020); |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2507 | break; |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2508 | case 0x00000006: |
| 2509 | SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000); |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2510 | break; |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2511 | case 0x00000007: |
| 2512 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2513 | break; |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2514 | case 0x00000008: |
| 2515 | SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000); |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2516 | break; |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2517 | case 0x00000009: |
| 2518 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2519 | break; |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2520 | case 0x0000000a: |
sewardj | 32bfd3e | 2008-02-10 13:29:19 +0000 | [diff] [blame] | 2521 | unhandled_eax_value: |
sewardj | 150c9cd | 2008-02-09 01:16:02 +0000 | [diff] [blame] | 2522 | SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000); |
| 2523 | break; |
| 2524 | case 0x80000000: |
| 2525 | SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); |
| 2526 | break; |
| 2527 | case 0x80000001: |
| 2528 | SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800); |
| 2529 | break; |
| 2530 | case 0x80000002: |
| 2531 | SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); |
| 2532 | break; |
| 2533 | case 0x80000003: |
| 2534 | SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020); |
| 2535 | break; |
| 2536 | case 0x80000004: |
| 2537 | SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847); |
| 2538 | break; |
| 2539 | case 0x80000005: |
| 2540 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2541 | break; |
| 2542 | case 0x80000006: |
| 2543 | SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000); |
| 2544 | break; |
| 2545 | case 0x80000007: |
| 2546 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2547 | break; |
| 2548 | case 0x80000008: |
| 2549 | SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); |
| 2550 | break; |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2551 | default: |
sewardj | 32bfd3e | 2008-02-10 13:29:19 +0000 | [diff] [blame] | 2552 | goto unhandled_eax_value; |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 2553 | } |
| 2554 | # undef SET_ABCD |
| 2555 | } |
| 2556 | |
| 2557 | |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 2558 | /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16 |
| 2559 | capable. |
| 2560 | |
| 2561 | vendor_id : GenuineIntel |
| 2562 | cpu family : 6 |
| 2563 | model : 37 |
| 2564 | model name : Intel(R) Core(TM) i5 CPU 670 @ 3.47GHz |
| 2565 | stepping : 2 |
| 2566 | cpu MHz : 3334.000 |
| 2567 | cache size : 4096 KB |
| 2568 | physical id : 0 |
| 2569 | siblings : 4 |
| 2570 | core id : 0 |
| 2571 | cpu cores : 2 |
| 2572 | apicid : 0 |
| 2573 | initial apicid : 0 |
| 2574 | fpu : yes |
| 2575 | fpu_exception : yes |
| 2576 | cpuid level : 11 |
| 2577 | wp : yes |
| 2578 | flags : fpu vme de pse tsc msr pae mce cx8 apic sep |
| 2579 | mtrr pge mca cmov pat pse36 clflush dts acpi |
| 2580 | mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp |
| 2581 | lm constant_tsc arch_perfmon pebs bts rep_good |
| 2582 | xtopology nonstop_tsc aperfmperf pni pclmulqdq |
| 2583 | dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 |
| 2584 | xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida |
| 2585 | arat tpr_shadow vnmi flexpriority ept vpid |
| 2586 | bogomips : 6957.57 |
| 2587 | clflush size : 64 |
| 2588 | cache_alignment : 64 |
| 2589 | address sizes : 36 bits physical, 48 bits virtual |
| 2590 | power management: |
| 2591 | */ |
| 2592 | void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st ) |
| 2593 | { |
| 2594 | # define SET_ABCD(_a,_b,_c,_d) \ |
| 2595 | do { st->guest_RAX = (ULong)(_a); \ |
| 2596 | st->guest_RBX = (ULong)(_b); \ |
| 2597 | st->guest_RCX = (ULong)(_c); \ |
| 2598 | st->guest_RDX = (ULong)(_d); \ |
| 2599 | } while (0) |
| 2600 | |
| 2601 | UInt old_eax = (UInt)st->guest_RAX; |
| 2602 | UInt old_ecx = (UInt)st->guest_RCX; |
| 2603 | |
| 2604 | switch (old_eax) { |
| 2605 | case 0x00000000: |
| 2606 | SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69); |
| 2607 | break; |
| 2608 | case 0x00000001: |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 2609 | SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff, 0xbfebfbff); |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 2610 | break; |
| 2611 | case 0x00000002: |
| 2612 | SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c); |
| 2613 | break; |
| 2614 | case 0x00000003: |
| 2615 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2616 | break; |
| 2617 | case 0x00000004: |
| 2618 | switch (old_ecx) { |
| 2619 | case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f, |
| 2620 | 0x0000003f, 0x00000000); break; |
| 2621 | case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f, |
| 2622 | 0x0000007f, 0x00000000); break; |
| 2623 | case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f, |
| 2624 | 0x000001ff, 0x00000000); break; |
| 2625 | case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f, |
| 2626 | 0x00000fff, 0x00000002); break; |
| 2627 | default: SET_ABCD(0x00000000, 0x00000000, |
| 2628 | 0x00000000, 0x00000000); break; |
| 2629 | } |
| 2630 | break; |
| 2631 | case 0x00000005: |
| 2632 | SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120); |
| 2633 | break; |
| 2634 | case 0x00000006: |
| 2635 | SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000); |
| 2636 | break; |
| 2637 | case 0x00000007: |
| 2638 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2639 | break; |
| 2640 | case 0x00000008: |
| 2641 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2642 | break; |
| 2643 | case 0x00000009: |
| 2644 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2645 | break; |
| 2646 | case 0x0000000a: |
| 2647 | SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603); |
| 2648 | break; |
| 2649 | case 0x0000000b: |
| 2650 | switch (old_ecx) { |
| 2651 | case 0x00000000: |
| 2652 | SET_ABCD(0x00000001, 0x00000002, |
| 2653 | 0x00000100, 0x00000000); break; |
| 2654 | case 0x00000001: |
| 2655 | SET_ABCD(0x00000004, 0x00000004, |
| 2656 | 0x00000201, 0x00000000); break; |
| 2657 | default: |
| 2658 | SET_ABCD(0x00000000, 0x00000000, |
| 2659 | old_ecx, 0x00000000); break; |
| 2660 | } |
| 2661 | break; |
| 2662 | case 0x0000000c: |
| 2663 | SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000); |
| 2664 | break; |
| 2665 | case 0x0000000d: |
| 2666 | switch (old_ecx) { |
| 2667 | case 0x00000000: SET_ABCD(0x00000001, 0x00000002, |
| 2668 | 0x00000100, 0x00000000); break; |
| 2669 | case 0x00000001: SET_ABCD(0x00000004, 0x00000004, |
| 2670 | 0x00000201, 0x00000000); break; |
| 2671 | default: SET_ABCD(0x00000000, 0x00000000, |
| 2672 | old_ecx, 0x00000000); break; |
| 2673 | } |
| 2674 | break; |
| 2675 | case 0x80000000: |
| 2676 | SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); |
| 2677 | break; |
| 2678 | case 0x80000001: |
| 2679 | SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800); |
| 2680 | break; |
| 2681 | case 0x80000002: |
| 2682 | SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); |
| 2683 | break; |
| 2684 | case 0x80000003: |
| 2685 | SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020); |
| 2686 | break; |
| 2687 | case 0x80000004: |
| 2688 | SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847); |
| 2689 | break; |
| 2690 | case 0x80000005: |
| 2691 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2692 | break; |
| 2693 | case 0x80000006: |
| 2694 | SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000); |
| 2695 | break; |
| 2696 | case 0x80000007: |
| 2697 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100); |
| 2698 | break; |
| 2699 | case 0x80000008: |
| 2700 | SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); |
| 2701 | break; |
| 2702 | default: |
| 2703 | SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000); |
| 2704 | break; |
| 2705 | } |
| 2706 | # undef SET_ABCD |
| 2707 | } |
| 2708 | |
| 2709 | |
sewardj | fe0c5e7 | 2012-06-15 15:48:07 +0000 | [diff] [blame] | 2710 | /* Claim to be the following CPU (4 x ...), which is AVX and cx16 |
sewardj | 9e4c376 | 2013-09-27 15:03:58 +0000 | [diff] [blame] | 2711 | capable. Plus (kludge!) it "supports" HTM. |
sewardj | fe0c5e7 | 2012-06-15 15:48:07 +0000 | [diff] [blame] | 2712 | |
| 2713 | vendor_id : GenuineIntel |
| 2714 | cpu family : 6 |
| 2715 | model : 42 |
| 2716 | model name : Intel(R) Core(TM) i5-2300 CPU @ 2.80GHz |
| 2717 | stepping : 7 |
| 2718 | cpu MHz : 1600.000 |
| 2719 | cache size : 6144 KB |
| 2720 | physical id : 0 |
| 2721 | siblings : 4 |
| 2722 | core id : 3 |
| 2723 | cpu cores : 4 |
| 2724 | apicid : 6 |
| 2725 | initial apicid : 6 |
| 2726 | fpu : yes |
| 2727 | fpu_exception : yes |
| 2728 | cpuid level : 13 |
| 2729 | wp : yes |
| 2730 | flags : fpu vme de pse tsc msr pae mce cx8 apic sep |
| 2731 | mtrr pge mca cmov pat pse36 clflush dts acpi |
| 2732 | mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp |
| 2733 | lm constant_tsc arch_perfmon pebs bts rep_good |
| 2734 | nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq |
| 2735 | dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16 |
| 2736 | xtpr pdcm sse4_1 sse4_2 popcnt aes xsave avx |
| 2737 | lahf_lm ida arat epb xsaveopt pln pts dts |
| 2738 | tpr_shadow vnmi flexpriority ept vpid |
| 2739 | |
| 2740 | bogomips : 5768.94 |
| 2741 | clflush size : 64 |
| 2742 | cache_alignment : 64 |
| 2743 | address sizes : 36 bits physical, 48 bits virtual |
| 2744 | power management: |
| 2745 | */ |
| 2746 | void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st ) |
| 2747 | { |
| 2748 | # define SET_ABCD(_a,_b,_c,_d) \ |
| 2749 | do { st->guest_RAX = (ULong)(_a); \ |
| 2750 | st->guest_RBX = (ULong)(_b); \ |
| 2751 | st->guest_RCX = (ULong)(_c); \ |
| 2752 | st->guest_RDX = (ULong)(_d); \ |
| 2753 | } while (0) |
| 2754 | |
| 2755 | UInt old_eax = (UInt)st->guest_RAX; |
| 2756 | UInt old_ecx = (UInt)st->guest_RCX; |
| 2757 | |
| 2758 | switch (old_eax) { |
| 2759 | case 0x00000000: |
| 2760 | SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69); |
| 2761 | break; |
| 2762 | case 0x00000001: |
| 2763 | SET_ABCD(0x000206a7, 0x00100800, 0x1f9ae3bf, 0xbfebfbff); |
| 2764 | break; |
| 2765 | case 0x00000002: |
| 2766 | SET_ABCD(0x76035a01, 0x00f0b0ff, 0x00000000, 0x00ca0000); |
| 2767 | break; |
| 2768 | case 0x00000003: |
| 2769 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2770 | break; |
| 2771 | case 0x00000004: |
| 2772 | switch (old_ecx) { |
| 2773 | case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f, |
| 2774 | 0x0000003f, 0x00000000); break; |
| 2775 | case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f, |
| 2776 | 0x0000003f, 0x00000000); break; |
| 2777 | case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f, |
| 2778 | 0x000001ff, 0x00000000); break; |
| 2779 | case 0x00000003: SET_ABCD(0x1c03c163, 0x02c0003f, |
| 2780 | 0x00001fff, 0x00000006); break; |
| 2781 | default: SET_ABCD(0x00000000, 0x00000000, |
| 2782 | 0x00000000, 0x00000000); break; |
| 2783 | } |
| 2784 | break; |
| 2785 | case 0x00000005: |
| 2786 | SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120); |
| 2787 | break; |
| 2788 | case 0x00000006: |
| 2789 | SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000); |
| 2790 | break; |
| 2791 | case 0x00000007: |
sewardj | 9e4c376 | 2013-09-27 15:03:58 +0000 | [diff] [blame] | 2792 | SET_ABCD(0x00000000, 0x00000800, 0x00000000, 0x00000000); |
sewardj | fe0c5e7 | 2012-06-15 15:48:07 +0000 | [diff] [blame] | 2793 | break; |
| 2794 | case 0x00000008: |
| 2795 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2796 | break; |
| 2797 | case 0x00000009: |
| 2798 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2799 | break; |
| 2800 | case 0x0000000a: |
| 2801 | SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603); |
| 2802 | break; |
| 2803 | case 0x0000000b: |
| 2804 | switch (old_ecx) { |
| 2805 | case 0x00000000: |
| 2806 | SET_ABCD(0x00000001, 0x00000001, |
| 2807 | 0x00000100, 0x00000000); break; |
| 2808 | case 0x00000001: |
| 2809 | SET_ABCD(0x00000004, 0x00000004, |
| 2810 | 0x00000201, 0x00000000); break; |
| 2811 | default: |
| 2812 | SET_ABCD(0x00000000, 0x00000000, |
| 2813 | old_ecx, 0x00000000); break; |
| 2814 | } |
| 2815 | break; |
| 2816 | case 0x0000000c: |
| 2817 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2818 | break; |
| 2819 | case 0x0000000d: |
| 2820 | switch (old_ecx) { |
| 2821 | case 0x00000000: SET_ABCD(0x00000007, 0x00000340, |
| 2822 | 0x00000340, 0x00000000); break; |
| 2823 | case 0x00000001: SET_ABCD(0x00000001, 0x00000000, |
| 2824 | 0x00000000, 0x00000000); break; |
| 2825 | case 0x00000002: SET_ABCD(0x00000100, 0x00000240, |
| 2826 | 0x00000000, 0x00000000); break; |
| 2827 | default: SET_ABCD(0x00000000, 0x00000000, |
| 2828 | 0x00000000, 0x00000000); break; |
| 2829 | } |
| 2830 | break; |
| 2831 | case 0x0000000e: |
| 2832 | SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000); |
| 2833 | break; |
| 2834 | case 0x0000000f: |
| 2835 | SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000); |
| 2836 | break; |
| 2837 | case 0x80000000: |
| 2838 | SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); |
| 2839 | break; |
| 2840 | case 0x80000001: |
| 2841 | SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800); |
| 2842 | break; |
| 2843 | case 0x80000002: |
| 2844 | SET_ABCD(0x20202020, 0x20202020, 0x65746e49, 0x2952286c); |
| 2845 | break; |
| 2846 | case 0x80000003: |
| 2847 | SET_ABCD(0x726f4320, 0x4d542865, 0x35692029, 0x3033322d); |
| 2848 | break; |
| 2849 | case 0x80000004: |
| 2850 | SET_ABCD(0x50432030, 0x20402055, 0x30382e32, 0x007a4847); |
| 2851 | break; |
| 2852 | case 0x80000005: |
| 2853 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 2854 | break; |
| 2855 | case 0x80000006: |
| 2856 | SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000); |
| 2857 | break; |
| 2858 | case 0x80000007: |
| 2859 | SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100); |
| 2860 | break; |
| 2861 | case 0x80000008: |
| 2862 | SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); |
| 2863 | break; |
| 2864 | default: |
| 2865 | SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000); |
| 2866 | break; |
| 2867 | } |
| 2868 | # undef SET_ABCD |
| 2869 | } |
| 2870 | |
| 2871 | |
sewardj | 112b099 | 2005-07-23 13:19:32 +0000 | [diff] [blame] | 2872 | ULong amd64g_calculate_RCR ( ULong arg, |
| 2873 | ULong rot_amt, |
| 2874 | ULong rflags_in, |
| 2875 | Long szIN ) |
| 2876 | { |
| 2877 | Bool wantRflags = toBool(szIN < 0); |
| 2878 | ULong sz = wantRflags ? (-szIN) : szIN; |
| 2879 | ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F); |
| 2880 | ULong cf=0, of=0, tempcf; |
| 2881 | |
| 2882 | switch (sz) { |
| 2883 | case 8: |
| 2884 | cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| 2885 | of = ((arg >> 63) ^ cf) & 1; |
| 2886 | while (tempCOUNT > 0) { |
| 2887 | tempcf = arg & 1; |
| 2888 | arg = (arg >> 1) | (cf << 63); |
| 2889 | cf = tempcf; |
| 2890 | tempCOUNT--; |
| 2891 | } |
| 2892 | break; |
| 2893 | case 4: |
| 2894 | while (tempCOUNT >= 33) tempCOUNT -= 33; |
| 2895 | cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| 2896 | of = ((arg >> 31) ^ cf) & 1; |
| 2897 | while (tempCOUNT > 0) { |
| 2898 | tempcf = arg & 1; |
| 2899 | arg = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31); |
| 2900 | cf = tempcf; |
| 2901 | tempCOUNT--; |
| 2902 | } |
| 2903 | break; |
| 2904 | case 2: |
| 2905 | while (tempCOUNT >= 17) tempCOUNT -= 17; |
| 2906 | cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| 2907 | of = ((arg >> 15) ^ cf) & 1; |
| 2908 | while (tempCOUNT > 0) { |
| 2909 | tempcf = arg & 1; |
| 2910 | arg = ((arg >> 1) & 0x7FFFULL) | (cf << 15); |
| 2911 | cf = tempcf; |
| 2912 | tempCOUNT--; |
| 2913 | } |
| 2914 | break; |
| 2915 | case 1: |
| 2916 | while (tempCOUNT >= 9) tempCOUNT -= 9; |
| 2917 | cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| 2918 | of = ((arg >> 7) ^ cf) & 1; |
| 2919 | while (tempCOUNT > 0) { |
| 2920 | tempcf = arg & 1; |
| 2921 | arg = ((arg >> 1) & 0x7FULL) | (cf << 7); |
| 2922 | cf = tempcf; |
| 2923 | tempCOUNT--; |
| 2924 | } |
| 2925 | break; |
| 2926 | default: |
| 2927 | vpanic("calculate_RCR(amd64g): invalid size"); |
| 2928 | } |
| 2929 | |
| 2930 | cf &= 1; |
| 2931 | of &= 1; |
| 2932 | rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O); |
| 2933 | rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O); |
| 2934 | |
| 2935 | /* caller can ask to have back either the resulting flags or |
| 2936 | resulting value, but not both */ |
| 2937 | return wantRflags ? rflags_in : arg; |
| 2938 | } |
| 2939 | |
sewardj | b5e5c6d | 2007-01-12 20:29:01 +0000 | [diff] [blame] | 2940 | ULong amd64g_calculate_RCL ( ULong arg, |
| 2941 | ULong rot_amt, |
| 2942 | ULong rflags_in, |
| 2943 | Long szIN ) |
| 2944 | { |
| 2945 | Bool wantRflags = toBool(szIN < 0); |
| 2946 | ULong sz = wantRflags ? (-szIN) : szIN; |
| 2947 | ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F); |
| 2948 | ULong cf=0, of=0, tempcf; |
| 2949 | |
| 2950 | switch (sz) { |
| 2951 | case 8: |
| 2952 | cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| 2953 | while (tempCOUNT > 0) { |
| 2954 | tempcf = (arg >> 63) & 1; |
| 2955 | arg = (arg << 1) | (cf & 1); |
| 2956 | cf = tempcf; |
| 2957 | tempCOUNT--; |
| 2958 | } |
| 2959 | of = ((arg >> 63) ^ cf) & 1; |
| 2960 | break; |
| 2961 | case 4: |
| 2962 | while (tempCOUNT >= 33) tempCOUNT -= 33; |
| 2963 | cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| 2964 | while (tempCOUNT > 0) { |
| 2965 | tempcf = (arg >> 31) & 1; |
| 2966 | arg = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1)); |
| 2967 | cf = tempcf; |
| 2968 | tempCOUNT--; |
| 2969 | } |
| 2970 | of = ((arg >> 31) ^ cf) & 1; |
| 2971 | break; |
| 2972 | case 2: |
| 2973 | while (tempCOUNT >= 17) tempCOUNT -= 17; |
| 2974 | cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| 2975 | while (tempCOUNT > 0) { |
| 2976 | tempcf = (arg >> 15) & 1; |
| 2977 | arg = 0xFFFFULL & ((arg << 1) | (cf & 1)); |
| 2978 | cf = tempcf; |
| 2979 | tempCOUNT--; |
| 2980 | } |
| 2981 | of = ((arg >> 15) ^ cf) & 1; |
| 2982 | break; |
| 2983 | case 1: |
| 2984 | while (tempCOUNT >= 9) tempCOUNT -= 9; |
| 2985 | cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; |
| 2986 | while (tempCOUNT > 0) { |
| 2987 | tempcf = (arg >> 7) & 1; |
| 2988 | arg = 0xFFULL & ((arg << 1) | (cf & 1)); |
| 2989 | cf = tempcf; |
| 2990 | tempCOUNT--; |
| 2991 | } |
| 2992 | of = ((arg >> 7) ^ cf) & 1; |
| 2993 | break; |
| 2994 | default: |
| 2995 | vpanic("calculate_RCL(amd64g): invalid size"); |
| 2996 | } |
| 2997 | |
| 2998 | cf &= 1; |
| 2999 | of &= 1; |
| 3000 | rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O); |
| 3001 | rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O); |
| 3002 | |
| 3003 | return wantRflags ? rflags_in : arg; |
| 3004 | } |
| 3005 | |
sewardj | 1a179b5 | 2010-09-28 19:56:32 +0000 | [diff] [blame] | 3006 | /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+) |
| 3007 | * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25 |
| 3008 | */ |
| 3009 | ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which) |
| 3010 | { |
| 3011 | ULong hi, lo, tmp, A[16]; |
| 3012 | |
| 3013 | A[0] = 0; A[1] = a; |
| 3014 | A[2] = A[1] << 1; A[3] = A[2] ^ a; |
| 3015 | A[4] = A[2] << 1; A[5] = A[4] ^ a; |
| 3016 | A[6] = A[3] << 1; A[7] = A[6] ^ a; |
| 3017 | A[8] = A[4] << 1; A[9] = A[8] ^ a; |
| 3018 | A[10] = A[5] << 1; A[11] = A[10] ^ a; |
| 3019 | A[12] = A[6] << 1; A[13] = A[12] ^ a; |
| 3020 | A[14] = A[7] << 1; A[15] = A[14] ^ a; |
| 3021 | |
| 3022 | lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15]; |
| 3023 | hi = lo >> 56; |
| 3024 | lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15]; |
| 3025 | hi = (hi << 8) | (lo >> 56); |
| 3026 | lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15]; |
| 3027 | hi = (hi << 8) | (lo >> 56); |
| 3028 | lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15]; |
| 3029 | hi = (hi << 8) | (lo >> 56); |
| 3030 | lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15]; |
| 3031 | hi = (hi << 8) | (lo >> 56); |
| 3032 | lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15]; |
| 3033 | hi = (hi << 8) | (lo >> 56); |
| 3034 | lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15]; |
| 3035 | hi = (hi << 8) | (lo >> 56); |
| 3036 | lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15]; |
| 3037 | |
| 3038 | ULong m0 = -1; |
| 3039 | m0 /= 255; |
| 3040 | tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp; |
| 3041 | tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp; |
| 3042 | tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp; |
| 3043 | tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp; |
| 3044 | tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp; |
| 3045 | tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp; |
| 3046 | tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp; |
| 3047 | |
| 3048 | return which ? hi : lo; |
| 3049 | } |
| 3050 | |
sewardj | 112b099 | 2005-07-23 13:19:32 +0000 | [diff] [blame] | 3051 | |
sewardj | bc6af53 | 2005-08-23 23:16:51 +0000 | [diff] [blame] | 3052 | /* CALLED FROM GENERATED CODE */ |
| 3053 | /* DIRTY HELPER (non-referentially-transparent) */ |
| 3054 | /* Horrible hack. On non-amd64 platforms, return 1. */ |
| 3055 | ULong amd64g_dirtyhelper_RDTSC ( void ) |
| 3056 | { |
| 3057 | # if defined(__x86_64__) |
| 3058 | UInt eax, edx; |
| 3059 | __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx)); |
| 3060 | return (((ULong)edx) << 32) | ((ULong)eax); |
| 3061 | # else |
| 3062 | return 1ULL; |
| 3063 | # endif |
| 3064 | } |
| 3065 | |
sewardj | 818c730 | 2013-03-26 13:53:18 +0000 | [diff] [blame] | 3066 | /* CALLED FROM GENERATED CODE */ |
| 3067 | /* DIRTY HELPER (non-referentially-transparent) */ |
| 3068 | /* Horrible hack. On non-amd64 platforms, return 1. */ |
| 3069 | /* This uses a different calling convention from _RDTSC just above |
| 3070 | only because of the difficulty of returning 96 bits from a C |
| 3071 | function -- RDTSC returns 64 bits and so is simple by comparison, |
| 3072 | on amd64. */ |
| 3073 | void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st ) |
| 3074 | { |
| 3075 | # if defined(__x86_64__) |
| 3076 | UInt eax, ecx, edx; |
| 3077 | __asm__ __volatile__("rdtscp" : "=a" (eax), "=d" (edx), "=c" (ecx)); |
| 3078 | st->guest_RAX = (ULong)eax; |
| 3079 | st->guest_RCX = (ULong)ecx; |
| 3080 | st->guest_RDX = (ULong)edx; |
| 3081 | # else |
| 3082 | /* Do nothing. */ |
| 3083 | # endif |
| 3084 | } |
sewardj | bc6af53 | 2005-08-23 23:16:51 +0000 | [diff] [blame] | 3085 | |
sewardj | bb4396c | 2007-11-20 17:29:08 +0000 | [diff] [blame] | 3086 | /* CALLED FROM GENERATED CODE */ |
| 3087 | /* DIRTY HELPER (non-referentially-transparent) */ |
| 3088 | /* Horrible hack. On non-amd64 platforms, return 0. */ |
| 3089 | ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ ) |
| 3090 | { |
| 3091 | # if defined(__x86_64__) |
| 3092 | ULong r = 0; |
| 3093 | portno &= 0xFFFF; |
| 3094 | switch (sz) { |
| 3095 | case 4: |
| 3096 | __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0" |
| 3097 | : "=a" (r) : "Nd" (portno)); |
| 3098 | break; |
| 3099 | case 2: |
| 3100 | __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0" |
| 3101 | : "=a" (r) : "Nd" (portno)); |
| 3102 | break; |
| 3103 | case 1: |
| 3104 | __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0" |
| 3105 | : "=a" (r) : "Nd" (portno)); |
| 3106 | break; |
| 3107 | default: |
| 3108 | break; /* note: no 64-bit version of insn exists */ |
| 3109 | } |
| 3110 | return r; |
| 3111 | # else |
| 3112 | return 0; |
| 3113 | # endif |
| 3114 | } |
| 3115 | |
| 3116 | |
| 3117 | /* CALLED FROM GENERATED CODE */ |
| 3118 | /* DIRTY HELPER (non-referentially-transparent) */ |
| 3119 | /* Horrible hack. On non-amd64 platforms, do nothing. */ |
| 3120 | void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ ) |
| 3121 | { |
| 3122 | # if defined(__x86_64__) |
| 3123 | portno &= 0xFFFF; |
| 3124 | switch (sz) { |
| 3125 | case 4: |
| 3126 | __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1" |
| 3127 | : : "a" (data), "Nd" (portno)); |
| 3128 | break; |
| 3129 | case 2: |
| 3130 | __asm__ __volatile__("outw %w0, %w1" |
| 3131 | : : "a" (data), "Nd" (portno)); |
| 3132 | break; |
| 3133 | case 1: |
| 3134 | __asm__ __volatile__("outb %b0, %w1" |
| 3135 | : : "a" (data), "Nd" (portno)); |
| 3136 | break; |
| 3137 | default: |
| 3138 | break; /* note: no 64-bit version of insn exists */ |
| 3139 | } |
| 3140 | # else |
| 3141 | /* do nothing */ |
| 3142 | # endif |
| 3143 | } |
| 3144 | |
sewardj | b9dc243 | 2010-06-07 16:22:22 +0000 | [diff] [blame] | 3145 | /* CALLED FROM GENERATED CODE */ |
| 3146 | /* DIRTY HELPER (non-referentially-transparent) */ |
| 3147 | /* Horrible hack. On non-amd64 platforms, do nothing. */ |
| 3148 | /* op = 0: call the native SGDT instruction. |
| 3149 | op = 1: call the native SIDT instruction. |
| 3150 | */ |
| 3151 | void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) { |
| 3152 | # if defined(__x86_64__) |
| 3153 | switch (op) { |
| 3154 | case 0: |
| 3155 | __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory"); |
| 3156 | break; |
| 3157 | case 1: |
| 3158 | __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory"); |
| 3159 | break; |
| 3160 | default: |
| 3161 | vpanic("amd64g_dirtyhelper_SxDT"); |
| 3162 | } |
| 3163 | # else |
| 3164 | /* do nothing */ |
| 3165 | UChar* p = (UChar*)address; |
| 3166 | p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0; |
| 3167 | p[6] = p[7] = p[8] = p[9] = 0; |
| 3168 | # endif |
| 3169 | } |
sewardj | bb4396c | 2007-11-20 17:29:08 +0000 | [diff] [blame] | 3170 | |
sewardj | 8711f66 | 2005-05-09 17:52:56 +0000 | [diff] [blame] | 3171 | /*---------------------------------------------------------------*/ |
| 3172 | /*--- Helpers for MMX/SSE/SSE2. ---*/ |
| 3173 | /*---------------------------------------------------------------*/ |
| 3174 | |
sewardj | a7ba8c4 | 2005-05-10 20:08:34 +0000 | [diff] [blame] | 3175 | static inline UChar abdU8 ( UChar xx, UChar yy ) { |
| 3176 | return toUChar(xx>yy ? xx-yy : yy-xx); |
| 3177 | } |
| 3178 | |
sewardj | 8711f66 | 2005-05-09 17:52:56 +0000 | [diff] [blame] | 3179 | static inline ULong mk32x2 ( UInt w1, UInt w0 ) { |
| 3180 | return (((ULong)w1) << 32) | ((ULong)w0); |
| 3181 | } |
| 3182 | |
| 3183 | static inline UShort sel16x4_3 ( ULong w64 ) { |
| 3184 | UInt hi32 = toUInt(w64 >> 32); |
| 3185 | return toUShort(hi32 >> 16); |
| 3186 | } |
| 3187 | static inline UShort sel16x4_2 ( ULong w64 ) { |
| 3188 | UInt hi32 = toUInt(w64 >> 32); |
| 3189 | return toUShort(hi32); |
| 3190 | } |
| 3191 | static inline UShort sel16x4_1 ( ULong w64 ) { |
| 3192 | UInt lo32 = toUInt(w64); |
| 3193 | return toUShort(lo32 >> 16); |
| 3194 | } |
| 3195 | static inline UShort sel16x4_0 ( ULong w64 ) { |
| 3196 | UInt lo32 = toUInt(w64); |
| 3197 | return toUShort(lo32); |
| 3198 | } |
| 3199 | |
sewardj | a7ba8c4 | 2005-05-10 20:08:34 +0000 | [diff] [blame] | 3200 | static inline UChar sel8x8_7 ( ULong w64 ) { |
| 3201 | UInt hi32 = toUInt(w64 >> 32); |
| 3202 | return toUChar(hi32 >> 24); |
| 3203 | } |
| 3204 | static inline UChar sel8x8_6 ( ULong w64 ) { |
| 3205 | UInt hi32 = toUInt(w64 >> 32); |
| 3206 | return toUChar(hi32 >> 16); |
| 3207 | } |
| 3208 | static inline UChar sel8x8_5 ( ULong w64 ) { |
| 3209 | UInt hi32 = toUInt(w64 >> 32); |
| 3210 | return toUChar(hi32 >> 8); |
| 3211 | } |
| 3212 | static inline UChar sel8x8_4 ( ULong w64 ) { |
| 3213 | UInt hi32 = toUInt(w64 >> 32); |
| 3214 | return toUChar(hi32 >> 0); |
| 3215 | } |
| 3216 | static inline UChar sel8x8_3 ( ULong w64 ) { |
| 3217 | UInt lo32 = toUInt(w64); |
| 3218 | return toUChar(lo32 >> 24); |
| 3219 | } |
| 3220 | static inline UChar sel8x8_2 ( ULong w64 ) { |
| 3221 | UInt lo32 = toUInt(w64); |
| 3222 | return toUChar(lo32 >> 16); |
| 3223 | } |
| 3224 | static inline UChar sel8x8_1 ( ULong w64 ) { |
| 3225 | UInt lo32 = toUInt(w64); |
| 3226 | return toUChar(lo32 >> 8); |
| 3227 | } |
| 3228 | static inline UChar sel8x8_0 ( ULong w64 ) { |
| 3229 | UInt lo32 = toUInt(w64); |
| 3230 | return toUChar(lo32 >> 0); |
| 3231 | } |
| 3232 | |
sewardj | 8711f66 | 2005-05-09 17:52:56 +0000 | [diff] [blame] | 3233 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 3234 | ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy ) |
| 3235 | { |
| 3236 | return |
| 3237 | mk32x2( |
| 3238 | (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy))) |
| 3239 | + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))), |
| 3240 | (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy))) |
| 3241 | + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy))) |
| 3242 | ); |
| 3243 | } |
| 3244 | |
sewardj | a7ba8c4 | 2005-05-10 20:08:34 +0000 | [diff] [blame] | 3245 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
sewardj | a7ba8c4 | 2005-05-10 20:08:34 +0000 | [diff] [blame] | 3246 | ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy ) |
| 3247 | { |
| 3248 | UInt t = 0; |
| 3249 | t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) ); |
| 3250 | t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) ); |
| 3251 | t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) ); |
| 3252 | t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) ); |
| 3253 | t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) ); |
| 3254 | t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) ); |
| 3255 | t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) ); |
| 3256 | t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) ); |
| 3257 | t &= 0xFFFF; |
| 3258 | return (ULong)t; |
| 3259 | } |
| 3260 | |
sewardj | adffcef | 2005-05-11 00:03:06 +0000 | [diff] [blame] | 3261 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
sewardj | 8cb931e | 2012-02-16 22:02:14 +0000 | [diff] [blame] | 3262 | ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi ) |
| 3263 | { |
| 3264 | UShort t, min; |
| 3265 | UInt idx; |
| 3266 | t = sel16x4_0(sLo); if (True) { min = t; idx = 0; } |
| 3267 | t = sel16x4_1(sLo); if (t < min) { min = t; idx = 1; } |
| 3268 | t = sel16x4_2(sLo); if (t < min) { min = t; idx = 2; } |
| 3269 | t = sel16x4_3(sLo); if (t < min) { min = t; idx = 3; } |
| 3270 | t = sel16x4_0(sHi); if (t < min) { min = t; idx = 4; } |
| 3271 | t = sel16x4_1(sHi); if (t < min) { min = t; idx = 5; } |
| 3272 | t = sel16x4_2(sHi); if (t < min) { min = t; idx = 6; } |
| 3273 | t = sel16x4_3(sHi); if (t < min) { min = t; idx = 7; } |
| 3274 | return ((ULong)(idx << 16)) | ((ULong)min); |
| 3275 | } |
| 3276 | |
| 3277 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
sewardj | 186f869 | 2011-01-21 17:51:44 +0000 | [diff] [blame] | 3278 | ULong amd64g_calc_crc32b ( ULong crcIn, ULong b ) |
| 3279 | { |
| 3280 | UInt i; |
| 3281 | ULong crc = (b & 0xFFULL) ^ crcIn; |
| 3282 | for (i = 0; i < 8; i++) |
| 3283 | crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); |
| 3284 | return crc; |
| 3285 | } |
| 3286 | |
| 3287 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 3288 | ULong amd64g_calc_crc32w ( ULong crcIn, ULong w ) |
| 3289 | { |
| 3290 | UInt i; |
| 3291 | ULong crc = (w & 0xFFFFULL) ^ crcIn; |
| 3292 | for (i = 0; i < 16; i++) |
| 3293 | crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); |
| 3294 | return crc; |
| 3295 | } |
| 3296 | |
| 3297 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 3298 | ULong amd64g_calc_crc32l ( ULong crcIn, ULong l ) |
| 3299 | { |
| 3300 | UInt i; |
| 3301 | ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn; |
| 3302 | for (i = 0; i < 32; i++) |
| 3303 | crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0); |
| 3304 | return crc; |
| 3305 | } |
| 3306 | |
| 3307 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 3308 | ULong amd64g_calc_crc32q ( ULong crcIn, ULong q ) |
| 3309 | { |
| 3310 | ULong crc = amd64g_calc_crc32l(crcIn, q); |
| 3311 | return amd64g_calc_crc32l(crc, q >> 32); |
| 3312 | } |
| 3313 | |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 3314 | |
sewardj | 4d5bce2 | 2012-02-21 11:02:44 +0000 | [diff] [blame] | 3315 | /* .. helper for next fn .. */ |
| 3316 | static inline ULong sad_8x4 ( ULong xx, ULong yy ) |
| 3317 | { |
| 3318 | UInt t = 0; |
| 3319 | t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) ); |
| 3320 | t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) ); |
| 3321 | t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) ); |
| 3322 | t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) ); |
| 3323 | return (ULong)t; |
| 3324 | } |
| 3325 | |
| 3326 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 3327 | ULong amd64g_calc_mpsadbw ( ULong sHi, ULong sLo, |
| 3328 | ULong dHi, ULong dLo, |
| 3329 | ULong imm_and_return_control_bit ) |
| 3330 | { |
| 3331 | UInt imm8 = imm_and_return_control_bit & 7; |
| 3332 | Bool calcHi = (imm_and_return_control_bit >> 7) & 1; |
| 3333 | UInt srcOffsL = imm8 & 3; /* src offs in 32-bit (L) chunks */ |
| 3334 | UInt dstOffsL = (imm8 >> 2) & 1; /* dst offs in ditto chunks */ |
| 3335 | /* For src we only need 32 bits, so get them into the |
| 3336 | lower half of a 64 bit word. */ |
| 3337 | ULong src = ((srcOffsL & 2) ? sHi : sLo) >> (32 * (srcOffsL & 1)); |
| 3338 | /* For dst we need to get hold of 56 bits (7 bytes) from a total of |
| 3339 | 11 bytes. If calculating the low part of the result, need bytes |
| 3340 | dstOffsL * 4 + (0 .. 6); if calculating the high part, |
| 3341 | dstOffsL * 4 + (4 .. 10). */ |
| 3342 | ULong dst; |
| 3343 | /* dstOffL = 0, Lo -> 0 .. 6 |
| 3344 | dstOffL = 1, Lo -> 4 .. 10 |
| 3345 | dstOffL = 0, Hi -> 4 .. 10 |
| 3346 | dstOffL = 1, Hi -> 8 .. 14 |
| 3347 | */ |
| 3348 | if (calcHi && dstOffsL) { |
| 3349 | /* 8 .. 14 */ |
| 3350 | dst = dHi & 0x00FFFFFFFFFFFFFFULL; |
| 3351 | } |
| 3352 | else if (!calcHi && !dstOffsL) { |
| 3353 | /* 0 .. 6 */ |
| 3354 | dst = dLo & 0x00FFFFFFFFFFFFFFULL; |
| 3355 | } |
| 3356 | else { |
| 3357 | /* 4 .. 10 */ |
| 3358 | dst = (dLo >> 32) | ((dHi & 0x00FFFFFFULL) << 32); |
| 3359 | } |
| 3360 | ULong r0 = sad_8x4( dst >> 0, src ); |
| 3361 | ULong r1 = sad_8x4( dst >> 8, src ); |
| 3362 | ULong r2 = sad_8x4( dst >> 16, src ); |
| 3363 | ULong r3 = sad_8x4( dst >> 24, src ); |
| 3364 | ULong res = (r3 << 48) | (r2 << 32) | (r1 << 16) | r0; |
| 3365 | return res; |
| 3366 | } |
| 3367 | |
sewardj | cc3d219 | 2013-03-27 11:37:33 +0000 | [diff] [blame] | 3368 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 3369 | ULong amd64g_calculate_pext ( ULong src_masked, ULong mask ) |
| 3370 | { |
| 3371 | ULong dst = 0; |
| 3372 | ULong src_bit; |
| 3373 | ULong dst_bit = 1; |
| 3374 | for (src_bit = 1; src_bit; src_bit <<= 1) { |
| 3375 | if (mask & src_bit) { |
| 3376 | if (src_masked & src_bit) dst |= dst_bit; |
| 3377 | dst_bit <<= 1; |
| 3378 | } |
| 3379 | } |
| 3380 | return dst; |
| 3381 | } |
| 3382 | |
| 3383 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 3384 | ULong amd64g_calculate_pdep ( ULong src, ULong mask ) |
| 3385 | { |
| 3386 | ULong dst = 0; |
| 3387 | ULong dst_bit; |
| 3388 | ULong src_bit = 1; |
| 3389 | for (dst_bit = 1; dst_bit; dst_bit <<= 1) { |
| 3390 | if (mask & dst_bit) { |
| 3391 | if (src & src_bit) dst |= dst_bit; |
| 3392 | src_bit <<= 1; |
| 3393 | } |
| 3394 | } |
| 3395 | return dst; |
| 3396 | } |
| 3397 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 3398 | /*---------------------------------------------------------------*/ |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 3399 | /*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M} ---*/ |
| 3400 | /*---------------------------------------------------------------*/ |
| 3401 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3402 | static UInt zmask_from_V128 ( V128* arg ) |
| 3403 | { |
| 3404 | UInt i, res = 0; |
| 3405 | for (i = 0; i < 16; i++) { |
| 3406 | res |= ((arg->w8[i] == 0) ? 1 : 0) << i; |
| 3407 | } |
| 3408 | return res; |
| 3409 | } |
| 3410 | |
sewardj | 3c3d6d6 | 2012-02-16 15:21:08 +0000 | [diff] [blame] | 3411 | static UInt zmask_from_V128_wide ( V128* arg ) |
| 3412 | { |
| 3413 | UInt i, res = 0; |
| 3414 | for (i = 0; i < 8; i++) { |
| 3415 | res |= ((arg->w16[i] == 0) ? 1 : 0) << i; |
| 3416 | } |
| 3417 | return res; |
| 3418 | } |
| 3419 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3420 | /* Helps with PCMP{I,E}STR{I,M}. |
| 3421 | |
| 3422 | CALLED FROM GENERATED CODE: DIRTY HELPER(s). (But not really, |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 3423 | actually it could be a clean helper, but for the fact that we can't |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3424 | pass by value 2 x V128 to a clean helper, nor have one returned.) |
| 3425 | Reads guest state, writes to guest state for the xSTRM cases, no |
| 3426 | accesses of memory, is a pure function. |
| 3427 | |
| 3428 | opc_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so |
| 3429 | the callee knows which I/E and I/M variant it is dealing with and |
| 3430 | what the specific operation is. 4th byte of opcode is in the range |
| 3431 | 0x60 to 0x63: |
| 3432 | istri 66 0F 3A 63 |
| 3433 | istrm 66 0F 3A 62 |
| 3434 | estri 66 0F 3A 61 |
| 3435 | estrm 66 0F 3A 60 |
| 3436 | |
| 3437 | gstOffL and gstOffR are the guest state offsets for the two XMM |
| 3438 | register inputs. We never have to deal with the memory case since |
| 3439 | that is handled by pre-loading the relevant value into the fake |
| 3440 | XMM16 register. |
| 3441 | |
| 3442 | For ESTRx variants, edxIN and eaxIN hold the values of those two |
| 3443 | registers. |
| 3444 | |
| 3445 | In all cases, the bottom 16 bits of the result contain the new |
| 3446 | OSZACP %rflags values. For xSTRI variants, bits[31:16] of the |
| 3447 | result hold the new %ecx value. For xSTRM variants, the helper |
| 3448 | writes the result directly to the guest XMM0. |
| 3449 | |
| 3450 | Declarable side effects: in all cases, reads guest state at |
| 3451 | [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes |
| 3452 | guest_XMM0. |
| 3453 | |
| 3454 | Is expected to be called with opc_and_imm combinations which have |
| 3455 | actually been validated, and will assert if otherwise. The front |
| 3456 | end should ensure we're only called with verified values. |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 3457 | */ |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3458 | ULong amd64g_dirtyhelper_PCMPxSTRx ( |
| 3459 | VexGuestAMD64State* gst, |
| 3460 | HWord opc4_and_imm, |
| 3461 | HWord gstOffL, HWord gstOffR, |
| 3462 | HWord edxIN, HWord eaxIN |
| 3463 | ) |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 3464 | { |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3465 | HWord opc4 = (opc4_and_imm >> 8) & 0xFF; |
| 3466 | HWord imm8 = opc4_and_imm & 0xFF; |
| 3467 | HWord isISTRx = opc4 & 2; |
| 3468 | HWord isxSTRM = (opc4 & 1) ^ 1; |
| 3469 | vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */ |
sewardj | 3c3d6d6 | 2012-02-16 15:21:08 +0000 | [diff] [blame] | 3470 | HWord wide = (imm8 & 1); |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 3471 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3472 | // where the args are |
| 3473 | V128* argL = (V128*)( ((UChar*)gst) + gstOffL ); |
| 3474 | V128* argR = (V128*)( ((UChar*)gst) + gstOffR ); |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 3475 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3476 | /* Create the arg validity masks, either from the vectors |
| 3477 | themselves or from the supplied edx/eax values. */ |
| 3478 | // FIXME: this is only right for the 8-bit data cases. |
| 3479 | // At least that is asserted above. |
| 3480 | UInt zmaskL, zmaskR; |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 3481 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3482 | // temp spot for the resulting flags and vector. |
| 3483 | V128 resV; |
| 3484 | UInt resOSZACP; |
| 3485 | |
sewardj | 3c3d6d6 | 2012-02-16 15:21:08 +0000 | [diff] [blame] | 3486 | // for checking whether case was handled |
| 3487 | Bool ok = False; |
| 3488 | |
| 3489 | if (wide) { |
| 3490 | if (isISTRx) { |
| 3491 | zmaskL = zmask_from_V128_wide(argL); |
| 3492 | zmaskR = zmask_from_V128_wide(argR); |
| 3493 | } else { |
| 3494 | Int tmp; |
| 3495 | tmp = edxIN & 0xFFFFFFFF; |
| 3496 | if (tmp < -8) tmp = -8; |
| 3497 | if (tmp > 8) tmp = 8; |
| 3498 | if (tmp < 0) tmp = -tmp; |
| 3499 | vassert(tmp >= 0 && tmp <= 8); |
| 3500 | zmaskL = (1 << tmp) & 0xFF; |
| 3501 | tmp = eaxIN & 0xFFFFFFFF; |
| 3502 | if (tmp < -8) tmp = -8; |
| 3503 | if (tmp > 8) tmp = 8; |
| 3504 | if (tmp < 0) tmp = -tmp; |
| 3505 | vassert(tmp >= 0 && tmp <= 8); |
| 3506 | zmaskR = (1 << tmp) & 0xFF; |
| 3507 | } |
| 3508 | // do the meyaath |
| 3509 | ok = compute_PCMPxSTRx_wide ( |
| 3510 | &resV, &resOSZACP, argL, argR, |
| 3511 | zmaskL, zmaskR, imm8, (Bool)isxSTRM |
| 3512 | ); |
| 3513 | } else { |
| 3514 | if (isISTRx) { |
| 3515 | zmaskL = zmask_from_V128(argL); |
| 3516 | zmaskR = zmask_from_V128(argR); |
| 3517 | } else { |
| 3518 | Int tmp; |
| 3519 | tmp = edxIN & 0xFFFFFFFF; |
| 3520 | if (tmp < -16) tmp = -16; |
| 3521 | if (tmp > 16) tmp = 16; |
| 3522 | if (tmp < 0) tmp = -tmp; |
| 3523 | vassert(tmp >= 0 && tmp <= 16); |
| 3524 | zmaskL = (1 << tmp) & 0xFFFF; |
| 3525 | tmp = eaxIN & 0xFFFFFFFF; |
| 3526 | if (tmp < -16) tmp = -16; |
| 3527 | if (tmp > 16) tmp = 16; |
| 3528 | if (tmp < 0) tmp = -tmp; |
| 3529 | vassert(tmp >= 0 && tmp <= 16); |
| 3530 | zmaskR = (1 << tmp) & 0xFFFF; |
| 3531 | } |
| 3532 | // do the meyaath |
| 3533 | ok = compute_PCMPxSTRx ( |
| 3534 | &resV, &resOSZACP, argL, argR, |
| 3535 | zmaskL, zmaskR, imm8, (Bool)isxSTRM |
| 3536 | ); |
| 3537 | } |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3538 | |
| 3539 | // front end shouldn't pass us any imm8 variants we can't |
| 3540 | // handle. Hence: |
| 3541 | vassert(ok); |
| 3542 | |
| 3543 | // So, finally we need to get the results back to the caller. |
| 3544 | // In all cases, the new OSZACP value is the lowest 16 of |
| 3545 | // the return value. |
| 3546 | if (isxSTRM) { |
sewardj | c4530ae | 2012-05-21 10:18:49 +0000 | [diff] [blame] | 3547 | gst->guest_YMM0[0] = resV.w32[0]; |
| 3548 | gst->guest_YMM0[1] = resV.w32[1]; |
| 3549 | gst->guest_YMM0[2] = resV.w32[2]; |
| 3550 | gst->guest_YMM0[3] = resV.w32[3]; |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 3551 | return resOSZACP & 0x8D5; |
| 3552 | } else { |
| 3553 | UInt newECX = resV.w32[0] & 0xFFFF; |
| 3554 | return (newECX << 16) | (resOSZACP & 0x8D5); |
| 3555 | } |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 3556 | } |
| 3557 | |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 3558 | /*---------------------------------------------------------------*/ |
| 3559 | /*--- AES primitives and helpers ---*/ |
| 3560 | /*---------------------------------------------------------------*/ |
/* Forward byte-substitution table used by SubBytes() and SubWord():
   the input byte is the index, the entry is its replacement.
   Laid out as a 16 x 16 matrix; each pair of source lines below is
   one row of 16 entries, and the trailing comment gives the 1-based
   row number. */
static const UChar sbox[256] = {                   // row nr
   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, // 1
   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, // 2
   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, // 3
   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, // 4
   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, // 5
   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, // 6
   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, // 7
   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, // 8
   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, // 9
   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, //10
   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, //11
   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, //12
   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, //13
   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, //14
   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, //15
   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, //16
   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
};
| 3596 | static void SubBytes (V128* v) |
| 3597 | { |
| 3598 | V128 r; |
| 3599 | UInt i; |
| 3600 | for (i = 0; i < 16; i++) |
| 3601 | r.w8[i] = sbox[v->w8[i]]; |
| 3602 | *v = r; |
| 3603 | } |
| 3604 | |
/* Inverse byte-substitution table used by InvSubBytes(): the input
   byte is the index, the entry is its replacement.
   Laid out as a 16 x 16 matrix; each pair of source lines below is
   one row of 16 entries, and the trailing comment gives the 1-based
   row number. */
static const UChar invsbox[256] = {                // row nr
   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, // 1
   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, // 2
   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, // 3
   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, // 4
   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, // 5
   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, // 6
   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, // 7
   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, // 8
   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, // 9
   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, //10
   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, //11
   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, //12
   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, //13
   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, //14
   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, //15
   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, //16
   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
};
| 3640 | static void InvSubBytes (V128* v) |
| 3641 | { |
| 3642 | V128 r; |
| 3643 | UInt i; |
| 3644 | for (i = 0; i < 16; i++) |
| 3645 | r.w8[i] = invsbox[v->w8[i]]; |
| 3646 | *v = r; |
| 3647 | } |
| 3648 | |
| 3649 | static const UChar ShiftRows_op[16] = |
| 3650 | {11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0}; |
| 3651 | static void ShiftRows (V128* v) |
| 3652 | { |
| 3653 | V128 r; |
| 3654 | UInt i; |
| 3655 | for (i = 0; i < 16; i++) |
| 3656 | r.w8[i] = v->w8[ShiftRows_op[15-i]]; |
| 3657 | *v = r; |
| 3658 | } |
| 3659 | |
| 3660 | static const UChar InvShiftRows_op[16] = |
| 3661 | {3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0}; |
| 3662 | static void InvShiftRows (V128* v) |
| 3663 | { |
| 3664 | V128 r; |
| 3665 | UInt i; |
| 3666 | for (i = 0; i < 16; i++) |
| 3667 | r.w8[i] = v->w8[InvShiftRows_op[15-i]]; |
| 3668 | *v = r; |
| 3669 | } |
| 3670 | |
/* Multiplication of the finite field elements of AES.
   See "A Specification for The AES Algorithm Rijndael 
        (by Joan Daemen & Vincent Rijmen)"
        Dr. Brian Gladman, v3.1, 3rd March 2001. */
/* Logarithm table: Nxy[xy] is the exponent N such that
   (hex) xy = 0x03^N.
   0x00 has no logarithm, so its slot holds the placeholder 0xff;
   ff_mul() never looks up a zero operand. */
/* a 16 x 16 matrix; each pair of source lines below is one row of 16
   entries, and the trailing comment gives the 1-based row number. */
static const UChar Nxy[256] = {                    // row nr
   0xff, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, // 1
   0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
   0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, // 2
   0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
   0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, // 3
   0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
   0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, // 4
   0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
   0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, // 5
   0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
   0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, // 6
   0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
   0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, // 7
   0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
   0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, // 8
   0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
   0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, // 9
   0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
   0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, //10
   0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
   0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, //11
   0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
   0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, //12
   0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
   0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, //13
   0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
   0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, //14
   0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
   0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, //15
   0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
   0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, //16
   0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07
};
| 3712 | |
/* Exponent table: Exy[xy] is the value E such that E = 0x03^xy.
   ff_mul() indexes it with the sum of two Nxy[] logarithms, reduced
   by 255 when the sum is 255 or more.  Note that the last entry
   (index 255) has the same value, 0x01, as the first.
   Each pair of source lines below is one row of 16 entries, and the
   trailing comment gives the 1-based row number. */
static const UChar Exy[256] = {                    // row nr
   0x01, 0x03, 0x05, 0x0f, 0x11, 0x33, 0x55, 0xff, // 1
   0x1a, 0x2e, 0x72, 0x96, 0xa1, 0xf8, 0x13, 0x35,
   0x5f, 0xe1, 0x38, 0x48, 0xd8, 0x73, 0x95, 0xa4, // 2
   0xf7, 0x02, 0x06, 0x0a, 0x1e, 0x22, 0x66, 0xaa,
   0xe5, 0x34, 0x5c, 0xe4, 0x37, 0x59, 0xeb, 0x26, // 3
   0x6a, 0xbe, 0xd9, 0x70, 0x90, 0xab, 0xe6, 0x31,
   0x53, 0xf5, 0x04, 0x0c, 0x14, 0x3c, 0x44, 0xcc, // 4
   0x4f, 0xd1, 0x68, 0xb8, 0xd3, 0x6e, 0xb2, 0xcd,
   0x4c, 0xd4, 0x67, 0xa9, 0xe0, 0x3b, 0x4d, 0xd7, // 5
   0x62, 0xa6, 0xf1, 0x08, 0x18, 0x28, 0x78, 0x88,
   0x83, 0x9e, 0xb9, 0xd0, 0x6b, 0xbd, 0xdc, 0x7f, // 6
   0x81, 0x98, 0xb3, 0xce, 0x49, 0xdb, 0x76, 0x9a,
   0xb5, 0xc4, 0x57, 0xf9, 0x10, 0x30, 0x50, 0xf0, // 7
   0x0b, 0x1d, 0x27, 0x69, 0xbb, 0xd6, 0x61, 0xa3,
   0xfe, 0x19, 0x2b, 0x7d, 0x87, 0x92, 0xad, 0xec, // 8
   0x2f, 0x71, 0x93, 0xae, 0xe9, 0x20, 0x60, 0xa0,
   0xfb, 0x16, 0x3a, 0x4e, 0xd2, 0x6d, 0xb7, 0xc2, // 9
   0x5d, 0xe7, 0x32, 0x56, 0xfa, 0x15, 0x3f, 0x41,
   0xc3, 0x5e, 0xe2, 0x3d, 0x47, 0xc9, 0x40, 0xc0, //10
   0x5b, 0xed, 0x2c, 0x74, 0x9c, 0xbf, 0xda, 0x75,
   0x9f, 0xba, 0xd5, 0x64, 0xac, 0xef, 0x2a, 0x7e, //11
   0x82, 0x9d, 0xbc, 0xdf, 0x7a, 0x8e, 0x89, 0x80,
   0x9b, 0xb6, 0xc1, 0x58, 0xe8, 0x23, 0x65, 0xaf, //12
   0xea, 0x25, 0x6f, 0xb1, 0xc8, 0x43, 0xc5, 0x54,
   0xfc, 0x1f, 0x21, 0x63, 0xa5, 0xf4, 0x07, 0x09, //13
   0x1b, 0x2d, 0x77, 0x99, 0xb0, 0xcb, 0x46, 0xca,
   0x45, 0xcf, 0x4a, 0xde, 0x79, 0x8b, 0x86, 0x91, //14
   0xa8, 0xe3, 0x3e, 0x42, 0xc6, 0x51, 0xf3, 0x0e,
   0x12, 0x36, 0x5a, 0xee, 0x29, 0x7b, 0x8d, 0x8c, //15
   0x8f, 0x8a, 0x85, 0x94, 0xa7, 0xf2, 0x0d, 0x17,
   0x39, 0x4b, 0xdd, 0x7c, 0x84, 0x97, 0xa2, 0xfd, //16
   0x1c, 0x24, 0x6c, 0xb4, 0xc7, 0x52, 0xf6, 0x01};
| 3747 | |
| 3748 | static inline UChar ff_mul(UChar u1, UChar u2) |
| 3749 | { |
| 3750 | if ((u1 > 0) && (u2 > 0)) { |
| 3751 | UInt ui = Nxy[u1] + Nxy[u2]; |
| 3752 | if (ui >= 255) |
| 3753 | ui = ui - 255; |
| 3754 | return Exy[ui]; |
| 3755 | } else { |
| 3756 | return 0; |
| 3757 | }; |
| 3758 | } |
| 3759 | |
| 3760 | static void MixColumns (V128* v) |
| 3761 | { |
| 3762 | V128 r; |
| 3763 | Int j; |
| 3764 | #define P(x,row,col) (x)->w8[((row)*4+(col))] |
| 3765 | for (j = 0; j < 4; j++) { |
| 3766 | P(&r,j,0) = ff_mul(0x02, P(v,j,0)) ^ ff_mul(0x03, P(v,j,1)) |
| 3767 | ^ P(v,j,2) ^ P(v,j,3); |
| 3768 | P(&r,j,1) = P(v,j,0) ^ ff_mul( 0x02, P(v,j,1) ) |
| 3769 | ^ ff_mul(0x03, P(v,j,2) ) ^ P(v,j,3); |
| 3770 | P(&r,j,2) = P(v,j,0) ^ P(v,j,1) ^ ff_mul( 0x02, P(v,j,2) ) |
| 3771 | ^ ff_mul(0x03, P(v,j,3) ); |
| 3772 | P(&r,j,3) = ff_mul(0x03, P(v,j,0) ) ^ P(v,j,1) ^ P(v,j,2) |
| 3773 | ^ ff_mul( 0x02, P(v,j,3) ); |
| 3774 | } |
| 3775 | *v = r; |
| 3776 | #undef P |
| 3777 | } |
| 3778 | |
| 3779 | static void InvMixColumns (V128* v) |
| 3780 | { |
| 3781 | V128 r; |
| 3782 | Int j; |
| 3783 | #define P(x,row,col) (x)->w8[((row)*4+(col))] |
| 3784 | for (j = 0; j < 4; j++) { |
| 3785 | P(&r,j,0) = ff_mul(0x0e, P(v,j,0) ) ^ ff_mul(0x0b, P(v,j,1) ) |
| 3786 | ^ ff_mul(0x0d,P(v,j,2) ) ^ ff_mul(0x09, P(v,j,3) ); |
| 3787 | P(&r,j,1) = ff_mul(0x09, P(v,j,0) ) ^ ff_mul(0x0e, P(v,j,1) ) |
| 3788 | ^ ff_mul(0x0b,P(v,j,2) ) ^ ff_mul(0x0d, P(v,j,3) ); |
| 3789 | P(&r,j,2) = ff_mul(0x0d, P(v,j,0) ) ^ ff_mul(0x09, P(v,j,1) ) |
| 3790 | ^ ff_mul(0x0e,P(v,j,2) ) ^ ff_mul(0x0b, P(v,j,3) ); |
| 3791 | P(&r,j,3) = ff_mul(0x0b, P(v,j,0) ) ^ ff_mul(0x0d, P(v,j,1) ) |
| 3792 | ^ ff_mul(0x09,P(v,j,2) ) ^ ff_mul(0x0e, P(v,j,3) ); |
| 3793 | } |
| 3794 | *v = r; |
| 3795 | #undef P |
| 3796 | |
| 3797 | } |
| 3798 | |
| 3799 | /* For description, see definition in guest_amd64_defs.h */ |
| 3800 | void amd64g_dirtyhelper_AES ( |
| 3801 | VexGuestAMD64State* gst, |
sewardj | 1407a36 | 2012-06-24 15:11:38 +0000 | [diff] [blame] | 3802 | HWord opc4, HWord gstOffD, |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 3803 | HWord gstOffL, HWord gstOffR |
| 3804 | ) |
| 3805 | { |
| 3806 | // where the args are |
sewardj | 1407a36 | 2012-06-24 15:11:38 +0000 | [diff] [blame] | 3807 | V128* argD = (V128*)( ((UChar*)gst) + gstOffD ); |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 3808 | V128* argL = (V128*)( ((UChar*)gst) + gstOffL ); |
| 3809 | V128* argR = (V128*)( ((UChar*)gst) + gstOffR ); |
sewardj | 1407a36 | 2012-06-24 15:11:38 +0000 | [diff] [blame] | 3810 | V128 r; |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 3811 | |
| 3812 | switch (opc4) { |
| 3813 | case 0xDC: /* AESENC */ |
| 3814 | case 0xDD: /* AESENCLAST */ |
sewardj | 1407a36 | 2012-06-24 15:11:38 +0000 | [diff] [blame] | 3815 | r = *argR; |
| 3816 | ShiftRows (&r); |
| 3817 | SubBytes (&r); |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 3818 | if (opc4 == 0xDC) |
sewardj | 1407a36 | 2012-06-24 15:11:38 +0000 | [diff] [blame] | 3819 | MixColumns (&r); |
| 3820 | argD->w64[0] = r.w64[0] ^ argL->w64[0]; |
| 3821 | argD->w64[1] = r.w64[1] ^ argL->w64[1]; |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 3822 | break; |
| 3823 | |
| 3824 | case 0xDE: /* AESDEC */ |
| 3825 | case 0xDF: /* AESDECLAST */ |
sewardj | 1407a36 | 2012-06-24 15:11:38 +0000 | [diff] [blame] | 3826 | r = *argR; |
| 3827 | InvShiftRows (&r); |
| 3828 | InvSubBytes (&r); |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 3829 | if (opc4 == 0xDE) |
sewardj | 1407a36 | 2012-06-24 15:11:38 +0000 | [diff] [blame] | 3830 | InvMixColumns (&r); |
| 3831 | argD->w64[0] = r.w64[0] ^ argL->w64[0]; |
| 3832 | argD->w64[1] = r.w64[1] ^ argL->w64[1]; |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 3833 | break; |
| 3834 | |
| 3835 | case 0xDB: /* AESIMC */ |
sewardj | 1407a36 | 2012-06-24 15:11:38 +0000 | [diff] [blame] | 3836 | *argD = *argL; |
| 3837 | InvMixColumns (argD); |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 3838 | break; |
| 3839 | default: vassert(0); |
| 3840 | } |
| 3841 | } |
| 3842 | |
| 3843 | static inline UInt RotWord (UInt w32) |
| 3844 | { |
| 3845 | return ((w32 >> 8) | (w32 << 24)); |
| 3846 | } |
| 3847 | |
| 3848 | static inline UInt SubWord (UInt w32) |
| 3849 | { |
| 3850 | UChar *w8; |
| 3851 | UChar *r8; |
| 3852 | UInt res; |
| 3853 | w8 = (UChar*) &w32; |
| 3854 | r8 = (UChar*) &res; |
| 3855 | r8[0] = sbox[w8[0]]; |
| 3856 | r8[1] = sbox[w8[1]]; |
| 3857 | r8[2] = sbox[w8[2]]; |
| 3858 | r8[3] = sbox[w8[3]]; |
| 3859 | return res; |
| 3860 | } |
| 3861 | |
| 3862 | /* For description, see definition in guest_amd64_defs.h */ |
| 3863 | extern void amd64g_dirtyhelper_AESKEYGENASSIST ( |
| 3864 | VexGuestAMD64State* gst, |
| 3865 | HWord imm8, |
| 3866 | HWord gstOffL, HWord gstOffR |
| 3867 | ) |
| 3868 | { |
| 3869 | // where the args are |
| 3870 | V128* argL = (V128*)( ((UChar*)gst) + gstOffL ); |
| 3871 | V128* argR = (V128*)( ((UChar*)gst) + gstOffR ); |
| 3872 | |
sewardj | a35a6db | 2014-12-09 21:01:28 +0000 | [diff] [blame] | 3873 | // We have to create the result in a temporary in the |
| 3874 | // case where the src and dst regs are the same. See #341698. |
| 3875 | V128 tmp; |
| 3876 | |
| 3877 | tmp.w32[3] = RotWord (SubWord (argL->w32[3])) ^ imm8; |
| 3878 | tmp.w32[2] = SubWord (argL->w32[3]); |
| 3879 | tmp.w32[1] = RotWord (SubWord (argL->w32[1])) ^ imm8; |
| 3880 | tmp.w32[0] = SubWord (argL->w32[1]); |
| 3881 | |
| 3882 | argR->w32[3] = tmp.w32[3]; |
| 3883 | argR->w32[2] = tmp.w32[2]; |
| 3884 | argR->w32[1] = tmp.w32[1]; |
| 3885 | argR->w32[0] = tmp.w32[0]; |
philippe | ff4d6be | 2012-02-14 21:34:56 +0000 | [diff] [blame] | 3886 | } |
| 3887 | |
| 3888 | |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 3889 | |
| 3890 | /*---------------------------------------------------------------*/ |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 3891 | /*--- Helpers for dealing with, and describing, ---*/ |
| 3892 | /*--- guest state as a whole. ---*/ |
| 3893 | /*---------------------------------------------------------------*/ |
| 3894 | |
| 3895 | /* Initialise the entire amd64 guest state. */ |
| 3896 | /* VISIBLE TO LIBVEX CLIENT */ |
| 3897 | void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state ) |
| 3898 | { |
sewardj | c6f970f | 2012-04-02 21:54:49 +0000 | [diff] [blame] | 3899 | vex_state->host_EvC_FAILADDR = 0; |
| 3900 | vex_state->host_EvC_COUNTER = 0; |
| 3901 | vex_state->pad0 = 0; |
| 3902 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 3903 | vex_state->guest_RAX = 0; |
| 3904 | vex_state->guest_RCX = 0; |
| 3905 | vex_state->guest_RDX = 0; |
| 3906 | vex_state->guest_RBX = 0; |
| 3907 | vex_state->guest_RSP = 0; |
| 3908 | vex_state->guest_RBP = 0; |
| 3909 | vex_state->guest_RSI = 0; |
| 3910 | vex_state->guest_RDI = 0; |
| 3911 | vex_state->guest_R8 = 0; |
| 3912 | vex_state->guest_R9 = 0; |
| 3913 | vex_state->guest_R10 = 0; |
| 3914 | vex_state->guest_R11 = 0; |
| 3915 | vex_state->guest_R12 = 0; |
| 3916 | vex_state->guest_R13 = 0; |
| 3917 | vex_state->guest_R14 = 0; |
| 3918 | vex_state->guest_R15 = 0; |
| 3919 | |
| 3920 | vex_state->guest_CC_OP = AMD64G_CC_OP_COPY; |
| 3921 | vex_state->guest_CC_DEP1 = 0; |
| 3922 | vex_state->guest_CC_DEP2 = 0; |
| 3923 | vex_state->guest_CC_NDEP = 0; |
| 3924 | |
sewardj | d0a12df | 2005-02-10 02:07:43 +0000 | [diff] [blame] | 3925 | vex_state->guest_DFLAG = 1; /* forwards */ |
sewardj | 85520e4 | 2005-02-19 15:22:38 +0000 | [diff] [blame] | 3926 | vex_state->guest_IDFLAG = 0; |
sewardj | 0e457fc | 2013-12-11 16:47:59 +0000 | [diff] [blame] | 3927 | vex_state->guest_ACFLAG = 0; |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 3928 | |
philippe | e2cc4de | 2014-12-16 23:57:51 +0000 | [diff] [blame] | 3929 | /* HACK: represent the offset associated with a constant %fs. |
| 3930 | Typically, on linux, this assumes that %fs is only ever zero (main |
| 3931 | thread) or 0x63. */ |
| 3932 | vex_state->guest_FS_CONST = 0; |
sewardj | a6b93d1 | 2005-02-17 09:28:28 +0000 | [diff] [blame] | 3933 | |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 3934 | vex_state->guest_RIP = 0; |
| 3935 | |
sewardj | 8d96531 | 2005-02-25 02:48:47 +0000 | [diff] [blame] | 3936 | /* Initialise the simulated FPU */ |
| 3937 | amd64g_dirtyhelper_FINIT( vex_state ); |
| 3938 | |
sewardj | c4530ae | 2012-05-21 10:18:49 +0000 | [diff] [blame] | 3939 | /* Initialise the AVX state. */ |
| 3940 | # define AVXZERO(_ymm) \ |
| 3941 | do { _ymm[0]=_ymm[1]=_ymm[2]=_ymm[3] = 0; \ |
| 3942 | _ymm[4]=_ymm[5]=_ymm[6]=_ymm[7] = 0; \ |
| 3943 | } while (0) |
sewardj | cb6091d | 2005-02-21 08:23:39 +0000 | [diff] [blame] | 3944 | vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST; |
sewardj | c4530ae | 2012-05-21 10:18:49 +0000 | [diff] [blame] | 3945 | AVXZERO(vex_state->guest_YMM0); |
| 3946 | AVXZERO(vex_state->guest_YMM1); |
| 3947 | AVXZERO(vex_state->guest_YMM2); |
| 3948 | AVXZERO(vex_state->guest_YMM3); |
| 3949 | AVXZERO(vex_state->guest_YMM4); |
| 3950 | AVXZERO(vex_state->guest_YMM5); |
| 3951 | AVXZERO(vex_state->guest_YMM6); |
| 3952 | AVXZERO(vex_state->guest_YMM7); |
| 3953 | AVXZERO(vex_state->guest_YMM8); |
| 3954 | AVXZERO(vex_state->guest_YMM9); |
| 3955 | AVXZERO(vex_state->guest_YMM10); |
| 3956 | AVXZERO(vex_state->guest_YMM11); |
| 3957 | AVXZERO(vex_state->guest_YMM12); |
| 3958 | AVXZERO(vex_state->guest_YMM13); |
| 3959 | AVXZERO(vex_state->guest_YMM14); |
| 3960 | AVXZERO(vex_state->guest_YMM15); |
| 3961 | AVXZERO(vex_state->guest_YMM16); |
sewardj | cb6091d | 2005-02-21 08:23:39 +0000 | [diff] [blame] | 3962 | |
sewardj | c4530ae | 2012-05-21 10:18:49 +0000 | [diff] [blame] | 3963 | # undef AVXZERO |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 3964 | |
florian | 6ef84be | 2012-08-26 03:20:07 +0000 | [diff] [blame] | 3965 | vex_state->guest_EMNOTE = EmNote_NONE; |
sewardj | 1f126c5 | 2005-03-16 13:57:58 +0000 | [diff] [blame] | 3966 | |
| 3967 | /* These should not ever be either read or written, but we |
| 3968 | initialise them anyway. */ |
sewardj | 05f5e01 | 2014-05-04 10:52:11 +0000 | [diff] [blame] | 3969 | vex_state->guest_CMSTART = 0; |
| 3970 | vex_state->guest_CMLEN = 0; |
sewardj | ce02aa7 | 2006-01-12 12:27:58 +0000 | [diff] [blame] | 3971 | |
sewardj | d660d41 | 2008-12-03 21:29:59 +0000 | [diff] [blame] | 3972 | vex_state->guest_NRADDR = 0; |
| 3973 | vex_state->guest_SC_CLASS = 0; |
philippe | e2cc4de | 2014-12-16 23:57:51 +0000 | [diff] [blame] | 3974 | vex_state->guest_GS_CONST = 0; |
sewardj | d660d41 | 2008-12-03 21:29:59 +0000 | [diff] [blame] | 3975 | |
sewardj | e86310f | 2009-03-19 22:21:40 +0000 | [diff] [blame] | 3976 | vex_state->guest_IP_AT_SYSCALL = 0; |
sewardj | c6f970f | 2012-04-02 21:54:49 +0000 | [diff] [blame] | 3977 | vex_state->pad1 = 0; |
sewardj | f8c37f7 | 2005-02-07 18:55:29 +0000 | [diff] [blame] | 3978 | } |
| 3979 | |
| 3980 | |
sewardj | 2f959cc | 2005-01-26 01:19:35 +0000 | [diff] [blame] | 3981 | /* Figure out if any part of the guest state contained in minoff |
| 3982 | .. maxoff requires precise memory exceptions. If in doubt return |
philippe | 6c46bef | 2012-08-14 22:29:01 +0000 | [diff] [blame] | 3983 | True (but this generates significantly slower code). |
sewardj | 2f959cc | 2005-01-26 01:19:35 +0000 | [diff] [blame] | 3984 | |
sewardj | 4cca75c | 2005-03-16 11:52:25 +0000 | [diff] [blame] | 3985 | By default we enforce precise exns for guest %RSP, %RBP and %RIP |
| 3986 | only. These are the minimum needed to extract correct stack |
| 3987 | backtraces from amd64 code. |
philippe | 6c46bef | 2012-08-14 22:29:01 +0000 | [diff] [blame] | 3988 | |
| 3989 | Only %RSP is needed in mode VexRegUpdSpAtMemAccess. |
sewardj | 2f959cc | 2005-01-26 01:19:35 +0000 | [diff] [blame] | 3990 | */ |
sewardj | ca2c3c7 | 2015-02-05 12:53:20 +0000 | [diff] [blame] | 3991 | Bool guest_amd64_state_requires_precise_mem_exns ( |
| 3992 | Int minoff, Int maxoff, VexRegisterUpdates pxControl |
| 3993 | ) |
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 3994 | { |
sewardj | 4cca75c | 2005-03-16 11:52:25 +0000 | [diff] [blame] | 3995 | Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP); |
| 3996 | Int rbp_max = rbp_min + 8 - 1; |
sewardj | 2f959cc | 2005-01-26 01:19:35 +0000 | [diff] [blame] | 3997 | Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP); |
| 3998 | Int rsp_max = rsp_min + 8 - 1; |
| 3999 | Int rip_min = offsetof(VexGuestAMD64State, guest_RIP); |
| 4000 | Int rip_max = rip_min + 8 - 1; |
| 4001 | |
philippe | 6c46bef | 2012-08-14 22:29:01 +0000 | [diff] [blame] | 4002 | if (maxoff < rsp_min || minoff > rsp_max) { |
| 4003 | /* no overlap with rsp */ |
sewardj | ca2c3c7 | 2015-02-05 12:53:20 +0000 | [diff] [blame] | 4004 | if (pxControl == VexRegUpdSpAtMemAccess) |
philippe | 6c46bef | 2012-08-14 22:29:01 +0000 | [diff] [blame] | 4005 | return False; // We only need to check stack pointer. |
sewardj | 4cca75c | 2005-03-16 11:52:25 +0000 | [diff] [blame] | 4006 | } else { |
| 4007 | return True; |
| 4008 | } |
| 4009 | |
philippe | 6c46bef | 2012-08-14 22:29:01 +0000 | [diff] [blame] | 4010 | if (maxoff < rbp_min || minoff > rbp_max) { |
| 4011 | /* no overlap with rbp */ |
sewardj | 2f959cc | 2005-01-26 01:19:35 +0000 | [diff] [blame] | 4012 | } else { |
| 4013 | return True; |
| 4014 | } |
| 4015 | |
| 4016 | if (maxoff < rip_min || minoff > rip_max) { |
| 4017 | /* no overlap with eip */ |
| 4018 | } else { |
| 4019 | return True; |
| 4020 | } |
| 4021 | |
| 4022 | return False; |
sewardj | 44d494d | 2005-01-20 20:26:33 +0000 | [diff] [blame] | 4023 | } |
sewardj | 2f959cc | 2005-01-26 01:19:35 +0000 | [diff] [blame] | 4024 | |
| 4025 | |
/* Expands to an { offset, size } initialiser pair describing one
   field of VexGuestAMD64State, for use in the .alwaysDefd table
   below. */
#define ALWAYSDEFD(field)                             \
    { offsetof(VexGuestAMD64State, field),            \
      (sizeof ((VexGuestAMD64State*)0)->field) }

/* Description of the amd64 guest state layout: its total size, where
   the stack, frame and instruction pointers live, and which fields
   Memcheck should treat as always defined. */
VexGuestLayout
   amd64guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestAMD64State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestAMD64State,guest_RSP),
          .sizeof_SP = 8,

          /* Describe the frame pointer. */
          .offset_FP = offsetof(VexGuestAMD64State,guest_RBP),
          .sizeof_FP = 8,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestAMD64State,guest_RIP),
          .sizeof_IP = 8,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'.  This count must match the number of
             live (non-commented-out) entries in .alwaysDefd below. */
          .n_alwaysDefd = 16,

          /* flags thunk: OP and NDEP are always defd, whereas DEP1
             and DEP2 have to be tracked.  See detailed comment in
             gdefs.h on meaning of thunk fields. */
          .alwaysDefd
             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
                 /*  4 */ ALWAYSDEFD(guest_RIP),
                 /*  5 */ ALWAYSDEFD(guest_FS_CONST),
                 /*  6 */ ALWAYSDEFD(guest_FTOP),
                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
                 /*  9 */ ALWAYSDEFD(guest_FC3210),
                 // /* */ ALWAYSDEFD(guest_CS),
                 // /* */ ALWAYSDEFD(guest_DS),
                 // /* */ ALWAYSDEFD(guest_ES),
                 // /* */ ALWAYSDEFD(guest_FS),
                 // /* */ ALWAYSDEFD(guest_GS),
                 // /* */ ALWAYSDEFD(guest_SS),
                 // /* */ ALWAYSDEFD(guest_LDT),
                 // /* */ ALWAYSDEFD(guest_GDT),
                 /* 10 */ ALWAYSDEFD(guest_EMNOTE),
                 /* 11 */ ALWAYSDEFD(guest_SSEROUND),
                 /* 12 */ ALWAYSDEFD(guest_CMSTART),
                 /* 13 */ ALWAYSDEFD(guest_CMLEN),
                 /* 14 */ ALWAYSDEFD(guest_SC_CLASS),
                 /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
               }
        };
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 4082 | |
| 4083 | |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 4084 | /*---------------------------------------------------------------*/ |
sewardj | cef7d3e | 2009-07-02 12:21:59 +0000 | [diff] [blame] | 4085 | /*--- end guest_amd64_helpers.c ---*/ |
njn | 9c6acb0 | 2004-11-30 15:56:47 +0000 | [diff] [blame] | 4086 | /*---------------------------------------------------------------*/ |