blob: 2887b089588b662c136a939d782dda22f2c74153 [file] [log] [blame]
njn9c6acb02004-11-30 15:56:47 +00001
2/*---------------------------------------------------------------*/
sewardj752f9062010-05-03 21:38:49 +00003/*--- begin guest_amd64_helpers.c ---*/
njn9c6acb02004-11-30 15:56:47 +00004/*---------------------------------------------------------------*/
5
6/*
sewardj752f9062010-05-03 21:38:49 +00007 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
njn9c6acb02004-11-30 15:56:47 +00009
sewardj89ae8472013-10-18 14:12:58 +000010 Copyright (C) 2004-2013 OpenWorks LLP
sewardj752f9062010-05-03 21:38:49 +000011 info@open-works.net
njn9c6acb02004-11-30 15:56:47 +000012
sewardj752f9062010-05-03 21:38:49 +000013 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
njn9c6acb02004-11-30 15:56:47 +000017
sewardj752f9062010-05-03 21:38:49 +000018 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
sewardj7bd6ffe2005-08-03 16:07:36 +000026 02110-1301, USA.
27
sewardj752f9062010-05-03 21:38:49 +000028 The GNU General Public License is contained in the file COPYING.
njn9c6acb02004-11-30 15:56:47 +000029
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
njn9c6acb02004-11-30 15:56:47 +000034*/
35
36#include "libvex_basictypes.h"
florian33b02432012-08-25 21:48:04 +000037#include "libvex_emnote.h"
njn9c6acb02004-11-30 15:56:47 +000038#include "libvex_guest_amd64.h"
39#include "libvex_ir.h"
40#include "libvex.h"
41
sewardjcef7d3e2009-07-02 12:21:59 +000042#include "main_util.h"
philippe6c46bef2012-08-14 22:29:01 +000043#include "main_globals.h"
sewardjcef7d3e2009-07-02 12:21:59 +000044#include "guest_generic_bb_to_IR.h"
45#include "guest_amd64_defs.h"
46#include "guest_generic_x87.h"
sewardj44d494d2005-01-20 20:26:33 +000047
sewardjf8c37f72005-02-07 18:55:29 +000048
49/* This file contains helper functions for amd64 guest code.
50 Calls to these functions are generated by the back end.
51 These calls are of course in the host machine code and
52 this file will be compiled to host machine code, so that
53 all makes sense.
54
55 Only change the signatures of these helper functions very
56 carefully. If you change the signature here, you'll have to change
57 the parameters passed to it in the IR calls constructed by
58 guest-amd64/toIR.c.
59
60 The convention used is that all functions called from generated
61 code are named amd64g_<something>, and any function whose name lacks
62 that prefix is not called from generated code. Note that some
63 LibVEX_* functions can however be called by VEX's client, but that
64 is not the same as calling them from VEX-generated code.
65*/
66
67
68/* Set to 1 to get detailed profiling info about use of the flag
69 machinery. */
70#define PROFILE_RFLAGS 0
71
72
73/*---------------------------------------------------------------*/
74/*--- %rflags run-time helpers. ---*/
75/*---------------------------------------------------------------*/
76
sewardj1a01e652005-02-23 11:39:21 +000077/* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
78 after imulq/mulq. */
79
80static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
81{
82 ULong u0, v0, w0;
83 Long u1, v1, w1, w2, t;
sewardjdbdc5b32005-03-25 20:31:46 +000084 u0 = u & 0xFFFFFFFFULL;
sewardj1a01e652005-02-23 11:39:21 +000085 u1 = u >> 32;
sewardjdbdc5b32005-03-25 20:31:46 +000086 v0 = v & 0xFFFFFFFFULL;
sewardj1a01e652005-02-23 11:39:21 +000087 v1 = v >> 32;
88 w0 = u0 * v0;
89 t = u1 * v0 + (w0 >> 32);
sewardjdbdc5b32005-03-25 20:31:46 +000090 w1 = t & 0xFFFFFFFFULL;
sewardj1a01e652005-02-23 11:39:21 +000091 w2 = t >> 32;
92 w1 = u0 * v1 + w1;
93 *rHi = u1 * v1 + w2 + (w1 >> 32);
94 *rLo = u * v;
95}
96
97static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
98{
99 ULong u0, v0, w0;
100 ULong u1, v1, w1,w2,t;
sewardjdbdc5b32005-03-25 20:31:46 +0000101 u0 = u & 0xFFFFFFFFULL;
sewardj1a01e652005-02-23 11:39:21 +0000102 u1 = u >> 32;
sewardjdbdc5b32005-03-25 20:31:46 +0000103 v0 = v & 0xFFFFFFFFULL;
sewardj1a01e652005-02-23 11:39:21 +0000104 v1 = v >> 32;
105 w0 = u0 * v0;
106 t = u1 * v0 + (w0 >> 32);
sewardjdbdc5b32005-03-25 20:31:46 +0000107 w1 = t & 0xFFFFFFFFULL;
sewardj1a01e652005-02-23 11:39:21 +0000108 w2 = t >> 32;
109 w1 = u0 * v1 + w1;
110 *rHi = u1 * v1 + w2 + (w1 >> 32);
111 *rLo = u * v;
112}
113
114
sewardjf8c37f72005-02-07 18:55:29 +0000115static const UChar parity_table[256] = {
116 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
117 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
118 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
119 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
120 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
121 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
122 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
123 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
124 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
125 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
126 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
127 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
128 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
129 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
130 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
131 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
132 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
133 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
134 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
135 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
136 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
137 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
138 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
139 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
140 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
141 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
142 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
143 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
144 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
145 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
146 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
147 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
148};
149
sewardj4a6f3842005-03-26 11:59:23 +0000150/* generalised left-shifter */
sewardj1fa7b802005-03-25 14:39:37 +0000151static inline Long lshift ( Long x, Int n )
sewardj118b23e2005-01-29 02:14:44 +0000152{
sewardjf8c37f72005-02-07 18:55:29 +0000153 if (n >= 0)
florian108e03f2015-03-10 16:11:58 +0000154 return (ULong)x << n;
sewardjf8c37f72005-02-07 18:55:29 +0000155 else
156 return x >> (-n);
sewardj118b23e2005-01-29 02:14:44 +0000157}
158
sewardj1fa7b802005-03-25 14:39:37 +0000159/* identity on ULong */
160static inline ULong idULong ( ULong x )
161{
162 return x;
163}
164
sewardj118b23e2005-01-29 02:14:44 +0000165
/* Common preamble for the ACTIONS_* flag computations below: derive
   DATA_MASK/SIGN_MASK for the operand width and unpack the three
   formal thunk parameters into the names the macros use. */
#define PREAMBLE(__data_bits)                                   \
   /* const */ ULong DATA_MASK                                  \
      = __data_bits==8                                          \
           ? 0xFFULL                                            \
           : (__data_bits==16                                   \
                ? 0xFFFFULL                                     \
                : (__data_bits==32                              \
                     ? 0xFFFFFFFFULL                            \
                     : 0xFFFFFFFFFFFFFFFFULL));                 \
   /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1);     \
   /* const */ ULong CC_DEP1 = cc_dep1_formal;                  \
   /* const */ ULong CC_DEP2 = cc_dep2_formal;                  \
   /* const */ ULong CC_NDEP = cc_ndep_formal;                  \
   /* Four bogus self-assignments, which hopefully gcc can */   \
   /* optimise away, and which stop it complaining about */     \
   /* unused variables. */                                      \
   SIGN_MASK = SIGN_MASK;                                       \
   DATA_MASK = DATA_MASK;                                       \
   CC_DEP2 = CC_DEP2;                                           \
   CC_NDEP = CC_NDEP;
186
187
188/*-------------------------------------------------------------*/
189
/* ADD: DEP1 = argL, DEP2 = argR.  Flags computed from argL + argR. */
#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL + argR;                                        \
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                   \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
sewardj44d494d2005-01-20 20:26:33 +0000208
sewardjf8c37f72005-02-07 18:55:29 +0000209/*-------------------------------------------------------------*/
210
/* SUB: DEP1 = argL, DEP2 = argR.  Flags computed from argL - argR. */
#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL - argR;                                        \
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;                  \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
229
sewardjf8c37f72005-02-07 18:55:29 +0000230/*-------------------------------------------------------------*/
231
/* ADC: DEP1 = argL, DEP2 = argR ^ oldCarry, NDEP = old flags.
   Flags computed from argL + argR + oldCarry. */
#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, oldC, res;                               \
     oldC = CC_NDEP & AMD64G_CC_MASK_C;                         \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;   /* recover the true argR */       \
     res  = (argL + argR) + oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;               \
     else                                                       \
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
254
255/*-------------------------------------------------------------*/
256
/* SBB: DEP1 = argL, DEP2 = argR ^ oldCarry, NDEP = old flags.
   Flags computed from argL - argR - oldCarry. */
#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, oldC, res;                               \
     oldC = CC_NDEP & AMD64G_CC_MASK_C;                         \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;   /* recover the true argR */       \
     res  = (argL - argR) - oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;              \
     else                                                       \
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;               \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
279
280/*-------------------------------------------------------------*/
281
/* LOGIC (and/or/xor): DEP1 = result.  CF, AF and OF are cleared. */
#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = 0;                                                    \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
295
296/*-------------------------------------------------------------*/
297
/* INC: DEP1 = result, NDEP = old flags.  CF is not modified by INC,
   so it is taken from the old flags. */
#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     res  = CC_DEP1;                                            \
     argL = res - 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & AMD64G_CC_MASK_C;                           \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;               \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
315
316/*-------------------------------------------------------------*/
317
/* DEC: DEP1 = result, NDEP = old flags.  CF is not modified by DEC,
   so it is taken from the old flags. */
#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     res  = CC_DEP1;                                            \
     argL = res + 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & AMD64G_CC_MASK_C;                           \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK)                                    \
           == ((ULong)SIGN_MASK - 1)) << 11;                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
336
337/*-------------------------------------------------------------*/
338
/* SHL: DEP1 = result, DEP2 = result computed with one shift less. */
#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C;      \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & AMD64G_CC_MASK_O;                                   \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
354
355/*-------------------------------------------------------------*/
356
/* SHR: DEP1 = result, DEP2 = result computed with one shift less. */
#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = CC_DEP2 & 1;                                          \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & AMD64G_CC_MASK_O;                                   \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
372
373/*-------------------------------------------------------------*/
374
/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags.  Only CF and OF change. */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong fl                                                   \
        = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))    \
          | (AMD64G_CC_MASK_C & CC_DEP1)                        \
          | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,                \
                                        11-(DATA_BITS-1))       \
                     ^ lshift(CC_DEP1, 11)));                   \
     return fl;                                                 \
   }                                                            \
}
389
390/*-------------------------------------------------------------*/
391
/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags.  Only CF and OF change. */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong fl                                                   \
        = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))    \
          | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))     \
          | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,                \
                                        11-(DATA_BITS-1))       \
                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));   \
     return fl;                                                 \
   }                                                            \
}
406
407/*-------------------------------------------------------------*/
408
/* UMUL: unsigned widening multiply for 8/16/32-bit operands.
   DEP1/DEP2 = the two factors.  CF=OF=1 iff the high half of the
   double-width product is nonzero. */
#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
432
433/*-------------------------------------------------------------*/
434
/* SMUL: signed widening multiply for 8/16/32-bit operands.
   DEP1/DEP2 = the two factors.  CF=OF=1 iff the high half is not
   simply the sign-extension of the low half. */
#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
458
sewardj1a01e652005-02-23 11:39:21 +0000459/*-------------------------------------------------------------*/
460
/* UMULQ: 64x64 -> 128 unsigned multiply; the 128-bit product is
   computed by mullU64 since there is no wider integer type. */
#define ACTIONS_UMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong lo, hi;                                              \
     mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo );       \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
476
477/*-------------------------------------------------------------*/
478
/* SMULQ: 64x64 -> 128 signed multiply; the 128-bit product is
   computed by mullS64 since there is no wider integer type. */
#define ACTIONS_SMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { ULong cf, pf, af, zf, sf, of;                              \
     Long lo, hi;                                               \
     mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo );         \
     cf = (hi != (lo >>/*s*/ (64-1)));                          \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
494
sewardjcc3d2192013-03-27 11:37:33 +0000495/*-------------------------------------------------------------*/
496
/* ANDN (BMI1): DEP1 = result.  CF and OF are cleared; PF and AF are
   left as zero here. */
#define ACTIONS_ANDN(DATA_BITS,DATA_UTYPE)                      \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = 0;                                                    \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
510
511/*-------------------------------------------------------------*/
512
/* BLSI (BMI1): DEP1 = result, DEP2 = source.  CF is set iff the
   source was nonzero; OF is cleared. */
#define ACTIONS_BLSI(DATA_BITS,DATA_UTYPE)                      \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = ((DATA_UTYPE)CC_DEP2 != 0);                           \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
526
527/*-------------------------------------------------------------*/
528
/* BLSMSK (BMI1): DEP1 = result, DEP2 = source.  CF is set iff the
   source was zero; ZF and OF are cleared (Intel SDM).  Consistency
   fix: use ULong for the flag temporaries like every other ACTIONS_*
   macro here (was Long; all values involved are small non-negative
   bit masks, so the computed result is unchanged). */
#define ACTIONS_BLSMSK(DATA_BITS,DATA_UTYPE)                    \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = ((DATA_UTYPE)CC_DEP2 == 0);                           \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = 0;                                                    \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
542
543/*-------------------------------------------------------------*/
544
/* BLSR (BMI1): DEP1 = result, DEP2 = source.  CF is set iff the
   source was zero; OF is cleared. */
#define ACTIONS_BLSR(DATA_BITS,DATA_UTYPE)                      \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = ((DATA_UTYPE)CC_DEP2 == 0);                           \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
558
559/*-------------------------------------------------------------*/
560
sewardjf8c37f72005-02-07 18:55:29 +0000561
#if PROFILE_RFLAGS

/* Profiling machinery for the flag helpers, compiled in only when
   PROFILE_RFLAGS is set to 1 above. */

static Bool initted = False;

/* C flag, fast route */
static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

/* True once every 0x400000 combined calls — used to rate-limit the
   periodic dumping of the counters. */
#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))


/* Print the accumulated per-op / per-condition counters. */
static void showCounts ( void )
{
   Int   op, co;
   HChar ch;
   vex_printf("\nTotal calls: calc_all=%u  calc_cond=%u  calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c);

   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
              "    S   NS    P   NP    L   NL   LE  NLE\n");
   vex_printf("      -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {

      /* Annotate the op with its operand-size letter; ops come in
         groups of four (B/W/L/Q) after the initial COPY op. */
      ch = ' ';
      if (op > 0 && (op-1) % 4 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 4 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 4 == 2)
         ch = 'L';
      if (op > 0 && (op-1) % 4 == 3)
         ch = 'Q';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else
         if (n >= 0) {
            vex_printf(" %3d ", n );
         } else {
            vex_printf("     ");
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}

/* Zero all counters; called lazily on first use. */
static void initCounts ( void )
{
   Int op, co;
   initted = True;
   for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
      tabc_fast[op] = tabc_slow[op] = 0;
      for (co = 0; co < 16; co++)
         tab_cond[op][co] = 0;
   }
}

#endif /* PROFILE_RFLAGS */
sewardjf8c37f72005-02-07 18:55:29 +0000634
635
636/* CALLED FROM GENERATED CODE: CLEAN HELPER */
637/* Calculate all the 6 flags from the supplied thunk parameters.
638 Worker function, not directly called from generated code. */
639static
640ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
641 ULong cc_dep1_formal,
642 ULong cc_dep2_formal,
643 ULong cc_ndep_formal )
644{
645 switch (cc_op) {
646 case AMD64G_CC_OP_COPY:
647 return cc_dep1_formal
648 & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
649 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);
650
651 case AMD64G_CC_OP_ADDB: ACTIONS_ADD( 8, UChar );
652 case AMD64G_CC_OP_ADDW: ACTIONS_ADD( 16, UShort );
653 case AMD64G_CC_OP_ADDL: ACTIONS_ADD( 32, UInt );
sewardjd0a12df2005-02-10 02:07:43 +0000654 case AMD64G_CC_OP_ADDQ: ACTIONS_ADD( 64, ULong );
sewardjf8c37f72005-02-07 18:55:29 +0000655
656 case AMD64G_CC_OP_ADCB: ACTIONS_ADC( 8, UChar );
657 case AMD64G_CC_OP_ADCW: ACTIONS_ADC( 16, UShort );
658 case AMD64G_CC_OP_ADCL: ACTIONS_ADC( 32, UInt );
sewardj85520e42005-02-19 15:22:38 +0000659 case AMD64G_CC_OP_ADCQ: ACTIONS_ADC( 64, ULong );
sewardjf8c37f72005-02-07 18:55:29 +0000660
661 case AMD64G_CC_OP_SUBB: ACTIONS_SUB( 8, UChar );
662 case AMD64G_CC_OP_SUBW: ACTIONS_SUB( 16, UShort );
663 case AMD64G_CC_OP_SUBL: ACTIONS_SUB( 32, UInt );
664 case AMD64G_CC_OP_SUBQ: ACTIONS_SUB( 64, ULong );
665
666 case AMD64G_CC_OP_SBBB: ACTIONS_SBB( 8, UChar );
667 case AMD64G_CC_OP_SBBW: ACTIONS_SBB( 16, UShort );
668 case AMD64G_CC_OP_SBBL: ACTIONS_SBB( 32, UInt );
sewardj85520e42005-02-19 15:22:38 +0000669 case AMD64G_CC_OP_SBBQ: ACTIONS_SBB( 64, ULong );
sewardjf8c37f72005-02-07 18:55:29 +0000670
671 case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC( 8, UChar );
672 case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
673 case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt );
674 case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong );
675
676 case AMD64G_CC_OP_INCB: ACTIONS_INC( 8, UChar );
677 case AMD64G_CC_OP_INCW: ACTIONS_INC( 16, UShort );
678 case AMD64G_CC_OP_INCL: ACTIONS_INC( 32, UInt );
sewardj7de0d3c2005-02-13 02:26:41 +0000679 case AMD64G_CC_OP_INCQ: ACTIONS_INC( 64, ULong );
sewardjf8c37f72005-02-07 18:55:29 +0000680
681 case AMD64G_CC_OP_DECB: ACTIONS_DEC( 8, UChar );
682 case AMD64G_CC_OP_DECW: ACTIONS_DEC( 16, UShort );
683 case AMD64G_CC_OP_DECL: ACTIONS_DEC( 32, UInt );
sewardj7de0d3c2005-02-13 02:26:41 +0000684 case AMD64G_CC_OP_DECQ: ACTIONS_DEC( 64, ULong );
sewardjf8c37f72005-02-07 18:55:29 +0000685
686 case AMD64G_CC_OP_SHLB: ACTIONS_SHL( 8, UChar );
687 case AMD64G_CC_OP_SHLW: ACTIONS_SHL( 16, UShort );
688 case AMD64G_CC_OP_SHLL: ACTIONS_SHL( 32, UInt );
sewardj7de0d3c2005-02-13 02:26:41 +0000689 case AMD64G_CC_OP_SHLQ: ACTIONS_SHL( 64, ULong );
sewardjf8c37f72005-02-07 18:55:29 +0000690
691 case AMD64G_CC_OP_SHRB: ACTIONS_SHR( 8, UChar );
692 case AMD64G_CC_OP_SHRW: ACTIONS_SHR( 16, UShort );
693 case AMD64G_CC_OP_SHRL: ACTIONS_SHR( 32, UInt );
sewardja6b93d12005-02-17 09:28:28 +0000694 case AMD64G_CC_OP_SHRQ: ACTIONS_SHR( 64, ULong );
sewardjf8c37f72005-02-07 18:55:29 +0000695
696 case AMD64G_CC_OP_ROLB: ACTIONS_ROL( 8, UChar );
697 case AMD64G_CC_OP_ROLW: ACTIONS_ROL( 16, UShort );
698 case AMD64G_CC_OP_ROLL: ACTIONS_ROL( 32, UInt );
sewardj85520e42005-02-19 15:22:38 +0000699 case AMD64G_CC_OP_ROLQ: ACTIONS_ROL( 64, ULong );
sewardjf8c37f72005-02-07 18:55:29 +0000700
701 case AMD64G_CC_OP_RORB: ACTIONS_ROR( 8, UChar );
702 case AMD64G_CC_OP_RORW: ACTIONS_ROR( 16, UShort );
703 case AMD64G_CC_OP_RORL: ACTIONS_ROR( 32, UInt );
sewardj85520e42005-02-19 15:22:38 +0000704 case AMD64G_CC_OP_RORQ: ACTIONS_ROR( 64, ULong );
sewardjf8c37f72005-02-07 18:55:29 +0000705
sewardj1fa7b802005-03-25 14:39:37 +0000706 case AMD64G_CC_OP_UMULB: ACTIONS_UMUL( 8, UChar, toUChar,
707 UShort, toUShort );
708 case AMD64G_CC_OP_UMULW: ACTIONS_UMUL( 16, UShort, toUShort,
709 UInt, toUInt );
710 case AMD64G_CC_OP_UMULL: ACTIONS_UMUL( 32, UInt, toUInt,
711 ULong, idULong );
sewardjf8c37f72005-02-07 18:55:29 +0000712
sewardj8bdb89a2005-05-05 21:46:50 +0000713 case AMD64G_CC_OP_UMULQ: ACTIONS_UMULQ;
714
sewardj1fa7b802005-03-25 14:39:37 +0000715 case AMD64G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, toUChar,
716 Short, toUShort );
717 case AMD64G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, toUShort,
718 Int, toUInt );
719 case AMD64G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, toUInt,
720 Long, idULong );
721
sewardj1a01e652005-02-23 11:39:21 +0000722 case AMD64G_CC_OP_SMULQ: ACTIONS_SMULQ;
sewardjf8c37f72005-02-07 18:55:29 +0000723
sewardjcc3d2192013-03-27 11:37:33 +0000724 case AMD64G_CC_OP_ANDN32: ACTIONS_ANDN( 32, UInt );
725 case AMD64G_CC_OP_ANDN64: ACTIONS_ANDN( 64, ULong );
726
727 case AMD64G_CC_OP_BLSI32: ACTIONS_BLSI( 32, UInt );
728 case AMD64G_CC_OP_BLSI64: ACTIONS_BLSI( 64, ULong );
729
730 case AMD64G_CC_OP_BLSMSK32: ACTIONS_BLSMSK( 32, UInt );
731 case AMD64G_CC_OP_BLSMSK64: ACTIONS_BLSMSK( 64, ULong );
732
733 case AMD64G_CC_OP_BLSR32: ACTIONS_BLSR( 32, UInt );
734 case AMD64G_CC_OP_BLSR64: ACTIONS_BLSR( 64, ULong );
735
sewardjf8c37f72005-02-07 18:55:29 +0000736 default:
737 /* shouldn't really make these calls from generated code */
738 vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
sewardj1fa7b802005-03-25 14:39:37 +0000739 "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
sewardjf8c37f72005-02-07 18:55:29 +0000740 cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
741 vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
742 }
743}
744
745
746/* CALLED FROM GENERATED CODE: CLEAN HELPER */
747/* Calculate all the 6 flags from the supplied thunk parameters. */
748ULong amd64g_calculate_rflags_all ( ULong cc_op,
749 ULong cc_dep1,
750 ULong cc_dep2,
751 ULong cc_ndep )
752{
sewardj1fa7b802005-03-25 14:39:37 +0000753# if PROFILE_RFLAGS
sewardjf8c37f72005-02-07 18:55:29 +0000754 if (!initted) initCounts();
755 n_calc_all++;
756 if (SHOW_COUNTS_NOW) showCounts();
757# endif
758 return
759 amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
760}
761
762
763/* CALLED FROM GENERATED CODE: CLEAN HELPER */
764/* Calculate just the carry flag from the supplied thunk parameters. */
765ULong amd64g_calculate_rflags_c ( ULong cc_op,
766 ULong cc_dep1,
767 ULong cc_dep2,
768 ULong cc_ndep )
769{
sewardj1fa7b802005-03-25 14:39:37 +0000770# if PROFILE_RFLAGS
sewardjf8c37f72005-02-07 18:55:29 +0000771 if (!initted) initCounts();
772 n_calc_c++;
773 tabc_fast[cc_op]++;
774 if (SHOW_COUNTS_NOW) showCounts();
775# endif
776
777 /* Fast-case some common ones. */
778 switch (cc_op) {
sewardj7fc494b2005-05-05 12:05:11 +0000779 case AMD64G_CC_OP_COPY:
780 return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
sewardj03540352005-04-26 01:53:48 +0000781 case AMD64G_CC_OP_LOGICQ:
sewardjf8c37f72005-02-07 18:55:29 +0000782 case AMD64G_CC_OP_LOGICL:
783 case AMD64G_CC_OP_LOGICW:
784 case AMD64G_CC_OP_LOGICB:
785 return 0;
sewardj03540352005-04-26 01:53:48 +0000786 // case AMD64G_CC_OP_SUBL:
787 // return ((UInt)cc_dep1) < ((UInt)cc_dep2)
788 // ? AMD64G_CC_MASK_C : 0;
789 // case AMD64G_CC_OP_SUBW:
790 // return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
791 // ? AMD64G_CC_MASK_C : 0;
792 // case AMD64G_CC_OP_SUBB:
793 // return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
794 // ? AMD64G_CC_MASK_C : 0;
795 // case AMD64G_CC_OP_INCL:
796 // case AMD64G_CC_OP_DECL:
797 // return cc_ndep & AMD64G_CC_MASK_C;
sewardjf8c37f72005-02-07 18:55:29 +0000798 default:
799 break;
800 }
801
sewardj1fa7b802005-03-25 14:39:37 +0000802# if PROFILE_RFLAGS
sewardjf8c37f72005-02-07 18:55:29 +0000803 tabc_fast[cc_op]--;
804 tabc_slow[cc_op]++;
805# endif
806
807 return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
808 & AMD64G_CC_MASK_C;
809}
810
811
812/* CALLED FROM GENERATED CODE: CLEAN HELPER */
813/* returns 1 or 0 */
814ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
815 ULong cc_op,
816 ULong cc_dep1,
817 ULong cc_dep2,
818 ULong cc_ndep )
819{
820 ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
821 cc_dep2, cc_ndep);
822 ULong of,sf,zf,cf,pf;
823 ULong inv = cond & 1;
824
sewardj1fa7b802005-03-25 14:39:37 +0000825# if PROFILE_RFLAGS
sewardjf8c37f72005-02-07 18:55:29 +0000826 if (!initted) initCounts();
827 tab_cond[cc_op][cond]++;
828 n_calc_cond++;
829 if (SHOW_COUNTS_NOW) showCounts();
830# endif
831
832 switch (cond) {
833 case AMD64CondNO:
834 case AMD64CondO: /* OF == 1 */
835 of = rflags >> AMD64G_CC_SHIFT_O;
836 return 1 & (inv ^ of);
837
838 case AMD64CondNZ:
839 case AMD64CondZ: /* ZF == 1 */
840 zf = rflags >> AMD64G_CC_SHIFT_Z;
841 return 1 & (inv ^ zf);
842
843 case AMD64CondNB:
844 case AMD64CondB: /* CF == 1 */
845 cf = rflags >> AMD64G_CC_SHIFT_C;
846 return 1 & (inv ^ cf);
847 break;
848
849 case AMD64CondNBE:
850 case AMD64CondBE: /* (CF or ZF) == 1 */
851 cf = rflags >> AMD64G_CC_SHIFT_C;
852 zf = rflags >> AMD64G_CC_SHIFT_Z;
853 return 1 & (inv ^ (cf | zf));
854 break;
855
856 case AMD64CondNS:
857 case AMD64CondS: /* SF == 1 */
858 sf = rflags >> AMD64G_CC_SHIFT_S;
859 return 1 & (inv ^ sf);
860
861 case AMD64CondNP:
862 case AMD64CondP: /* PF == 1 */
863 pf = rflags >> AMD64G_CC_SHIFT_P;
864 return 1 & (inv ^ pf);
865
866 case AMD64CondNL:
867 case AMD64CondL: /* (SF xor OF) == 1 */
868 sf = rflags >> AMD64G_CC_SHIFT_S;
869 of = rflags >> AMD64G_CC_SHIFT_O;
870 return 1 & (inv ^ (sf ^ of));
871 break;
872
873 case AMD64CondNLE:
874 case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */
875 sf = rflags >> AMD64G_CC_SHIFT_S;
876 of = rflags >> AMD64G_CC_SHIFT_O;
877 zf = rflags >> AMD64G_CC_SHIFT_Z;
878 return 1 & (inv ^ ((sf ^ of) | zf));
879 break;
880
881 default:
882 /* shouldn't really make these calls from generated code */
883 vex_printf("amd64g_calculate_condition"
sewardj1fa7b802005-03-25 14:39:37 +0000884 "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
sewardjf8c37f72005-02-07 18:55:29 +0000885 cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
886 vpanic("amd64g_calculate_condition");
887 }
888}
889
890
891/* VISIBLE TO LIBVEX CLIENT */
florianefa834a2012-11-24 21:07:14 +0000892ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/const VexGuestAMD64State* vex_state )
sewardjf8c37f72005-02-07 18:55:29 +0000893{
894 ULong rflags = amd64g_calculate_rflags_all_WRK(
895 vex_state->guest_CC_OP,
896 vex_state->guest_CC_DEP1,
897 vex_state->guest_CC_DEP2,
898 vex_state->guest_CC_NDEP
899 );
sewardj7de0d3c2005-02-13 02:26:41 +0000900 Long dflag = vex_state->guest_DFLAG;
901 vassert(dflag == 1 || dflag == -1);
902 if (dflag == -1)
sewardjf8c37f72005-02-07 18:55:29 +0000903 rflags |= (1<<10);
sewardj85520e42005-02-19 15:22:38 +0000904 if (vex_state->guest_IDFLAG == 1)
905 rflags |= (1<<21);
sewardj5e120aa2010-09-28 15:59:04 +0000906 if (vex_state->guest_ACFLAG == 1)
907 rflags |= (1<<18);
908
sewardjf8c37f72005-02-07 18:55:29 +0000909 return rflags;
910}
sewardjf8c37f72005-02-07 18:55:29 +0000911
sewardjd660d412008-12-03 21:29:59 +0000912/* VISIBLE TO LIBVEX CLIENT */
913void
914LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag,
915 /*MOD*/VexGuestAMD64State* vex_state )
916{
917 ULong oszacp = amd64g_calculate_rflags_all_WRK(
918 vex_state->guest_CC_OP,
919 vex_state->guest_CC_DEP1,
920 vex_state->guest_CC_DEP2,
921 vex_state->guest_CC_NDEP
922 );
923 if (new_carry_flag & 1) {
924 oszacp |= AMD64G_CC_MASK_C;
925 } else {
926 oszacp &= ~AMD64G_CC_MASK_C;
927 }
928 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
929 vex_state->guest_CC_DEP1 = oszacp;
930 vex_state->guest_CC_DEP2 = 0;
931 vex_state->guest_CC_NDEP = 0;
932}
933
sewardjf8c37f72005-02-07 18:55:29 +0000934
935/*---------------------------------------------------------------*/
936/*--- %rflags translation-time function specialisers. ---*/
937/*--- These help iropt specialise calls the above run-time ---*/
938/*--- %rflags functions. ---*/
939/*---------------------------------------------------------------*/
940
sewardj03540352005-04-26 01:53:48 +0000941/* Used by the optimiser to try specialisations. Returns an
942 equivalent expression, or NULL if none. */
943
944static Bool isU64 ( IRExpr* e, ULong n )
945{
sewardj65b17c62005-05-02 15:52:44 +0000946 return toBool( e->tag == Iex_Const
947 && e->Iex.Const.con->tag == Ico_U64
948 && e->Iex.Const.con->Ico.U64 == n );
sewardj03540352005-04-26 01:53:48 +0000949}
sewardj354e5c62005-01-27 20:12:52 +0000950
florian1ff47562012-10-21 02:09:51 +0000951IRExpr* guest_amd64_spechelper ( const HChar* function_name,
sewardjbe917912010-08-22 12:38:53 +0000952 IRExpr** args,
953 IRStmt** precedingStmts,
954 Int n_precedingStmts )
sewardj44d494d2005-01-20 20:26:33 +0000955{
sewardj03540352005-04-26 01:53:48 +0000956# define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
957# define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
958# define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
sewardj9cc2bbf2011-06-05 17:56:03 +0000959# define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
sewardj03540352005-04-26 01:53:48 +0000960# define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
961
962 Int i, arity = 0;
963 for (i = 0; args[i]; i++)
964 arity++;
965# if 0
966 vex_printf("spec request:\n");
967 vex_printf(" %s ", function_name);
968 for (i = 0; i < arity; i++) {
969 vex_printf(" ");
970 ppIRExpr(args[i]);
971 }
972 vex_printf("\n");
973# endif
974
975 /* --------- specialising "amd64g_calculate_condition" --------- */
976
977 if (vex_streq(function_name, "amd64g_calculate_condition")) {
978 /* specialise calls to above "calculate condition" function */
979 IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
980 vassert(arity == 5);
981 cond = args[0];
982 cc_op = args[1];
983 cc_dep1 = args[2];
984 cc_dep2 = args[3];
985
sewardjdb261e42005-05-11 23:16:43 +0000986 /*---------------- ADDQ ----------------*/
987
988 if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
989 /* long long add, then Z --> test (dst+src == 0) */
990 return unop(Iop_1Uto64,
991 binop(Iop_CmpEQ64,
992 binop(Iop_Add64, cc_dep1, cc_dep2),
993 mkU64(0)));
994 }
sewardj03540352005-04-26 01:53:48 +0000995
sewardjaedb8592014-10-02 16:15:30 +0000996 /*---------------- ADDL ----------------*/
997
998 if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondO)) {
999 /* This is very commonly generated by Javascript JITs, for
1000 the idiom "do a 32-bit add and jump to out-of-line code if
1001 an overflow occurs". */
1002 /* long add, then O (overflow)
1003 --> ((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 + dep2)))[31]
1004 --> (((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
1005 --> (((not(dep1 ^ dep2)) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
1006 */
1007 vassert(isIRAtom(cc_dep1));
1008 vassert(isIRAtom(cc_dep2));
1009 return
1010 binop(Iop_And64,
1011 binop(Iop_Shr64,
1012 binop(Iop_And64,
1013 unop(Iop_Not64,
1014 binop(Iop_Xor64, cc_dep1, cc_dep2)),
1015 binop(Iop_Xor64,
1016 cc_dep1,
1017 binop(Iop_Add64, cc_dep1, cc_dep2))),
1018 mkU8(31)),
1019 mkU64(1));
1020
1021 }
1022
sewardj4b06a0b2005-11-13 19:51:04 +00001023 /*---------------- SUBQ ----------------*/
1024
sewardjaedb8592014-10-02 16:15:30 +00001025 /* 0, */
1026 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondO)) {
1027 /* long long sub/cmp, then O (overflow)
1028 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[63]
1029 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2))) >>u 63
1030 */
1031 vassert(isIRAtom(cc_dep1));
1032 vassert(isIRAtom(cc_dep2));
1033 return binop(Iop_Shr64,
1034 binop(Iop_And64,
1035 binop(Iop_Xor64, cc_dep1, cc_dep2),
1036 binop(Iop_Xor64,
1037 cc_dep1,
1038 binop(Iop_Sub64, cc_dep1, cc_dep2))),
1039 mkU8(64));
1040 }
1041 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNO)) {
1042 /* No action. Never yet found a test case. */
1043 }
1044
sewardjedccb442014-10-02 11:32:39 +00001045 /* 2, 3 */
sewardj4b06a0b2005-11-13 19:51:04 +00001046 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
1047 /* long long sub/cmp, then B (unsigned less than)
1048 --> test dst <u src */
1049 return unop(Iop_1Uto64,
1050 binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
1051 }
sewardja9e4a802005-12-26 19:33:55 +00001052 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
1053 /* long long sub/cmp, then NB (unsigned greater than or equal)
1054 --> test src <=u dst */
1055 /* Note, args are opposite way round from the usual */
1056 return unop(Iop_1Uto64,
1057 binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
1058 }
1059
sewardjedccb442014-10-02 11:32:39 +00001060 /* 4, 5 */
1061 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
1062 /* long long sub/cmp, then Z --> test dst==src */
sewardj3cfd1f02013-08-07 09:45:08 +00001063 return unop(Iop_1Uto64,
sewardjedccb442014-10-02 11:32:39 +00001064 binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
1065 }
1066 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
1067 /* long long sub/cmp, then NZ --> test dst!=src */
1068 return unop(Iop_1Uto64,
1069 binop(Iop_CmpNE64,cc_dep1,cc_dep2));
sewardj3cfd1f02013-08-07 09:45:08 +00001070 }
1071
sewardjedccb442014-10-02 11:32:39 +00001072 /* 6, 7 */
sewardja9e4a802005-12-26 19:33:55 +00001073 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
1074 /* long long sub/cmp, then BE (unsigned less than or equal)
1075 --> test dst <=u src */
1076 return unop(Iop_1Uto64,
1077 binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
1078 }
sewardj3a05a152012-02-23 07:36:43 +00001079 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNBE)) {
1080 /* long long sub/cmp, then NBE (unsigned greater than)
1081 --> test !(dst <=u src) */
1082 return binop(Iop_Xor64,
1083 unop(Iop_1Uto64,
1084 binop(Iop_CmpLE64U, cc_dep1, cc_dep2)),
1085 mkU64(1));
1086 }
sewardja9e4a802005-12-26 19:33:55 +00001087
sewardjaedb8592014-10-02 16:15:30 +00001088 /* 8, 9 */
1089 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondS)) {
1090 /* long long sub/cmp, then S (negative)
1091 --> (dst-src)[63]
1092 --> (dst-src) >>u 63 */
1093 return binop(Iop_Shr64,
1094 binop(Iop_Sub64, cc_dep1, cc_dep2),
1095 mkU8(63));
1096 }
1097 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNS)) {
1098 /* long long sub/cmp, then NS (not negative)
1099 --> (dst-src)[63] ^ 1
1100 --> ((dst-src) >>u 63) ^ 1 */
1101 return binop(Iop_Xor64,
1102 binop(Iop_Shr64,
1103 binop(Iop_Sub64, cc_dep1, cc_dep2),
1104 mkU8(63)),
1105 mkU64(1));
1106 }
1107
1108 /* 12, 13 */
sewardjedccb442014-10-02 11:32:39 +00001109 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
1110 /* long long sub/cmp, then L (signed less than)
1111 --> test dst <s src */
1112 return unop(Iop_1Uto64,
1113 binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
1114 }
sewardjaedb8592014-10-02 16:15:30 +00001115 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNL)) {
1116 /* long long sub/cmp, then NL (signed greater than or equal)
1117 --> test dst >=s src
1118 --> test src <=s dst */
1119 return unop(Iop_1Uto64,
1120 binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
1121 }
sewardjedccb442014-10-02 11:32:39 +00001122
sewardjaedb8592014-10-02 16:15:30 +00001123 /* 14, 15 */
1124 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondLE)) {
1125 /* long long sub/cmp, then LE (signed less than or equal)
1126 --> test dst <=s src */
1127 return unop(Iop_1Uto64,
1128 binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
1129 }
sewardjedccb442014-10-02 11:32:39 +00001130 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNLE)) {
1131 /* long sub/cmp, then NLE (signed greater than)
1132 --> test !(dst <=s src)
1133 --> test (dst >s src)
1134 --> test (src <s dst) */
1135 return unop(Iop_1Uto64,
1136 binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
1137
1138 }
1139
sewardj03540352005-04-26 01:53:48 +00001140 /*---------------- SUBL ----------------*/
1141
sewardjaedb8592014-10-02 16:15:30 +00001142 /* 0, */
1143 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondO)) {
1144 /* This is very commonly generated by Javascript JITs, for
1145 the idiom "do a 32-bit subtract and jump to out-of-line
1146 code if an overflow occurs". */
1147 /* long sub/cmp, then O (overflow)
1148 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[31]
1149 --> (((dep1 ^ dep2) & (dep1 ^ (dep1 -64 dep2))) >>u 31) & 1
1150 */
1151 vassert(isIRAtom(cc_dep1));
1152 vassert(isIRAtom(cc_dep2));
1153 return
1154 binop(Iop_And64,
1155 binop(Iop_Shr64,
1156 binop(Iop_And64,
1157 binop(Iop_Xor64, cc_dep1, cc_dep2),
1158 binop(Iop_Xor64,
1159 cc_dep1,
1160 binop(Iop_Sub64, cc_dep1, cc_dep2))),
1161 mkU8(31)),
1162 mkU64(1));
1163 }
1164 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNO)) {
1165 /* No action. Never yet found a test case. */
1166 }
1167
1168 /* 2, 3 */
sewardjedccb442014-10-02 11:32:39 +00001169 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) {
1170 /* long sub/cmp, then B (unsigned less than)
1171 --> test dst <u src */
1172 return unop(Iop_1Uto64,
1173 binop(Iop_CmpLT32U,
1174 unop(Iop_64to32, cc_dep1),
1175 unop(Iop_64to32, cc_dep2)));
1176 }
sewardjaedb8592014-10-02 16:15:30 +00001177 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNB)) {
1178 /* long sub/cmp, then NB (unsigned greater than or equal)
1179 --> test src <=u dst */
1180 /* Note, args are opposite way round from the usual */
1181 return unop(Iop_1Uto64,
1182 binop(Iop_CmpLE32U,
1183 unop(Iop_64to32, cc_dep2),
1184 unop(Iop_64to32, cc_dep1)));
1185 }
sewardjedccb442014-10-02 11:32:39 +00001186
1187 /* 4, 5 */
sewardjdb261e42005-05-11 23:16:43 +00001188 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
1189 /* long sub/cmp, then Z --> test dst==src */
1190 return unop(Iop_1Uto64,
sewardj9cc2bbf2011-06-05 17:56:03 +00001191 binop(Iop_CmpEQ32,
1192 unop(Iop_64to32, cc_dep1),
1193 unop(Iop_64to32, cc_dep2)));
sewardja9e4a802005-12-26 19:33:55 +00001194 }
sewardja9e4a802005-12-26 19:33:55 +00001195 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
1196 /* long sub/cmp, then NZ --> test dst!=src */
1197 return unop(Iop_1Uto64,
sewardj9cc2bbf2011-06-05 17:56:03 +00001198 binop(Iop_CmpNE32,
1199 unop(Iop_64to32, cc_dep1),
1200 unop(Iop_64to32, cc_dep2)));
sewardjdb261e42005-05-11 23:16:43 +00001201 }
1202
sewardjedccb442014-10-02 11:32:39 +00001203 /* 6, 7 */
1204 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
1205 /* long sub/cmp, then BE (unsigned less than or equal)
1206 --> test dst <=u src */
1207 return unop(Iop_1Uto64,
1208 binop(Iop_CmpLE32U,
1209 unop(Iop_64to32, cc_dep1),
1210 unop(Iop_64to32, cc_dep2)));
1211 }
1212 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
1213 /* long sub/cmp, then NBE (unsigned greater than)
1214 --> test src <u dst */
1215 /* Note, args are opposite way round from the usual */
1216 return unop(Iop_1Uto64,
1217 binop(Iop_CmpLT32U,
1218 unop(Iop_64to32, cc_dep2),
1219 unop(Iop_64to32, cc_dep1)));
1220 }
1221
sewardjaedb8592014-10-02 16:15:30 +00001222 /* 8, 9 */
sewardjedccb442014-10-02 11:32:39 +00001223 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
sewardjaedb8592014-10-02 16:15:30 +00001224 /* long sub/cmp, then S (negative)
1225 --> (dst-src)[31]
1226 --> ((dst -64 src) >>u 31) & 1
1227 Pointless to narrow the args to 32 bit before the subtract. */
1228 return binop(Iop_And64,
1229 binop(Iop_Shr64,
1230 binop(Iop_Sub64, cc_dep1, cc_dep2),
1231 mkU8(31)),
1232 mkU64(1));
1233 }
1234 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNS)) {
1235 /* long sub/cmp, then NS (not negative)
1236 --> (dst-src)[31] ^ 1
1237 --> (((dst -64 src) >>u 31) & 1) ^ 1
1238 Pointless to narrow the args to 32 bit before the subtract. */
1239 return binop(Iop_Xor64,
1240 binop(Iop_And64,
1241 binop(Iop_Shr64,
1242 binop(Iop_Sub64, cc_dep1, cc_dep2),
1243 mkU8(31)),
1244 mkU64(1)),
1245 mkU64(1));
sewardjedccb442014-10-02 11:32:39 +00001246 }
1247
sewardjaedb8592014-10-02 16:15:30 +00001248 /* 12, 13 */
sewardj03540352005-04-26 01:53:48 +00001249 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
1250 /* long sub/cmp, then L (signed less than)
1251 --> test dst <s src */
sewardj6d709a92005-04-27 11:52:40 +00001252 return unop(Iop_1Uto64,
sewardj9cc2bbf2011-06-05 17:56:03 +00001253 binop(Iop_CmpLT32S,
1254 unop(Iop_64to32, cc_dep1),
1255 unop(Iop_64to32, cc_dep2)));
sewardj03540352005-04-26 01:53:48 +00001256 }
sewardjaedb8592014-10-02 16:15:30 +00001257 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNL)) {
1258 /* long sub/cmp, then NL (signed greater than or equal)
1259 --> test dst >=s src
1260 --> test src <=s dst */
1261 return unop(Iop_1Uto64,
1262 binop(Iop_CmpLE32S,
1263 unop(Iop_64to32, cc_dep2),
1264 unop(Iop_64to32, cc_dep1)));
1265 }
sewardj03540352005-04-26 01:53:48 +00001266
sewardjedccb442014-10-02 11:32:39 +00001267 /* 14, 15 */
sewardj03540352005-04-26 01:53:48 +00001268 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
sewardj3f81c4e2005-07-20 00:30:37 +00001269 /* long sub/cmp, then LE (signed less than or equal)
1270 --> test dst <=s src */
sewardj6d709a92005-04-27 11:52:40 +00001271 return unop(Iop_1Uto64,
sewardj9cc2bbf2011-06-05 17:56:03 +00001272 binop(Iop_CmpLE32S,
1273 unop(Iop_64to32, cc_dep1),
1274 unop(Iop_64to32, cc_dep2)));
sewardj03540352005-04-26 01:53:48 +00001275
1276 }
sewardjff6b34a2010-01-15 09:54:55 +00001277 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
1278 /* long sub/cmp, then NLE (signed greater than)
1279 --> test !(dst <=s src)
1280 --> test (dst >s src)
1281 --> test (src <s dst) */
1282 return unop(Iop_1Uto64,
sewardj9cc2bbf2011-06-05 17:56:03 +00001283 binop(Iop_CmpLT32S,
1284 unop(Iop_64to32, cc_dep2),
1285 unop(Iop_64to32, cc_dep1)));
sewardjff6b34a2010-01-15 09:54:55 +00001286
1287 }
sewardj03540352005-04-26 01:53:48 +00001288
sewardj03540352005-04-26 01:53:48 +00001289 /*---------------- SUBW ----------------*/
1290
sewardj66a5e812015-02-04 19:05:13 +00001291 /* 4, 5 */
sewardja82b4762005-05-06 16:30:21 +00001292 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
1293 /* word sub/cmp, then Z --> test dst==src */
1294 return unop(Iop_1Uto64,
1295 binop(Iop_CmpEQ16,
1296 unop(Iop_64to16,cc_dep1),
1297 unop(Iop_64to16,cc_dep2)));
1298 }
sewardjbeb52912008-05-02 22:15:12 +00001299 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
1300 /* word sub/cmp, then NZ --> test dst!=src */
1301 return unop(Iop_1Uto64,
1302 binop(Iop_CmpNE16,
1303 unop(Iop_64to16,cc_dep1),
1304 unop(Iop_64to16,cc_dep2)));
1305 }
sewardj03540352005-04-26 01:53:48 +00001306
sewardj66a5e812015-02-04 19:05:13 +00001307 /* 6, */
sewardjaedb8592014-10-02 16:15:30 +00001308 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondBE)) {
1309 /* word sub/cmp, then BE (unsigned less than or equal)
1310 --> test dst <=u src */
1311 return unop(Iop_1Uto64,
1312 binop(Iop_CmpLE64U,
1313 binop(Iop_Shl64, cc_dep1, mkU8(48)),
1314 binop(Iop_Shl64, cc_dep2, mkU8(48))));
1315 }
1316
sewardj66a5e812015-02-04 19:05:13 +00001317 /* 14, */
sewardj3f81c4e2005-07-20 00:30:37 +00001318 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
sewardj3be608d2006-05-25 18:48:12 +00001319 /* word sub/cmp, then LE (signed less than or equal)
sewardj3f81c4e2005-07-20 00:30:37 +00001320 --> test dst <=s src */
1321 return unop(Iop_1Uto64,
1322 binop(Iop_CmpLE64S,
1323 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1324 binop(Iop_Shl64,cc_dep2,mkU8(48))));
1325
1326 }
1327
sewardj03540352005-04-26 01:53:48 +00001328 /*---------------- SUBB ----------------*/
1329
sewardj66a5e812015-02-04 19:05:13 +00001330 /* 2, 3 */
1331 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondB)) {
1332 /* byte sub/cmp, then B (unsigned less than)
1333 --> test dst <u src */
1334 return unop(Iop_1Uto64,
1335 binop(Iop_CmpLT64U,
1336 binop(Iop_And64, cc_dep1, mkU64(0xFF)),
1337 binop(Iop_And64, cc_dep2, mkU64(0xFF))));
1338 }
1339 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNB)) {
1340 /* byte sub/cmp, then NB (unsigned greater than or equal)
1341 --> test src <=u dst */
1342 /* Note, args are opposite way round from the usual */
1343 return unop(Iop_1Uto64,
1344 binop(Iop_CmpLE64U,
1345 binop(Iop_And64, cc_dep2, mkU64(0xFF)),
1346 binop(Iop_And64, cc_dep1, mkU64(0xFF))));
1347 }
1348
1349 /* 4, 5 */
sewardj03540352005-04-26 01:53:48 +00001350 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
1351 /* byte sub/cmp, then Z --> test dst==src */
sewardj6d709a92005-04-27 11:52:40 +00001352 return unop(Iop_1Uto64,
sewardj03540352005-04-26 01:53:48 +00001353 binop(Iop_CmpEQ8,
sewardj6d709a92005-04-27 11:52:40 +00001354 unop(Iop_64to8,cc_dep1),
1355 unop(Iop_64to8,cc_dep2)));
sewardj03540352005-04-26 01:53:48 +00001356 }
sewardj32d615b2006-08-25 12:52:19 +00001357 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
1358 /* byte sub/cmp, then NZ --> test dst!=src */
1359 return unop(Iop_1Uto64,
1360 binop(Iop_CmpNE8,
1361 unop(Iop_64to8,cc_dep1),
1362 unop(Iop_64to8,cc_dep2)));
1363 }
1364
sewardj66a5e812015-02-04 19:05:13 +00001365 /* 6, */
sewardje4304182011-06-06 10:17:46 +00001366 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) {
1367 /* byte sub/cmp, then BE (unsigned less than or equal)
1368 --> test dst <=u src */
1369 return unop(Iop_1Uto64,
1370 binop(Iop_CmpLE64U,
1371 binop(Iop_And64, cc_dep1, mkU64(0xFF)),
1372 binop(Iop_And64, cc_dep2, mkU64(0xFF))));
1373 }
1374
sewardj66a5e812015-02-04 19:05:13 +00001375 /* 8, 9 */
sewardj3be608d2006-05-25 18:48:12 +00001376 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
1377 && isU64(cc_dep2, 0)) {
1378 /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
1379 --> test dst <s 0
1380 --> (ULong)dst[7]
1381 This is yet another scheme by which gcc figures out if the
1382 top bit of a byte is 1 or 0. See also LOGICB/CondS below. */
1383 /* Note: isU64(cc_dep2, 0) is correct, even though this is
1384 for an 8-bit comparison, since the args to the helper
1385 function are always U64s. */
1386 return binop(Iop_And64,
1387 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1388 mkU64(1));
1389 }
sewardjcd538b42008-03-31 21:57:17 +00001390 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
1391 && isU64(cc_dep2, 0)) {
1392 /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
1393 --> test !(dst <s 0)
1394 --> (ULong) !dst[7]
1395 */
1396 return binop(Iop_Xor64,
1397 binop(Iop_And64,
1398 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1399 mkU64(1)),
1400 mkU64(1));
1401 }
sewardj3be608d2006-05-25 18:48:12 +00001402
sewardj4b06a0b2005-11-13 19:51:04 +00001403 /*---------------- LOGICQ ----------------*/
1404
1405 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
1406 /* long long and/or/xor, then Z --> test dst==0 */
1407 return unop(Iop_1Uto64,
1408 binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1409 }
sewardj0cd74732011-07-07 13:58:10 +00001410 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) {
1411 /* long long and/or/xor, then NZ --> test dst!=0 */
1412 return unop(Iop_1Uto64,
1413 binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1414 }
sewardj4b06a0b2005-11-13 19:51:04 +00001415
sewardj77fd8462005-11-13 20:30:24 +00001416 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
1417 /* long long and/or/xor, then L
1418 LOGIC sets SF and ZF according to the
1419 result and makes OF be zero. L computes SF ^ OF, but
1420 OF is zero, so this reduces to SF -- which will be 1 iff
1421 the result is < signed 0. Hence ...
1422 */
1423 return unop(Iop_1Uto64,
1424 binop(Iop_CmpLT64S,
1425 cc_dep1,
1426 mkU64(0)));
1427 }
1428
sewardj03540352005-04-26 01:53:48 +00001429 /*---------------- LOGICL ----------------*/
1430
1431 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
1432 /* long and/or/xor, then Z --> test dst==0 */
sewardj6d709a92005-04-27 11:52:40 +00001433 return unop(Iop_1Uto64,
sewardj9cc2bbf2011-06-05 17:56:03 +00001434 binop(Iop_CmpEQ32,
1435 unop(Iop_64to32, cc_dep1),
1436 mkU32(0)));
sewardj03540352005-04-26 01:53:48 +00001437 }
sewardj005b4ef2005-07-20 01:12:48 +00001438 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
1439 /* long and/or/xor, then NZ --> test dst!=0 */
1440 return unop(Iop_1Uto64,
sewardj9cc2bbf2011-06-05 17:56:03 +00001441 binop(Iop_CmpNE32,
1442 unop(Iop_64to32, cc_dep1),
1443 mkU32(0)));
sewardj005b4ef2005-07-20 01:12:48 +00001444 }
1445
sewardj03540352005-04-26 01:53:48 +00001446 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
1447 /* long and/or/xor, then LE
1448 This is pretty subtle. LOGIC sets SF and ZF according to the
sewardj77fd8462005-11-13 20:30:24 +00001449 result and makes OF be zero. LE computes (SF ^ OF) | ZF, but
1450 OF is zero, so this reduces to SF | ZF -- which will be 1 iff
sewardj03540352005-04-26 01:53:48 +00001451 the result is <=signed 0. Hence ...
1452 */
sewardj6d709a92005-04-27 11:52:40 +00001453 return unop(Iop_1Uto64,
sewardj9cc2bbf2011-06-05 17:56:03 +00001454 binop(Iop_CmpLE32S,
1455 unop(Iop_64to32, cc_dep1),
1456 mkU32(0)));
sewardj03540352005-04-26 01:53:48 +00001457 }
1458
sewardje4304182011-06-06 10:17:46 +00001459 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) {
1460 /* long and/or/xor, then S --> (ULong)result[31] */
1461 return binop(Iop_And64,
1462 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1463 mkU64(1));
1464 }
1465 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) {
1466 /* long and/or/xor, then S --> (ULong) ~ result[31] */
1467 return binop(Iop_Xor64,
1468 binop(Iop_And64,
1469 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1470 mkU64(1)),
1471 mkU64(1));
1472 }
1473
sewardj61acf4c2012-04-25 14:33:03 +00001474 /*---------------- LOGICW ----------------*/
1475
1476 if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondZ)) {
1477 /* word and/or/xor, then Z --> test dst==0 */
1478 return unop(Iop_1Uto64,
1479 binop(Iop_CmpEQ64,
1480 binop(Iop_And64, cc_dep1, mkU64(0xFFFF)),
1481 mkU64(0)));
1482 }
1483 if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondNZ)) {
1484 /* word and/or/xor, then NZ --> test dst!=0 */
1485 return unop(Iop_1Uto64,
1486 binop(Iop_CmpNE64,
1487 binop(Iop_And64, cc_dep1, mkU64(0xFFFF)),
1488 mkU64(0)));
1489 }
1490
sewardj4b06a0b2005-11-13 19:51:04 +00001491 /*---------------- LOGICB ----------------*/
1492
1493 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
1494 /* byte and/or/xor, then Z --> test dst==0 */
1495 return unop(Iop_1Uto64,
1496 binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)),
1497 mkU64(0)));
1498 }
sewardjff6b34a2010-01-15 09:54:55 +00001499 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) {
1500 /* byte and/or/xor, then NZ --> test dst!=0 */
1501 return unop(Iop_1Uto64,
1502 binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)),
1503 mkU64(0)));
1504 }
sewardj3f81c4e2005-07-20 00:30:37 +00001505
sewardj346d9a12006-05-21 01:02:31 +00001506 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
1507 /* this is an idiom gcc sometimes uses to find out if the top
1508 bit of a byte register is set: eg testb %al,%al; js ..
1509 Since it just depends on the top bit of the byte, extract
1510 that bit and explicitly get rid of all the rest. This
1511 helps memcheck avoid false positives in the case where any
1512 of the other bits in the byte are undefined. */
1513 /* byte and/or/xor, then S --> (UInt)result[7] */
1514 return binop(Iop_And64,
1515 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1516 mkU64(1));
1517 }
sewardja6d08092011-03-27 22:16:08 +00001518 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) {
1519 /* byte and/or/xor, then NS --> (UInt)!result[7] */
1520 return binop(Iop_Xor64,
1521 binop(Iop_And64,
1522 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1523 mkU64(1)),
1524 mkU64(1));
1525 }
sewardj346d9a12006-05-21 01:02:31 +00001526
sewardj3f81c4e2005-07-20 00:30:37 +00001527 /*---------------- INCB ----------------*/
1528
1529 if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
sewardj4df975f2010-02-28 04:51:02 +00001530 /* 8-bit inc, then LE --> sign bit of the arg */
1531 return binop(Iop_And64,
1532 binop(Iop_Shr64,
1533 binop(Iop_Sub64, cc_dep1, mkU64(1)),
1534 mkU8(7)),
1535 mkU64(1));
sewardj3f81c4e2005-07-20 00:30:37 +00001536 }
1537
sewardj7784bd22006-12-29 01:54:36 +00001538 /*---------------- INCW ----------------*/
1539
1540 if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
1541 /* 16-bit inc, then Z --> test dst == 0 */
1542 return unop(Iop_1Uto64,
1543 binop(Iop_CmpEQ64,
1544 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1545 mkU64(0)));
1546 }
1547
sewardj77fd8462005-11-13 20:30:24 +00001548 /*---------------- DECL ----------------*/
1549
1550 if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
1551 /* dec L, then Z --> test dst == 0 */
1552 return unop(Iop_1Uto64,
sewardj9cc2bbf2011-06-05 17:56:03 +00001553 binop(Iop_CmpEQ32,
1554 unop(Iop_64to32, cc_dep1),
1555 mkU32(0)));
sewardj77fd8462005-11-13 20:30:24 +00001556 }
1557
sewardjb6d02ea2005-08-01 13:35:18 +00001558 /*---------------- DECW ----------------*/
1559
1560 if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
1561 /* 16-bit dec, then NZ --> test dst != 0 */
1562 return unop(Iop_1Uto64,
1563 binop(Iop_CmpNE64,
1564 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1565 mkU64(0)));
1566 }
1567
sewardj7fc494b2005-05-05 12:05:11 +00001568 /*---------------- COPY ----------------*/
1569 /* This can happen, as a result of amd64 FP compares: "comisd ... ;
1570 jbe" for example. */
1571
1572 if (isU64(cc_op, AMD64G_CC_OP_COPY) &&
1573 (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
1574 /* COPY, then BE --> extract C and Z from dep1, and test (C
1575 or Z == 1). */
1576 /* COPY, then NBE --> extract C and Z from dep1, and test (C
1577 or Z == 0). */
1578 ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
1579 return
1580 unop(
1581 Iop_1Uto64,
1582 binop(
1583 Iop_CmpEQ64,
1584 binop(
1585 Iop_And64,
1586 binop(
1587 Iop_Or64,
1588 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
1589 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))
1590 ),
1591 mkU64(1)
1592 ),
1593 mkU64(nnn)
1594 )
1595 );
1596 }
1597
sewardj9f05a642005-05-12 02:14:52 +00001598 if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) {
1599 /* COPY, then B --> extract C dep1, and test (C == 1). */
1600 return
1601 unop(
1602 Iop_1Uto64,
1603 binop(
1604 Iop_CmpNE64,
1605 binop(
1606 Iop_And64,
1607 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
1608 mkU64(1)
1609 ),
1610 mkU64(0)
1611 )
1612 );
1613 }
sewardj03540352005-04-26 01:53:48 +00001614
sewardjb235e5b2006-11-27 04:09:52 +00001615 if (isU64(cc_op, AMD64G_CC_OP_COPY)
1616 && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
1617 /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
1618 /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
1619 UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
1620 return
1621 unop(
1622 Iop_1Uto64,
1623 binop(
1624 Iop_CmpEQ64,
1625 binop(
1626 Iop_And64,
1627 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
1628 mkU64(1)
1629 ),
1630 mkU64(nnn)
1631 )
1632 );
1633 }
1634
1635 if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) {
1636 /* COPY, then P --> extract P from dep1, and test (P == 1). */
1637 return
1638 unop(
1639 Iop_1Uto64,
1640 binop(
1641 Iop_CmpNE64,
1642 binop(
1643 Iop_And64,
1644 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
1645 mkU64(1)
1646 ),
1647 mkU64(0)
1648 )
1649 );
1650 }
1651
sewardj03540352005-04-26 01:53:48 +00001652 return NULL;
1653 }
1654
1655 /* --------- specialising "amd64g_calculate_rflags_c" --------- */
1656
1657 if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
1658 /* specialise calls to above "calculate_rflags_c" function */
1659 IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
1660 vassert(arity == 4);
1661 cc_op = args[0];
1662 cc_dep1 = args[1];
1663 cc_dep2 = args[2];
1664 cc_ndep = args[3];
1665
sewardj77fd8462005-11-13 20:30:24 +00001666 if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
1667 /* C after sub denotes unsigned less than */
1668 return unop(Iop_1Uto64,
1669 binop(Iop_CmpLT64U,
1670 cc_dep1,
1671 cc_dep2));
1672 }
sewardj03540352005-04-26 01:53:48 +00001673 if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
1674 /* C after sub denotes unsigned less than */
sewardj6d709a92005-04-27 11:52:40 +00001675 return unop(Iop_1Uto64,
sewardj9cc2bbf2011-06-05 17:56:03 +00001676 binop(Iop_CmpLT32U,
1677 unop(Iop_64to32, cc_dep1),
1678 unop(Iop_64to32, cc_dep2)));
sewardj03540352005-04-26 01:53:48 +00001679 }
1680 if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
1681 /* C after sub denotes unsigned less than */
sewardj6d709a92005-04-27 11:52:40 +00001682 return unop(Iop_1Uto64,
sewardj03540352005-04-26 01:53:48 +00001683 binop(Iop_CmpLT64U,
1684 binop(Iop_And64,cc_dep1,mkU64(0xFF)),
sewardj6d709a92005-04-27 11:52:40 +00001685 binop(Iop_And64,cc_dep2,mkU64(0xFF))));
sewardj03540352005-04-26 01:53:48 +00001686 }
1687 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
1688 || isU64(cc_op, AMD64G_CC_OP_LOGICL)
1689 || isU64(cc_op, AMD64G_CC_OP_LOGICW)
1690 || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
1691 /* cflag after logic is zero */
1692 return mkU64(0);
1693 }
1694 if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL)
1695 || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
1696 /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
1697 return cc_ndep;
1698 }
sewardj7784bd22006-12-29 01:54:36 +00001699
1700# if 0
1701 if (cc_op->tag == Iex_Const) {
1702 vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
1703 }
1704# endif
sewardj03540352005-04-26 01:53:48 +00001705
1706 return NULL;
1707 }
1708
sewardjf8c37f72005-02-07 18:55:29 +00001709# undef unop
1710# undef binop
sewardj03540352005-04-26 01:53:48 +00001711# undef mkU64
sewardj9cc2bbf2011-06-05 17:56:03 +00001712# undef mkU32
sewardjf8c37f72005-02-07 18:55:29 +00001713# undef mkU8
1714
1715 return NULL;
sewardj44d494d2005-01-20 20:26:33 +00001716}
1717
sewardjf8c37f72005-02-07 18:55:29 +00001718
sewardj8d965312005-02-25 02:48:47 +00001719/*---------------------------------------------------------------*/
1720/*--- Supporting functions for x87 FPU activities. ---*/
1721/*---------------------------------------------------------------*/
1722
sewardj4f9847d2005-07-25 11:58:34 +00001723static inline Bool host_is_little_endian ( void )
1724{
1725 UInt x = 0x76543210;
1726 UChar* p = (UChar*)(&x);
1727 return toBool(*p == 0x10);
1728}
1729
1730/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
1731/* CALLED FROM GENERATED CODE: CLEAN HELPER */
1732ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl )
1733{
1734 Bool mantissaIsZero;
1735 Int bexp;
1736 UChar sign;
1737 UChar* f64;
1738
1739 vassert(host_is_little_endian());
1740
1741 /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
1742
1743 f64 = (UChar*)(&dbl);
1744 sign = toUChar( (f64[7] >> 7) & 1 );
1745
1746 /* First off, if the tag indicates the register was empty,
1747 return 1,0,sign,1 */
1748 if (tag == 0) {
1749 /* vex_printf("Empty\n"); */
1750 return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1)
1751 | AMD64G_FC_MASK_C0;
1752 }
1753
1754 bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
1755 bexp &= 0x7FF;
1756
1757 mantissaIsZero
1758 = toBool(
1759 (f64[6] & 0x0F) == 0
1760 && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
1761 );
1762
1763 /* If both exponent and mantissa are zero, the value is zero.
1764 Return 1,0,sign,0. */
1765 if (bexp == 0 && mantissaIsZero) {
1766 /* vex_printf("Zero\n"); */
1767 return AMD64G_FC_MASK_C3 | 0
1768 | (sign << AMD64G_FC_SHIFT_C1) | 0;
1769 }
1770
1771 /* If exponent is zero but mantissa isn't, it's a denormal.
1772 Return 1,1,sign,0. */
1773 if (bexp == 0 && !mantissaIsZero) {
1774 /* vex_printf("Denormal\n"); */
1775 return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2
1776 | (sign << AMD64G_FC_SHIFT_C1) | 0;
1777 }
1778
1779 /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
1780 Return 0,1,sign,1. */
1781 if (bexp == 0x7FF && mantissaIsZero) {
1782 /* vex_printf("Inf\n"); */
1783 return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1)
1784 | AMD64G_FC_MASK_C0;
1785 }
1786
1787 /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
1788 Return 0,0,sign,1. */
1789 if (bexp == 0x7FF && !mantissaIsZero) {
1790 /* vex_printf("NaN\n"); */
1791 return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
1792 }
1793
1794 /* Uh, ok, we give up. It must be a normal finite number.
1795 Return 0,1,sign,0.
1796 */
1797 /* vex_printf("normal\n"); */
1798 return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
1799}
1800
1801
sewardj5556e5e2011-01-21 18:05:19 +00001802/* This is used to implement both 'frstor' and 'fldenv'. The latter
1803 appears to differ from the former only in that the 8 FP registers
1804 themselves are not transferred into the guest state. */
static
VexEmNote do_put_x87 ( Bool moveRegs,
                       /*IN*/UChar* x87_state,
                       /*OUT*/VexGuestAMD64State* vex_state )
{
   Int stno, preg;
   UInt tag;
   ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87 = (Fpu_State*)x87_state;
   /* TOP lives in bits 13:11 of the x87 status word. */
   UInt ftop = (x87->env[FP_ENV_STAT] >> 11) & 7;
   UInt tagw = x87->env[FP_ENV_TAG];
   UInt fpucw = x87->env[FP_ENV_CTRL];
   /* C3,C2,C1,C0 condition bits of the status word (mask 0x4700). */
   UInt c3210 = x87->env[FP_ENV_STAT] & 0x4700;
   VexEmNote ew;
   UInt fpround;
   ULong pair;

   /* Copy registers and tags.  The image stores registers in ST
      order; the guest state stores them in physical-register order,
      hence the (stno + ftop) & 7 mapping.  Each register has a 2-bit
      tag in the image's tag word; 3 (11b) means empty. */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?  Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         if (moveRegs)
            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty: convert the 80-bit image value to
            the 64-bit representation the guest state uses. */
         if (moveRegs)
            convert_f80le_to_f64le( &x87->reg[10*stno],
                                    (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings. */
   pair = amd64g_check_fldcw ( (ULong)fpucw );
   fpround = (UInt)pair & 0xFFFFFFFFULL;  /* low 32 bits: rounding mode */
   ew = (VexEmNote)(pair >> 32);          /* high 32 bits: warning, if any */

   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}
1862
1863
sewardj5abcfe62007-01-10 04:59:33 +00001864/* Create an x87 FPU state from the guest state, as close as
1865 we can approximate it. */
1866static
1867void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
1868 /*OUT*/UChar* x87_state )
1869{
1870 Int i, stno, preg;
1871 UInt tagw;
1872 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1873 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1874 Fpu_State* x87 = (Fpu_State*)x87_state;
1875 UInt ftop = vex_state->guest_FTOP;
1876 UInt c3210 = vex_state->guest_FC3210;
1877
1878 for (i = 0; i < 14; i++)
1879 x87->env[i] = 0;
1880
1881 x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
1882 x87->env[FP_ENV_STAT]
1883 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
1884 x87->env[FP_ENV_CTRL]
1885 = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
1886
1887 /* Dump the register stack in ST order. */
1888 tagw = 0;
1889 for (stno = 0; stno < 8; stno++) {
1890 preg = (stno + ftop) & 7;
1891 if (vexTags[preg] == 0) {
1892 /* register is empty */
1893 tagw |= (3 << (2*preg));
1894 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1895 &x87->reg[10*stno] );
1896 } else {
1897 /* register is full. */
1898 tagw |= (0 << (2*preg));
1899 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1900 &x87->reg[10*stno] );
1901 }
1902 }
1903 x87->env[FP_ENV_TAG] = toUShort(tagw);
1904}
1905
1906
1907/* CALLED FROM GENERATED CODE */
1908/* DIRTY HELPER (reads guest state, writes guest mem) */
1909/* NOTE: only handles 32-bit format (no REX.W on the insn) */
sewardj28d71ed2014-09-07 23:23:17 +00001910void amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM ( VexGuestAMD64State* gst,
1911 HWord addr )
sewardj5abcfe62007-01-10 04:59:33 +00001912{
1913 /* Derived from values obtained from
1914 vendor_id : AuthenticAMD
1915 cpu family : 15
1916 model : 12
1917 model name : AMD Athlon(tm) 64 Processor 3200+
1918 stepping : 0
1919 cpu MHz : 2200.000
1920 cache size : 512 KB
1921 */
1922 /* Somewhat roundabout, but at least it's simple. */
1923 Fpu_State tmp;
1924 UShort* addrS = (UShort*)addr;
1925 UChar* addrC = (UChar*)addr;
sewardj5abcfe62007-01-10 04:59:33 +00001926 UInt mxcsr;
1927 UShort fp_tags;
1928 UInt summary_tags;
1929 Int r, stno;
1930 UShort *srcS, *dstS;
1931
1932 do_get_x87( gst, (UChar*)&tmp );
1933 mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );
1934
1935 /* Now build the proper fxsave image from the x87 image we just
1936 made. */
1937
1938 addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
1939 addrS[1] = tmp.env[FP_ENV_STAT]; /* FCW: fpu status word */
1940
1941 /* set addrS[2] in an endian-independent way */
1942 summary_tags = 0;
1943 fp_tags = tmp.env[FP_ENV_TAG];
1944 for (r = 0; r < 8; r++) {
1945 if ( ((fp_tags >> (2*r)) & 3) != 3 )
1946 summary_tags |= (1 << r);
1947 }
1948 addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */
1949 addrC[5] = 0; /* pad */
1950
1951 /* FOP: faulting fpu opcode. From experimentation, the real CPU
1952 does not write this field. (?!) */
1953 addrS[3] = 0; /* BOGUS */
1954
1955 /* RIP (Last x87 instruction pointer). From experimentation, the
1956 real CPU does not write this field. (?!) */
1957 addrS[4] = 0; /* BOGUS */
1958 addrS[5] = 0; /* BOGUS */
1959 addrS[6] = 0; /* BOGUS */
1960 addrS[7] = 0; /* BOGUS */
1961
1962 /* RDP (Last x87 data pointer). From experimentation, the real CPU
1963 does not write this field. (?!) */
1964 addrS[8] = 0; /* BOGUS */
1965 addrS[9] = 0; /* BOGUS */
1966 addrS[10] = 0; /* BOGUS */
1967 addrS[11] = 0; /* BOGUS */
1968
1969 addrS[12] = toUShort(mxcsr); /* MXCSR */
1970 addrS[13] = toUShort(mxcsr >> 16);
1971
1972 addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
1973 addrS[15] = 0x0000; /* MXCSR mask (hi16) */
1974
1975 /* Copy in the FP registers, in ST order. */
1976 for (stno = 0; stno < 8; stno++) {
1977 srcS = (UShort*)(&tmp.reg[10*stno]);
1978 dstS = (UShort*)(&addrS[16 + 8*stno]);
1979 dstS[0] = srcS[0];
1980 dstS[1] = srcS[1];
1981 dstS[2] = srcS[2];
1982 dstS[3] = srcS[3];
1983 dstS[4] = srcS[4];
1984 dstS[5] = 0;
1985 dstS[6] = 0;
1986 dstS[7] = 0;
1987 }
1988
1989 /* That's the first 160 bytes of the image done. Now only %xmm0
sewardj28d71ed2014-09-07 23:23:17 +00001990 .. %xmm15 remain to be copied, and we let the generated IR do
1991 that, so as to make Memcheck's definedness flow for the non-XMM
1992 parts independant from that of the all the other control and
1993 status words in the structure. This avoids the false positives
1994 shown in #291310. */
sewardj5abcfe62007-01-10 04:59:33 +00001995}
1996
1997
sewardj5556e5e2011-01-21 18:05:19 +00001998/* CALLED FROM GENERATED CODE */
1999/* DIRTY HELPER (writes guest state, reads guest mem) */
sewardj28d71ed2014-09-07 23:23:17 +00002000VexEmNote amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM ( VexGuestAMD64State* gst,
2001 HWord addr )
sewardj5556e5e2011-01-21 18:05:19 +00002002{
2003 Fpu_State tmp;
florian6ef84be2012-08-26 03:20:07 +00002004 VexEmNote warnX87 = EmNote_NONE;
2005 VexEmNote warnXMM = EmNote_NONE;
sewardj5556e5e2011-01-21 18:05:19 +00002006 UShort* addrS = (UShort*)addr;
2007 UChar* addrC = (UChar*)addr;
sewardj5556e5e2011-01-21 18:05:19 +00002008 UShort fp_tags;
2009 Int r, stno, i;
2010
sewardj28d71ed2014-09-07 23:23:17 +00002011 /* Don't restore %xmm0 .. %xmm15, for the same reasons that
2012 amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM doesn't save them. See
2013 comment in that function for details. */
sewardj5556e5e2011-01-21 18:05:19 +00002014
2015 /* Copy the x87 registers out of the image, into a temporary
2016 Fpu_State struct. */
2017 for (i = 0; i < 14; i++) tmp.env[i] = 0;
2018 for (i = 0; i < 80; i++) tmp.reg[i] = 0;
2019 /* fill in tmp.reg[0..7] */
2020 for (stno = 0; stno < 8; stno++) {
2021 UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
2022 UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
2023 dstS[0] = srcS[0];
2024 dstS[1] = srcS[1];
2025 dstS[2] = srcS[2];
2026 dstS[3] = srcS[3];
2027 dstS[4] = srcS[4];
2028 }
2029 /* fill in tmp.env[0..13] */
2030 tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
2031 tmp.env[FP_ENV_STAT] = addrS[1]; /* FCW: fpu status word */
2032
2033 fp_tags = 0;
2034 for (r = 0; r < 8; r++) {
2035 if (addrC[4] & (1<<r))
2036 fp_tags |= (0 << (2*r)); /* EMPTY */
2037 else
2038 fp_tags |= (3 << (2*r)); /* VALID -- not really precise enough. */
2039 }
2040 tmp.env[FP_ENV_TAG] = fp_tags;
2041
2042 /* Now write 'tmp' into the guest state. */
2043 warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );
2044
2045 { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
2046 | ((((UInt)addrS[13]) & 0xFFFF) << 16);
2047 ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 );
2048
florian6ef84be2012-08-26 03:20:07 +00002049 warnXMM = (VexEmNote)(w64 >> 32);
sewardj5556e5e2011-01-21 18:05:19 +00002050
2051 gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
2052 }
2053
2054 /* Prefer an X87 emwarn over an XMM one, if both exist. */
florian6ef84be2012-08-26 03:20:07 +00002055 if (warnX87 != EmNote_NONE)
sewardj5556e5e2011-01-21 18:05:19 +00002056 return warnX87;
2057 else
2058 return warnXMM;
2059}
2060
2061
sewardj0585a032005-11-05 02:55:06 +00002062/* DIRTY HELPER (writes guest state) */
sewardj8d965312005-02-25 02:48:47 +00002063/* Initialise the x87 FPU state as per 'finit'. */
sewardj8d965312005-02-25 02:48:47 +00002064void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
2065{
2066 Int i;
2067 gst->guest_FTOP = 0;
2068 for (i = 0; i < 8; i++) {
2069 gst->guest_FPTAG[i] = 0; /* empty */
2070 gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
2071 }
2072 gst->guest_FPROUND = (ULong)Irrm_NEAREST;
2073 gst->guest_FC3210 = 0;
2074}
2075
sewardjd0a12df2005-02-10 02:07:43 +00002076
sewardj924215b2005-03-26 21:50:31 +00002077/* CALLED FROM GENERATED CODE */
2078/* DIRTY HELPER (reads guest memory) */
florianbdf99f02015-01-04 17:20:19 +00002079ULong amd64g_dirtyhelper_loadF80le ( Addr addrU )
sewardj924215b2005-03-26 21:50:31 +00002080{
2081 ULong f64;
florianbdf99f02015-01-04 17:20:19 +00002082 convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
sewardj924215b2005-03-26 21:50:31 +00002083 return f64;
2084}
2085
2086/* CALLED FROM GENERATED CODE */
2087/* DIRTY HELPER (writes guest memory) */
florianbdf99f02015-01-04 17:20:19 +00002088void amd64g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
sewardj924215b2005-03-26 21:50:31 +00002089{
florianbdf99f02015-01-04 17:20:19 +00002090 convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
sewardj924215b2005-03-26 21:50:31 +00002091}
2092
2093
sewardjbcbb9de2005-03-27 02:22:32 +00002094/* CALLED FROM GENERATED CODE */
2095/* CLEAN HELPER */
2096/* mxcsr[15:0] contains a SSE native format MXCSR value.
2097 Extract from it the required SSEROUND value and any resulting
2098 emulation warning, and return (warn << 32) | sseround value.
2099*/
2100ULong amd64g_check_ldmxcsr ( ULong mxcsr )
2101{
2102 /* Decide on a rounding mode. mxcsr[14:13] holds it. */
2103 /* NOTE, encoded exactly as per enum IRRoundingMode. */
2104 ULong rmode = (mxcsr >> 13) & 3;
2105
2106 /* Detect any required emulation warnings. */
florian6ef84be2012-08-26 03:20:07 +00002107 VexEmNote ew = EmNote_NONE;
sewardjbcbb9de2005-03-27 02:22:32 +00002108
2109 if ((mxcsr & 0x1F80) != 0x1F80) {
2110 /* unmasked exceptions! */
2111 ew = EmWarn_X86_sseExns;
2112 }
2113 else
2114 if (mxcsr & (1<<15)) {
2115 /* FZ is set */
2116 ew = EmWarn_X86_fz;
2117 }
2118 else
2119 if (mxcsr & (1<<6)) {
2120 /* DAZ is set */
2121 ew = EmWarn_X86_daz;
2122 }
2123
2124 return (((ULong)ew) << 32) | ((ULong)rmode);
2125}
2126
2127
2128/* CALLED FROM GENERATED CODE */
2129/* CLEAN HELPER */
2130/* Given sseround as an IRRoundingMode value, create a suitable SSE
2131 native format MXCSR value. */
2132ULong amd64g_create_mxcsr ( ULong sseround )
2133{
2134 sseround &= 3;
2135 return 0x1F80 | (sseround << 13);
2136}
2137
2138
sewardj5e205372005-05-09 02:57:08 +00002139/* CLEAN HELPER */
2140/* fpucw[15:0] contains a x87 native format FPU control word.
2141 Extract from it the required FPROUND value and any resulting
2142 emulation warning, and return (warn << 32) | fpround value.
2143*/
2144ULong amd64g_check_fldcw ( ULong fpucw )
2145{
2146 /* Decide on a rounding mode. fpucw[11:10] holds it. */
2147 /* NOTE, encoded exactly as per enum IRRoundingMode. */
2148 ULong rmode = (fpucw >> 10) & 3;
2149
2150 /* Detect any required emulation warnings. */
florian6ef84be2012-08-26 03:20:07 +00002151 VexEmNote ew = EmNote_NONE;
sewardj5e205372005-05-09 02:57:08 +00002152
2153 if ((fpucw & 0x3F) != 0x3F) {
2154 /* unmasked exceptions! */
2155 ew = EmWarn_X86_x87exns;
2156 }
2157 else
2158 if (((fpucw >> 8) & 3) != 3) {
2159 /* unsupported precision */
2160 ew = EmWarn_X86_x87precision;
2161 }
2162
2163 return (((ULong)ew) << 32) | ((ULong)rmode);
2164}
2165
2166
2167/* CLEAN HELPER */
2168/* Given fpround as an IRRoundingMode value, create a suitable x87
2169 native format FPU control word. */
2170ULong amd64g_create_fpucw ( ULong fpround )
2171{
2172 fpround &= 3;
2173 return 0x037F | (fpround << 10);
2174}
2175
sewardjbcbb9de2005-03-27 02:22:32 +00002176
sewardj4017a3b2005-06-13 12:17:27 +00002177/* This is used to implement 'fldenv'.
2178 Reads 28 bytes at x87_state[0 .. 27]. */
2179/* CALLED FROM GENERATED CODE */
2180/* DIRTY HELPER */
florian6ef84be2012-08-26 03:20:07 +00002181VexEmNote amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state,
sewardj4017a3b2005-06-13 12:17:27 +00002182 /*IN*/HWord x87_state)
2183{
sewardj9ae42a72012-02-16 14:18:56 +00002184 return do_put_x87( False, (UChar*)x87_state, vex_state );
sewardj4017a3b2005-06-13 12:17:27 +00002185}
2186
2187
2188/* CALLED FROM GENERATED CODE */
2189/* DIRTY HELPER */
2190/* Create an x87 FPU env from the guest state, as close as we can
2191 approximate it. Writes 28 bytes at x87_state[0..27]. */
2192void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state,
2193 /*OUT*/HWord x87_state )
2194{
2195 Int i, stno, preg;
2196 UInt tagw;
2197 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2198 Fpu_State* x87 = (Fpu_State*)x87_state;
2199 UInt ftop = vex_state->guest_FTOP;
2200 ULong c3210 = vex_state->guest_FC3210;
2201
2202 for (i = 0; i < 14; i++)
2203 x87->env[i] = 0;
2204
2205 x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
2206 x87->env[FP_ENV_STAT]
sewardj81d72ea2005-06-14 21:59:16 +00002207 = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) ));
sewardj4017a3b2005-06-13 12:17:27 +00002208 x87->env[FP_ENV_CTRL]
sewardj81d72ea2005-06-14 21:59:16 +00002209 = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) ));
sewardj4017a3b2005-06-13 12:17:27 +00002210
2211 /* Compute the x87 tag word. */
2212 tagw = 0;
2213 for (stno = 0; stno < 8; stno++) {
2214 preg = (stno + ftop) & 7;
2215 if (vexTags[preg] == 0) {
2216 /* register is empty */
2217 tagw |= (3 << (2*preg));
2218 } else {
2219 /* register is full. */
2220 tagw |= (0 << (2*preg));
2221 }
2222 }
2223 x87->env[FP_ENV_TAG] = toUShort(tagw);
2224
2225 /* We don't dump the x87 registers, tho. */
2226}
2227
2228
sewardj9ae42a72012-02-16 14:18:56 +00002229/* This is used to implement 'fnsave'.
2230 Writes 108 bytes at x87_state[0 .. 107]. */
2231/* CALLED FROM GENERATED CODE */
2232/* DIRTY HELPER */
2233void amd64g_dirtyhelper_FNSAVE ( /*IN*/VexGuestAMD64State* vex_state,
2234 /*OUT*/HWord x87_state)
2235{
2236 do_get_x87( vex_state, (UChar*)x87_state );
2237}
2238
2239
2240/* This is used to implement 'fnsaves'.
2241 Writes 94 bytes at x87_state[0 .. 93]. */
2242/* CALLED FROM GENERATED CODE */
2243/* DIRTY HELPER */
2244void amd64g_dirtyhelper_FNSAVES ( /*IN*/VexGuestAMD64State* vex_state,
2245 /*OUT*/HWord x87_state)
2246{
2247 Int i, stno, preg;
2248 UInt tagw;
2249 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2250 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2251 Fpu_State_16* x87 = (Fpu_State_16*)x87_state;
2252 UInt ftop = vex_state->guest_FTOP;
2253 UInt c3210 = vex_state->guest_FC3210;
2254
2255 for (i = 0; i < 7; i++)
2256 x87->env[i] = 0;
2257
2258 x87->env[FPS_ENV_STAT]
2259 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
2260 x87->env[FPS_ENV_CTRL]
2261 = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
2262
2263 /* Dump the register stack in ST order. */
2264 tagw = 0;
2265 for (stno = 0; stno < 8; stno++) {
2266 preg = (stno + ftop) & 7;
2267 if (vexTags[preg] == 0) {
2268 /* register is empty */
2269 tagw |= (3 << (2*preg));
2270 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2271 &x87->reg[10*stno] );
2272 } else {
2273 /* register is full. */
2274 tagw |= (0 << (2*preg));
2275 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2276 &x87->reg[10*stno] );
2277 }
2278 }
2279 x87->env[FPS_ENV_TAG] = toUShort(tagw);
2280}
2281
2282
2283/* This is used to implement 'frstor'.
2284 Reads 108 bytes at x87_state[0 .. 107]. */
2285/* CALLED FROM GENERATED CODE */
2286/* DIRTY HELPER */
florian6ef84be2012-08-26 03:20:07 +00002287VexEmNote amd64g_dirtyhelper_FRSTOR ( /*OUT*/VexGuestAMD64State* vex_state,
sewardj9ae42a72012-02-16 14:18:56 +00002288 /*IN*/HWord x87_state)
2289{
2290 return do_put_x87( True, (UChar*)x87_state, vex_state );
2291}
2292
2293
2294/* This is used to implement 'frstors'.
2295 Reads 94 bytes at x87_state[0 .. 93]. */
2296/* CALLED FROM GENERATED CODE */
2297/* DIRTY HELPER */
florian6ef84be2012-08-26 03:20:07 +00002298VexEmNote amd64g_dirtyhelper_FRSTORS ( /*OUT*/VexGuestAMD64State* vex_state,
sewardj9ae42a72012-02-16 14:18:56 +00002299 /*IN*/HWord x87_state)
2300{
2301 Int stno, preg;
2302 UInt tag;
2303 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2304 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2305 Fpu_State_16* x87 = (Fpu_State_16*)x87_state;
2306 UInt ftop = (x87->env[FPS_ENV_STAT] >> 11) & 7;
2307 UInt tagw = x87->env[FPS_ENV_TAG];
2308 UInt fpucw = x87->env[FPS_ENV_CTRL];
2309 UInt c3210 = x87->env[FPS_ENV_STAT] & 0x4700;
florian6ef84be2012-08-26 03:20:07 +00002310 VexEmNote ew;
sewardj9ae42a72012-02-16 14:18:56 +00002311 UInt fpround;
2312 ULong pair;
2313
2314 /* Copy registers and tags */
2315 for (stno = 0; stno < 8; stno++) {
2316 preg = (stno + ftop) & 7;
2317 tag = (tagw >> (2*preg)) & 3;
2318 if (tag == 3) {
2319 /* register is empty */
2320 /* hmm, if it's empty, does it still get written? Probably
2321 safer to say it does. If we don't, memcheck could get out
2322 of sync, in that it thinks all FP registers are defined by
2323 this helper, but in reality some have not been updated. */
2324 vexRegs[preg] = 0; /* IEEE754 64-bit zero */
2325 vexTags[preg] = 0;
2326 } else {
2327 /* register is non-empty */
2328 convert_f80le_to_f64le( &x87->reg[10*stno],
2329 (UChar*)&vexRegs[preg] );
2330 vexTags[preg] = 1;
2331 }
2332 }
2333
2334 /* stack pointer */
2335 vex_state->guest_FTOP = ftop;
2336
2337 /* status word */
2338 vex_state->guest_FC3210 = c3210;
2339
2340 /* handle the control word, setting FPROUND and detecting any
2341 emulation warnings. */
2342 pair = amd64g_check_fldcw ( (ULong)fpucw );
2343 fpround = (UInt)pair & 0xFFFFFFFFULL;
florian6ef84be2012-08-26 03:20:07 +00002344 ew = (VexEmNote)(pair >> 32);
sewardj9ae42a72012-02-16 14:18:56 +00002345
2346 vex_state->guest_FPROUND = fpround & 3;
2347
2348 /* emulation warnings --> caller */
2349 return ew;
2350}
2351
2352
sewardjd0a12df2005-02-10 02:07:43 +00002353/*---------------------------------------------------------------*/
2354/*--- Misc integer helpers, including rotates and CPUID. ---*/
2355/*---------------------------------------------------------------*/
2356
sewardje9d8a262009-07-01 08:06:34 +00002357/* Claim to be the following CPU, which is probably representative of
2358 the lowliest (earliest) amd64 offerings. It can do neither sse3
2359 nor cx16.
2360
2361 vendor_id : AuthenticAMD
2362 cpu family : 15
2363 model : 5
2364 model name : AMD Opteron (tm) Processor 848
2365 stepping : 10
2366 cpu MHz : 1797.682
2367 cache size : 1024 KB
2368 fpu : yes
2369 fpu_exception : yes
2370 cpuid level : 1
2371 wp : yes
2372 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2373 mtrr pge mca cmov pat pse36 clflush mmx fxsr
2374 sse sse2 syscall nx mmxext lm 3dnowext 3dnow
2375 bogomips : 3600.62
2376 TLB size : 1088 4K pages
2377 clflush size : 64
2378 cache_alignment : 64
2379 address sizes : 40 bits physical, 48 bits virtual
sewardj1aa3aef2012-02-21 08:53:54 +00002380 power management: ts fid vid ttp
2381
2382 2012-Feb-21: don't claim 3dnow or 3dnowext, since in fact
2383 we don't support them. See #291568. 3dnow is 80000001.EDX.31
2384 and 3dnowext is 80000001.EDX.30.
sewardje9d8a262009-07-01 08:06:34 +00002385*/
void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
{
#  define SET_ABCD(_a,_b,_c,_d)                \
      do { st->guest_RAX = (ULong)(_a);        \
           st->guest_RBX = (ULong)(_b);        \
           st->guest_RCX = (ULong)(_c);        \
           st->guest_RDX = (ULong)(_d);        \
      } while (0)

   /* Dispatch on the requested CPUID leaf (EAX).  The constants are
      transcribed from a real Opteron 848 -- see the comment above
      this function. */
   switch (0xFFFFFFFF & st->guest_RAX) {
      case 0x00000000:
         /* Max basic leaf (1) + vendor string "AuthenticAMD". */
         SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
         break;
      case 0x00000001:
         /* Family/model/stepping and standard feature flags. */
         SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
         break;
      case 0x80000000:
         /* Max extended leaf (0x80000018) + vendor string. */
         SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
         break;
      case 0x80000001:
         /* Don't claim to support 3dnow or 3dnowext.  0xe1d3fbff is
            the original it-is-supported value that the h/w provides.
            See #291568. */
         SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, /*0xe1d3fbff*/
                                                      0x21d3fbff);
         break;
      /* 0x80000002..0x80000004: processor brand string,
         "AMD Opteron (tm) Processor 848". */
      case 0x80000002:
         SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
         break;
      case 0x80000003:
         SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
         break;
      case 0x80000004:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000005:
         /* L1 cache and TLB information. */
         SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
         break;
      case 0x80000006:
         /* L2 cache and TLB information. */
         SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
         break;
      case 0x80000007:
         /* Advanced power management information. */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
         break;
      case 0x80000008:
         /* Physical/virtual address sizes. */
         SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
         break;
      default:
         /* Unknown leaf: return zeroes. */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
   }
#  undef SET_ABCD
}
2439
2440
2441/* Claim to be the following CPU (2 x ...), which is sse3 and cx16
2442 capable.
2443
sewardj150c9cd2008-02-09 01:16:02 +00002444 vendor_id : GenuineIntel
2445 cpu family : 6
2446 model : 15
2447 model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
2448 stepping : 6
2449 cpu MHz : 2394.000
2450 cache size : 4096 KB
2451 physical id : 0
2452 siblings : 2
2453 core id : 0
2454 cpu cores : 2
sewardjd0a12df2005-02-10 02:07:43 +00002455 fpu : yes
2456 fpu_exception : yes
sewardj150c9cd2008-02-09 01:16:02 +00002457 cpuid level : 10
sewardjd0a12df2005-02-10 02:07:43 +00002458 wp : yes
sewardj150c9cd2008-02-09 01:16:02 +00002459 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2460 mtrr pge mca cmov pat pse36 clflush dts acpi
2461 mmx fxsr sse sse2 ss ht tm syscall nx lm
2462 constant_tsc pni monitor ds_cpl vmx est tm2
2463 cx16 xtpr lahf_lm
2464 bogomips : 4798.78
sewardjd0a12df2005-02-10 02:07:43 +00002465 clflush size : 64
2466 cache_alignment : 64
sewardj150c9cd2008-02-09 01:16:02 +00002467 address sizes : 36 bits physical, 48 bits virtual
2468 power management:
sewardjd0a12df2005-02-10 02:07:43 +00002469*/
sewardje9d8a262009-07-01 08:06:34 +00002470void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st )
sewardjd0a12df2005-02-10 02:07:43 +00002471{
2472# define SET_ABCD(_a,_b,_c,_d) \
2473 do { st->guest_RAX = (ULong)(_a); \
2474 st->guest_RBX = (ULong)(_b); \
2475 st->guest_RCX = (ULong)(_c); \
2476 st->guest_RDX = (ULong)(_d); \
2477 } while (0)
2478
2479 switch (0xFFFFFFFF & st->guest_RAX) {
sewardj150c9cd2008-02-09 01:16:02 +00002480 case 0x00000000:
2481 SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
sewardjd0a12df2005-02-10 02:07:43 +00002482 break;
sewardj150c9cd2008-02-09 01:16:02 +00002483 case 0x00000001:
2484 SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
sewardjd0a12df2005-02-10 02:07:43 +00002485 break;
sewardj150c9cd2008-02-09 01:16:02 +00002486 case 0x00000002:
2487 SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
sewardjd0a12df2005-02-10 02:07:43 +00002488 break;
sewardj150c9cd2008-02-09 01:16:02 +00002489 case 0x00000003:
2490 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
sewardjd0a12df2005-02-10 02:07:43 +00002491 break;
sewardj32bfd3e2008-02-10 13:29:19 +00002492 case 0x00000004: {
2493 switch (0xFFFFFFFF & st->guest_RCX) {
2494 case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
2495 0x0000003f, 0x00000001); break;
2496 case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
2497 0x0000003f, 0x00000001); break;
2498 case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
2499 0x00000fff, 0x00000001); break;
2500 default: SET_ABCD(0x00000000, 0x00000000,
2501 0x00000000, 0x00000000); break;
2502 }
sewardjd0a12df2005-02-10 02:07:43 +00002503 break;
sewardj32bfd3e2008-02-10 13:29:19 +00002504 }
sewardj150c9cd2008-02-09 01:16:02 +00002505 case 0x00000005:
2506 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
sewardjd0a12df2005-02-10 02:07:43 +00002507 break;
sewardj150c9cd2008-02-09 01:16:02 +00002508 case 0x00000006:
2509 SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
sewardjd0a12df2005-02-10 02:07:43 +00002510 break;
sewardj150c9cd2008-02-09 01:16:02 +00002511 case 0x00000007:
2512 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
sewardjd0a12df2005-02-10 02:07:43 +00002513 break;
sewardj150c9cd2008-02-09 01:16:02 +00002514 case 0x00000008:
2515 SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
sewardjd0a12df2005-02-10 02:07:43 +00002516 break;
sewardj150c9cd2008-02-09 01:16:02 +00002517 case 0x00000009:
2518 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
sewardjd0a12df2005-02-10 02:07:43 +00002519 break;
sewardj150c9cd2008-02-09 01:16:02 +00002520 case 0x0000000a:
sewardj32bfd3e2008-02-10 13:29:19 +00002521 unhandled_eax_value:
sewardj150c9cd2008-02-09 01:16:02 +00002522 SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
2523 break;
2524 case 0x80000000:
2525 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2526 break;
2527 case 0x80000001:
2528 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
2529 break;
2530 case 0x80000002:
2531 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2532 break;
2533 case 0x80000003:
2534 SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
2535 break;
2536 case 0x80000004:
2537 SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
2538 break;
2539 case 0x80000005:
2540 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2541 break;
2542 case 0x80000006:
2543 SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
2544 break;
2545 case 0x80000007:
2546 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2547 break;
2548 case 0x80000008:
2549 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2550 break;
sewardjd0a12df2005-02-10 02:07:43 +00002551 default:
sewardj32bfd3e2008-02-10 13:29:19 +00002552 goto unhandled_eax_value;
sewardjd0a12df2005-02-10 02:07:43 +00002553 }
2554# undef SET_ABCD
2555}
2556
2557
sewardj0b2d3fe2010-08-06 07:59:38 +00002558/* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16
2559 capable.
2560
2561 vendor_id : GenuineIntel
2562 cpu family : 6
2563 model : 37
2564 model name : Intel(R) Core(TM) i5 CPU 670 @ 3.47GHz
2565 stepping : 2
2566 cpu MHz : 3334.000
2567 cache size : 4096 KB
2568 physical id : 0
2569 siblings : 4
2570 core id : 0
2571 cpu cores : 2
2572 apicid : 0
2573 initial apicid : 0
2574 fpu : yes
2575 fpu_exception : yes
2576 cpuid level : 11
2577 wp : yes
2578 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2579 mtrr pge mca cmov pat pse36 clflush dts acpi
2580 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
2581 lm constant_tsc arch_perfmon pebs bts rep_good
2582 xtopology nonstop_tsc aperfmperf pni pclmulqdq
2583 dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16
2584 xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida
2585 arat tpr_shadow vnmi flexpriority ept vpid
2586 bogomips : 6957.57
2587 clflush size : 64
2588 cache_alignment : 64
2589 address sizes : 36 bits physical, 48 bits virtual
2590 power management:
2591*/
void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st )
{
   /* Emulate CPUID for the Core i5 670 described in the /proc/cpuinfo
      dump above.  The requested leaf is read from guest RAX (and the
      subleaf from guest RCX where relevant); the four result words
      are written back to guest RAX/RBX/RCX/RDX. */
#  define SET_ABCD(_a,_b,_c,_d) \
      do { st->guest_RAX = (ULong)(_a); \
           st->guest_RBX = (ULong)(_b); \
           st->guest_RCX = (ULong)(_c); \
           st->guest_RDX = (ULong)(_d); \
      } while (0)

   UInt old_eax = (UInt)st->guest_RAX;  /* requested leaf */
   UInt old_ecx = (UInt)st->guest_RCX;  /* requested subleaf */

   switch (old_eax) {
      case 0x00000000:
         /* Max basic leaf (0xb) and "GenuineIntel" vendor string. */
         SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69);
         break;
      case 0x00000001:
         /* Family/model/stepping signature plus feature flags; the
            ECX word advertises (amongst others) SSE4.2 and
            CMPXCHG16B. */
         SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff, 0xbfebfbff);
         break;
      case 0x00000002:
         /* Cache/TLB descriptor bytes. */
         SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c);
         break;
      case 0x00000003:
         /* Processor serial number: not supported. */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000004:
         /* Deterministic cache parameters; one cache level per
            ECX subleaf. */
         switch (old_ecx) {
            case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
                                      0x0000003f, 0x00000000); break;
            case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f,
                                      0x0000007f, 0x00000000); break;
            case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
                                      0x000001ff, 0x00000000); break;
            case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
                                      0x00000fff, 0x00000002); break;
            default:         SET_ABCD(0x00000000, 0x00000000,
                                      0x00000000, 0x00000000); break;
         }
         break;
      case 0x00000005:
         /* MONITOR/MWAIT parameters. */
         SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
         break;
      case 0x00000006:
         /* Thermal and power management. */
         SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000);
         break;
      case 0x00000007:
         /* Structured extended features: none advertised. */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000008:
         /* Reserved. */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000009:
         /* Direct cache access: not supported. */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x0000000a:
         /* Architectural performance monitoring. */
         SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603);
         break;
      case 0x0000000b:
         /* Extended topology enumeration; subleaf selected by ECX,
            unrecognised subleaves echo old_ecx back in ECX. */
         switch (old_ecx) {
            case 0x00000000:
               SET_ABCD(0x00000001, 0x00000002,
                        0x00000100, 0x00000000); break;
            case 0x00000001:
               SET_ABCD(0x00000004, 0x00000004,
                        0x00000201, 0x00000000); break;
            default:
               SET_ABCD(0x00000000, 0x00000000,
                        old_ecx,    0x00000000); break;
         }
         break;
      case 0x0000000c:
         SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
         break;
      case 0x0000000d:
         /* NOTE(review): these subleaf values mirror leaf 0xb rather
            than being XSAVE state descriptions -- presumably copied
            from a real dump; confirm against hardware if touched. */
         switch (old_ecx) {
            case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
                                      0x00000100, 0x00000000); break;
            case 0x00000001: SET_ABCD(0x00000004, 0x00000004,
                                      0x00000201, 0x00000000); break;
            default:         SET_ABCD(0x00000000, 0x00000000,
                                      old_ecx,    0x00000000); break;
         }
         break;
      case 0x80000000:
         /* Max extended leaf (0x80000008). */
         SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000001:
         /* Extended feature bits (LAHF/SAHF in 64-bit mode, etc). */
         SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
         break;
      case 0x80000002:
         /* Leaves 0x80000002..4: brand string
            "Intel(R) Core(TM) i5 CPU         670  @ 3.47GHz". */
         SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
         break;
      case 0x80000003:
         SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020);
         break;
      case 0x80000004:
         SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847);
         break;
      case 0x80000005:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000006:
         /* Extended L2 cache information. */
         SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
         break;
      case 0x80000007:
         /* Invariant TSC. */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
         break;
      case 0x80000008:
         /* Physical/virtual address sizes (36/48 bits). */
         SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
         break;
      default:
         SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
         break;
   }
#  undef SET_ABCD
}
2708
2709
sewardjfe0c5e72012-06-15 15:48:07 +00002710/* Claim to be the following CPU (4 x ...), which is AVX and cx16
sewardj9e4c3762013-09-27 15:03:58 +00002711 capable. Plus (kludge!) it "supports" HTM.
sewardjfe0c5e72012-06-15 15:48:07 +00002712
2713 vendor_id : GenuineIntel
2714 cpu family : 6
2715 model : 42
2716 model name : Intel(R) Core(TM) i5-2300 CPU @ 2.80GHz
2717 stepping : 7
2718 cpu MHz : 1600.000
2719 cache size : 6144 KB
2720 physical id : 0
2721 siblings : 4
2722 core id : 3
2723 cpu cores : 4
2724 apicid : 6
2725 initial apicid : 6
2726 fpu : yes
2727 fpu_exception : yes
2728 cpuid level : 13
2729 wp : yes
2730 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2731 mtrr pge mca cmov pat pse36 clflush dts acpi
2732 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
2733 lm constant_tsc arch_perfmon pebs bts rep_good
2734 nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq
2735 dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16
2736 xtpr pdcm sse4_1 sse4_2 popcnt aes xsave avx
2737 lahf_lm ida arat epb xsaveopt pln pts dts
2738 tpr_shadow vnmi flexpriority ept vpid
2739
2740 bogomips : 5768.94
2741 clflush size : 64
2742 cache_alignment : 64
2743 address sizes : 36 bits physical, 48 bits virtual
2744 power management:
2745*/
void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st )
{
   /* Emulate CPUID for the Core i5-2300 described in the
      /proc/cpuinfo dump above (AVX- and CX16-capable, plus the HTM
      kludge).  The requested leaf comes from guest RAX, the subleaf
      from guest RCX where relevant; results go to guest
      RAX/RBX/RCX/RDX. */
#  define SET_ABCD(_a,_b,_c,_d) \
      do { st->guest_RAX = (ULong)(_a); \
           st->guest_RBX = (ULong)(_b); \
           st->guest_RCX = (ULong)(_c); \
           st->guest_RDX = (ULong)(_d); \
      } while (0)

   UInt old_eax = (UInt)st->guest_RAX;  /* requested leaf */
   UInt old_ecx = (UInt)st->guest_RCX;  /* requested subleaf */

   switch (old_eax) {
      case 0x00000000:
         /* Max basic leaf (0xd) and "GenuineIntel" vendor string. */
         SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
         break;
      case 0x00000001:
         /* Family/model/stepping signature plus feature flags; ECX
            advertises (amongst others) AVX, AES, CMPXCHG16B. */
         SET_ABCD(0x000206a7, 0x00100800, 0x1f9ae3bf, 0xbfebfbff);
         break;
      case 0x00000002:
         /* Cache/TLB descriptor bytes. */
         SET_ABCD(0x76035a01, 0x00f0b0ff, 0x00000000, 0x00ca0000);
         break;
      case 0x00000003:
         /* Processor serial number: not supported. */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000004:
         /* Deterministic cache parameters; one cache level per
            ECX subleaf. */
         switch (old_ecx) {
            case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
                                      0x0000003f, 0x00000000); break;
            case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f,
                                      0x0000003f, 0x00000000); break;
            case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
                                      0x000001ff, 0x00000000); break;
            case 0x00000003: SET_ABCD(0x1c03c163, 0x02c0003f,
                                      0x00001fff, 0x00000006); break;
            default:         SET_ABCD(0x00000000, 0x00000000,
                                      0x00000000, 0x00000000); break;
         }
         break;
      case 0x00000005:
         /* MONITOR/MWAIT parameters. */
         SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
         break;
      case 0x00000006:
         /* Thermal and power management. */
         SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000);
         break;
      case 0x00000007:
         /* Structured extended features: only EBX bit 11 (RTM) is
            set -- this is the "supports HTM" kludge noted in the
            comment above. */
         SET_ABCD(0x00000000, 0x00000800, 0x00000000, 0x00000000);
         break;
      case 0x00000008:
         /* Reserved. */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000009:
         /* Direct cache access: not supported. */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x0000000a:
         /* Architectural performance monitoring. */
         SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603);
         break;
      case 0x0000000b:
         /* Extended topology enumeration; unrecognised subleaves
            echo old_ecx back in ECX. */
         switch (old_ecx) {
            case 0x00000000:
               SET_ABCD(0x00000001, 0x00000001,
                        0x00000100, 0x00000000); break;
            case 0x00000001:
               SET_ABCD(0x00000004, 0x00000004,
                        0x00000201, 0x00000000); break;
            default:
               SET_ABCD(0x00000000, 0x00000000,
                        old_ecx,    0x00000000); break;
         }
         break;
      case 0x0000000c:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x0000000d:
         /* XSAVE state enumeration, per-ECX subleaf. */
         switch (old_ecx) {
            case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
                                      0x00000340, 0x00000000); break;
            case 0x00000001: SET_ABCD(0x00000001, 0x00000000,
                                      0x00000000, 0x00000000); break;
            case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
                                      0x00000000, 0x00000000); break;
            default:         SET_ABCD(0x00000000, 0x00000000,
                                      0x00000000, 0x00000000); break;
         }
         break;
      case 0x0000000e:
         /* Above the advertised max basic leaf; same values as the
            default arm. */
         SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
         break;
      case 0x0000000f:
         SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
         break;
      case 0x80000000:
         /* Max extended leaf (0x80000008). */
         SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000001:
         /* Extended feature bits. */
         SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
         break;
      case 0x80000002:
         /* Leaves 0x80000002..4: brand string
            "        Intel(R) Core(TM) i5-2300 CPU @ 2.80GHz". */
         SET_ABCD(0x20202020, 0x20202020, 0x65746e49, 0x2952286c);
         break;
      case 0x80000003:
         SET_ABCD(0x726f4320, 0x4d542865, 0x35692029, 0x3033322d);
         break;
      case 0x80000004:
         SET_ABCD(0x50432030, 0x20402055, 0x30382e32, 0x007a4847);
         break;
      case 0x80000005:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000006:
         /* Extended L2 cache information. */
         SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
         break;
      case 0x80000007:
         /* Invariant TSC. */
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
         break;
      case 0x80000008:
         /* Physical/virtual address sizes (36/48 bits). */
         SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
         break;
      default:
         SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
         break;
   }
#  undef SET_ABCD
}
2870
2871
sewardj112b0992005-07-23 13:19:32 +00002872ULong amd64g_calculate_RCR ( ULong arg,
2873 ULong rot_amt,
2874 ULong rflags_in,
2875 Long szIN )
2876{
2877 Bool wantRflags = toBool(szIN < 0);
2878 ULong sz = wantRflags ? (-szIN) : szIN;
2879 ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F);
2880 ULong cf=0, of=0, tempcf;
2881
2882 switch (sz) {
2883 case 8:
2884 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2885 of = ((arg >> 63) ^ cf) & 1;
2886 while (tempCOUNT > 0) {
2887 tempcf = arg & 1;
2888 arg = (arg >> 1) | (cf << 63);
2889 cf = tempcf;
2890 tempCOUNT--;
2891 }
2892 break;
2893 case 4:
2894 while (tempCOUNT >= 33) tempCOUNT -= 33;
2895 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2896 of = ((arg >> 31) ^ cf) & 1;
2897 while (tempCOUNT > 0) {
2898 tempcf = arg & 1;
2899 arg = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31);
2900 cf = tempcf;
2901 tempCOUNT--;
2902 }
2903 break;
2904 case 2:
2905 while (tempCOUNT >= 17) tempCOUNT -= 17;
2906 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2907 of = ((arg >> 15) ^ cf) & 1;
2908 while (tempCOUNT > 0) {
2909 tempcf = arg & 1;
2910 arg = ((arg >> 1) & 0x7FFFULL) | (cf << 15);
2911 cf = tempcf;
2912 tempCOUNT--;
2913 }
2914 break;
2915 case 1:
2916 while (tempCOUNT >= 9) tempCOUNT -= 9;
2917 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2918 of = ((arg >> 7) ^ cf) & 1;
2919 while (tempCOUNT > 0) {
2920 tempcf = arg & 1;
2921 arg = ((arg >> 1) & 0x7FULL) | (cf << 7);
2922 cf = tempcf;
2923 tempCOUNT--;
2924 }
2925 break;
2926 default:
2927 vpanic("calculate_RCR(amd64g): invalid size");
2928 }
2929
2930 cf &= 1;
2931 of &= 1;
2932 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
2933 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
2934
2935 /* caller can ask to have back either the resulting flags or
2936 resulting value, but not both */
2937 return wantRflags ? rflags_in : arg;
2938}
2939
sewardjb5e5c6d2007-01-12 20:29:01 +00002940ULong amd64g_calculate_RCL ( ULong arg,
2941 ULong rot_amt,
2942 ULong rflags_in,
2943 Long szIN )
2944{
2945 Bool wantRflags = toBool(szIN < 0);
2946 ULong sz = wantRflags ? (-szIN) : szIN;
2947 ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F);
2948 ULong cf=0, of=0, tempcf;
2949
2950 switch (sz) {
2951 case 8:
2952 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2953 while (tempCOUNT > 0) {
2954 tempcf = (arg >> 63) & 1;
2955 arg = (arg << 1) | (cf & 1);
2956 cf = tempcf;
2957 tempCOUNT--;
2958 }
2959 of = ((arg >> 63) ^ cf) & 1;
2960 break;
2961 case 4:
2962 while (tempCOUNT >= 33) tempCOUNT -= 33;
2963 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2964 while (tempCOUNT > 0) {
2965 tempcf = (arg >> 31) & 1;
2966 arg = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1));
2967 cf = tempcf;
2968 tempCOUNT--;
2969 }
2970 of = ((arg >> 31) ^ cf) & 1;
2971 break;
2972 case 2:
2973 while (tempCOUNT >= 17) tempCOUNT -= 17;
2974 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2975 while (tempCOUNT > 0) {
2976 tempcf = (arg >> 15) & 1;
2977 arg = 0xFFFFULL & ((arg << 1) | (cf & 1));
2978 cf = tempcf;
2979 tempCOUNT--;
2980 }
2981 of = ((arg >> 15) ^ cf) & 1;
2982 break;
2983 case 1:
2984 while (tempCOUNT >= 9) tempCOUNT -= 9;
2985 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2986 while (tempCOUNT > 0) {
2987 tempcf = (arg >> 7) & 1;
2988 arg = 0xFFULL & ((arg << 1) | (cf & 1));
2989 cf = tempcf;
2990 tempCOUNT--;
2991 }
2992 of = ((arg >> 7) ^ cf) & 1;
2993 break;
2994 default:
2995 vpanic("calculate_RCL(amd64g): invalid size");
2996 }
2997
2998 cf &= 1;
2999 of &= 1;
3000 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
3001 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
3002
3003 return wantRflags ? rflags_in : arg;
3004}
3005
sewardj1a179b52010-09-28 19:56:32 +00003006/* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+)
3007 * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25
3008 */
ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which)
{
   /* Carryless (GF(2)[x]) multiply of a and b, as for PCLMULQDQ.
      Returns the low 64 bits of the 128-bit product when 'which' is
      zero, else the high 64 bits. */
   ULong hi, lo, tmp, A[16];

   /* A[i] holds the low 64 bits of the carryless product a * i, for
      each 4-bit multiplier i. */
   A[0] = 0; A[1] = a;
   A[2] = A[1] << 1; A[3] = A[2] ^ a;
   A[4] = A[2] << 1; A[5] = A[4] ^ a;
   A[6] = A[3] << 1; A[7] = A[6] ^ a;
   A[8] = A[4] << 1; A[9] = A[8] ^ a;
   A[10] = A[5] << 1; A[11] = A[10] ^ a;
   A[12] = A[6] << 1; A[13] = A[12] ^ a;
   A[14] = A[7] << 1; A[15] = A[14] ^ a;

   /* Consume b a byte (two table lookups) at a time, from the most
      significant end, accumulating the low half in lo and capturing
      the bits that fall off the top into hi. */
   lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15];
   hi = lo >> 56;
   lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15];
   hi = (hi << 8) | (lo >> 56);
   lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15];
   hi = (hi << 8) | (lo >> 56);
   lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15];
   hi = (hi << 8) | (lo >> 56);
   lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15];
   hi = (hi << 8) | (lo >> 56);
   lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15];
   hi = (hi << 8) | (lo >> 56);
   lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15];
   hi = (hi << 8) | (lo >> 56);
   lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15];

   /* The A[] entries are truncated to 64 bits, so contributions from
      the top few bits of a were lost above.  Repair hi, one high bit
      of a at a time (see the gf2x source cited above for the
      derivation of these masks). */
   ULong m0 = -1;
   m0 /= 255;   /* m0 = 0x0101010101010101 */
   tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp;
   tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp;
   tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp;
   tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp;
   tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp;
   tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp;
   tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp;

   return which ? hi : lo;
}
3050
sewardj112b0992005-07-23 13:19:32 +00003051
sewardjbc6af532005-08-23 23:16:51 +00003052/* CALLED FROM GENERATED CODE */
3053/* DIRTY HELPER (non-referentially-transparent) */
3054/* Horrible hack. On non-amd64 platforms, return 1. */
3055ULong amd64g_dirtyhelper_RDTSC ( void )
3056{
3057# if defined(__x86_64__)
3058 UInt eax, edx;
3059 __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx));
3060 return (((ULong)edx) << 32) | ((ULong)eax);
3061# else
3062 return 1ULL;
3063# endif
3064}
3065
sewardj818c7302013-03-26 13:53:18 +00003066/* CALLED FROM GENERATED CODE */
3067/* DIRTY HELPER (non-referentially-transparent) */
3068/* Horrible hack. On non-amd64 platforms, return 1. */
3069/* This uses a different calling convention from _RDTSC just above
3070 only because of the difficulty of returning 96 bits from a C
3071 function -- RDTSC returns 64 bits and so is simple by comparison,
3072 on amd64. */
3073void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st )
3074{
3075# if defined(__x86_64__)
3076 UInt eax, ecx, edx;
3077 __asm__ __volatile__("rdtscp" : "=a" (eax), "=d" (edx), "=c" (ecx));
3078 st->guest_RAX = (ULong)eax;
3079 st->guest_RCX = (ULong)ecx;
3080 st->guest_RDX = (ULong)edx;
3081# else
3082 /* Do nothing. */
3083# endif
3084}
sewardjbc6af532005-08-23 23:16:51 +00003085
sewardjbb4396c2007-11-20 17:29:08 +00003086/* CALLED FROM GENERATED CODE */
3087/* DIRTY HELPER (non-referentially-transparent) */
3088/* Horrible hack. On non-amd64 platforms, return 0. */
ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ )
{
   /* Perform a real I/O-port read of the given width on the host and
      return the value zero-extended to 64 bits.  Unsupported widths
      (anything other than 1, 2 or 4) return 0, as do non-amd64
      hosts. */
#  if defined(__x86_64__)
   ULong r = 0;
   portno &= 0xFFFF;   /* I/O port space is 16 bits wide */
   switch (sz) {
      case 4:
         /* rax is zeroed first so the unused upper bits of r are 0;
            %w1 forces the 16-bit register name for the port. */
         __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0"
                              : "=a" (r) : "Nd" (portno));
         break;
      default:
         break; /* note: no 64-bit version of insn exists */
   }
   return r;
#  else
   return 0;
#  endif
}
3115
3116
3117/* CALLED FROM GENERATED CODE */
3118/* DIRTY HELPER (non-referentially-transparent) */
3119/* Horrible hack. On non-amd64 platforms, do nothing. */
void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ )
{
   /* Perform a real I/O-port write of the given width on the host.
      Unsupported widths, and non-amd64 hosts, silently do nothing. */
#  if defined(__x86_64__)
   portno &= 0xFFFF;   /* I/O port space is 16 bits wide */
   switch (sz) {
      case 4:
         __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("outw %w0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("outb %b0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      default:
         break; /* note: no 64-bit version of insn exists */
   }
#  else
   /* do nothing */
#  endif
}
3144
sewardjb9dc2432010-06-07 16:22:22 +00003145/* CALLED FROM GENERATED CODE */
3146/* DIRTY HELPER (non-referentially-transparent) */
3147/* Horrible hack. On non-amd64 platforms, do nothing. */
3148/* op = 0: call the native SGDT instruction.
3149 op = 1: call the native SIDT instruction.
3150*/
void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) {
   /* Store the host's GDT (op == 0) or IDT (op == 1) descriptor into
      the 10 bytes at 'address'.  Any other op value panics.  On
      non-amd64 hosts, write 10 zero bytes instead so the guest at
      least sees something deterministic. */
#  if defined(__x86_64__)
   switch (op) {
      case 0:
         __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
         break;
      case 1:
         __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
         break;
      default:
         vpanic("amd64g_dirtyhelper_SxDT");
   }
#  else
   /* do nothing */
   UChar* p = (UChar*)address;
   p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
   p[6] = p[7] = p[8] = p[9] = 0;
#  endif
}
sewardjbb4396c2007-11-20 17:29:08 +00003170
sewardj8711f662005-05-09 17:52:56 +00003171/*---------------------------------------------------------------*/
3172/*--- Helpers for MMX/SSE/SSE2. ---*/
3173/*---------------------------------------------------------------*/
3174
sewardja7ba8c42005-05-10 20:08:34 +00003175static inline UChar abdU8 ( UChar xx, UChar yy ) {
3176 return toUChar(xx>yy ? xx-yy : yy-xx);
3177}
3178
sewardj8711f662005-05-09 17:52:56 +00003179static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
3180 return (((ULong)w1) << 32) | ((ULong)w0);
3181}
3182
3183static inline UShort sel16x4_3 ( ULong w64 ) {
3184 UInt hi32 = toUInt(w64 >> 32);
3185 return toUShort(hi32 >> 16);
3186}
3187static inline UShort sel16x4_2 ( ULong w64 ) {
3188 UInt hi32 = toUInt(w64 >> 32);
3189 return toUShort(hi32);
3190}
3191static inline UShort sel16x4_1 ( ULong w64 ) {
3192 UInt lo32 = toUInt(w64);
3193 return toUShort(lo32 >> 16);
3194}
3195static inline UShort sel16x4_0 ( ULong w64 ) {
3196 UInt lo32 = toUInt(w64);
3197 return toUShort(lo32);
3198}
3199
sewardja7ba8c42005-05-10 20:08:34 +00003200static inline UChar sel8x8_7 ( ULong w64 ) {
3201 UInt hi32 = toUInt(w64 >> 32);
3202 return toUChar(hi32 >> 24);
3203}
3204static inline UChar sel8x8_6 ( ULong w64 ) {
3205 UInt hi32 = toUInt(w64 >> 32);
3206 return toUChar(hi32 >> 16);
3207}
3208static inline UChar sel8x8_5 ( ULong w64 ) {
3209 UInt hi32 = toUInt(w64 >> 32);
3210 return toUChar(hi32 >> 8);
3211}
3212static inline UChar sel8x8_4 ( ULong w64 ) {
3213 UInt hi32 = toUInt(w64 >> 32);
3214 return toUChar(hi32 >> 0);
3215}
3216static inline UChar sel8x8_3 ( ULong w64 ) {
3217 UInt lo32 = toUInt(w64);
3218 return toUChar(lo32 >> 24);
3219}
3220static inline UChar sel8x8_2 ( ULong w64 ) {
3221 UInt lo32 = toUInt(w64);
3222 return toUChar(lo32 >> 16);
3223}
3224static inline UChar sel8x8_1 ( ULong w64 ) {
3225 UInt lo32 = toUInt(w64);
3226 return toUChar(lo32 >> 8);
3227}
3228static inline UChar sel8x8_0 ( ULong w64 ) {
3229 UInt lo32 = toUInt(w64);
3230 return toUChar(lo32 >> 0);
3231}
3232
sewardj8711f662005-05-09 17:52:56 +00003233/* CALLED FROM GENERATED CODE: CLEAN HELPER */
3234ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
3235{
3236 return
3237 mk32x2(
3238 (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
3239 + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
3240 (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
3241 + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
3242 );
3243}
3244
sewardja7ba8c42005-05-10 20:08:34 +00003245/* CALLED FROM GENERATED CODE: CLEAN HELPER */
sewardja7ba8c42005-05-10 20:08:34 +00003246ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
3247{
3248 UInt t = 0;
3249 t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
3250 t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
3251 t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
3252 t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
3253 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
3254 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
3255 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
3256 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
3257 t &= 0xFFFF;
3258 return (ULong)t;
3259}
3260
sewardjadffcef2005-05-11 00:03:06 +00003261/* CALLED FROM GENERATED CODE: CLEAN HELPER */
sewardj8cb931e2012-02-16 22:02:14 +00003262ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi )
3263{
3264 UShort t, min;
3265 UInt idx;
3266 t = sel16x4_0(sLo); if (True) { min = t; idx = 0; }
3267 t = sel16x4_1(sLo); if (t < min) { min = t; idx = 1; }
3268 t = sel16x4_2(sLo); if (t < min) { min = t; idx = 2; }
3269 t = sel16x4_3(sLo); if (t < min) { min = t; idx = 3; }
3270 t = sel16x4_0(sHi); if (t < min) { min = t; idx = 4; }
3271 t = sel16x4_1(sHi); if (t < min) { min = t; idx = 5; }
3272 t = sel16x4_2(sHi); if (t < min) { min = t; idx = 6; }
3273 t = sel16x4_3(sHi); if (t < min) { min = t; idx = 7; }
3274 return ((ULong)(idx << 16)) | ((ULong)min);
3275}
3276
3277/* CALLED FROM GENERATED CODE: CLEAN HELPER */
sewardj186f8692011-01-21 17:51:44 +00003278ULong amd64g_calc_crc32b ( ULong crcIn, ULong b )
3279{
3280 UInt i;
3281 ULong crc = (b & 0xFFULL) ^ crcIn;
3282 for (i = 0; i < 8; i++)
3283 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
3284 return crc;
3285}
3286
3287/* CALLED FROM GENERATED CODE: CLEAN HELPER */
3288ULong amd64g_calc_crc32w ( ULong crcIn, ULong w )
3289{
3290 UInt i;
3291 ULong crc = (w & 0xFFFFULL) ^ crcIn;
3292 for (i = 0; i < 16; i++)
3293 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
3294 return crc;
3295}
3296
3297/* CALLED FROM GENERATED CODE: CLEAN HELPER */
3298ULong amd64g_calc_crc32l ( ULong crcIn, ULong l )
3299{
3300 UInt i;
3301 ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn;
3302 for (i = 0; i < 32; i++)
3303 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
3304 return crc;
3305}
3306
3307/* CALLED FROM GENERATED CODE: CLEAN HELPER */
3308ULong amd64g_calc_crc32q ( ULong crcIn, ULong q )
3309{
3310 ULong crc = amd64g_calc_crc32l(crcIn, q);
3311 return amd64g_calc_crc32l(crc, q >> 32);
3312}
3313
sewardjd0a12df2005-02-10 02:07:43 +00003314
sewardj4d5bce22012-02-21 11:02:44 +00003315/* .. helper for next fn .. */
3316static inline ULong sad_8x4 ( ULong xx, ULong yy )
3317{
3318 UInt t = 0;
3319 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
3320 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
3321 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
3322 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
3323 return (ULong)t;
3324}
3325
3326/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calc_mpsadbw ( ULong sHi, ULong sLo,
                            ULong dHi, ULong dLo,
                            ULong imm_and_return_control_bit )
{
   /* MPSADBW: compute four 16-bit sums-of-absolute-differences of a
      32-bit source chunk against four overlapping 4-byte windows of
      the destination.  The low 3 bits of the last argument are the
      instruction's imm8 chunk selectors; bit 7 selects whether the
      high or low 64-bit half of the 128-bit result is wanted. */
   UInt imm8     = imm_and_return_control_bit & 7;
   Bool calcHi   = (imm_and_return_control_bit >> 7) & 1;
   UInt srcOffsL = imm8 & 3;        /* src offs in 32-bit (L) chunks */
   UInt dstOffsL = (imm8 >> 2) & 1; /* dst offs in ditto chunks */
   /* For src we only need 32 bits, so get them into the
      lower half of a 64 bit word. */
   ULong src = ((srcOffsL & 2) ? sHi : sLo) >> (32 * (srcOffsL & 1));
   /* For dst we need to get hold of 56 bits (7 bytes) from a total of
      11 bytes.  If calculating the low part of the result, need bytes
      dstOffsL * 4 + (0 .. 6); if calculating the high part,
      dstOffsL * 4 + (4 .. 10). */
   ULong dst;
   /* Byte window of dHi:dLo needed, per case:
      dstOffsL = 0, Lo  ->  0 .. 6
      dstOffsL = 1, Lo  ->  4 .. 10
      dstOffsL = 0, Hi  ->  4 .. 10
      dstOffsL = 1, Hi  ->  8 .. 14
   */
   if (calcHi && dstOffsL) {
      /* 8 .. 14 */
      dst = dHi & 0x00FFFFFFFFFFFFFFULL;
   }
   else if (!calcHi && !dstOffsL) {
      /* 0 .. 6 */
      dst = dLo & 0x00FFFFFFFFFFFFFFULL;
   }
   else {
      /* 4 .. 10 */
      dst = (dLo >> 32) | ((dHi & 0x00FFFFFFULL) << 32);
   }
   /* Four SADs, each over a window one byte further along dst. */
   ULong r0  = sad_8x4( dst >>  0, src );
   ULong r1  = sad_8x4( dst >>  8, src );
   ULong r2  = sad_8x4( dst >> 16, src );
   ULong r3  = sad_8x4( dst >> 24, src );
   ULong res = (r3 << 48) | (r2 << 32) | (r1 << 16) | r0;
   return res;
}
3367
sewardjcc3d2192013-03-27 11:37:33 +00003368/* CALLED FROM GENERATED CODE: CLEAN HELPER */
3369ULong amd64g_calculate_pext ( ULong src_masked, ULong mask )
3370{
3371 ULong dst = 0;
3372 ULong src_bit;
3373 ULong dst_bit = 1;
3374 for (src_bit = 1; src_bit; src_bit <<= 1) {
3375 if (mask & src_bit) {
3376 if (src_masked & src_bit) dst |= dst_bit;
3377 dst_bit <<= 1;
3378 }
3379 }
3380 return dst;
3381}
3382
3383/* CALLED FROM GENERATED CODE: CLEAN HELPER */
3384ULong amd64g_calculate_pdep ( ULong src, ULong mask )
3385{
3386 ULong dst = 0;
3387 ULong dst_bit;
3388 ULong src_bit = 1;
3389 for (dst_bit = 1; dst_bit; dst_bit <<= 1) {
3390 if (mask & dst_bit) {
3391 if (src & src_bit) dst |= dst_bit;
3392 src_bit <<= 1;
3393 }
3394 }
3395 return dst;
3396}
3397
sewardjf8c37f72005-02-07 18:55:29 +00003398/*---------------------------------------------------------------*/
sewardj0b2d3fe2010-08-06 07:59:38 +00003399/*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M} ---*/
3400/*---------------------------------------------------------------*/
3401
sewardjacfbd7d2010-08-17 22:52:08 +00003402static UInt zmask_from_V128 ( V128* arg )
3403{
3404 UInt i, res = 0;
3405 for (i = 0; i < 16; i++) {
3406 res |= ((arg->w8[i] == 0) ? 1 : 0) << i;
3407 }
3408 return res;
3409}
3410
sewardj3c3d6d62012-02-16 15:21:08 +00003411static UInt zmask_from_V128_wide ( V128* arg )
3412{
3413 UInt i, res = 0;
3414 for (i = 0; i < 8; i++) {
3415 res |= ((arg->w16[i] == 0) ? 1 : 0) << i;
3416 }
3417 return res;
3418}
3419
sewardjacfbd7d2010-08-17 22:52:08 +00003420/* Helps with PCMP{I,E}STR{I,M}.
3421
3422 CALLED FROM GENERATED CODE: DIRTY HELPER(s). (But not really,
sewardj0b2d3fe2010-08-06 07:59:38 +00003423 actually it could be a clean helper, but for the fact that we can't
sewardjacfbd7d2010-08-17 22:52:08 +00003424 pass by value 2 x V128 to a clean helper, nor have one returned.)
3425 Reads guest state, writes to guest state for the xSTRM cases, no
3426 accesses of memory, is a pure function.
3427
3428 opc_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so
3429 the callee knows which I/E and I/M variant it is dealing with and
3430 what the specific operation is. 4th byte of opcode is in the range
3431 0x60 to 0x63:
3432 istri 66 0F 3A 63
3433 istrm 66 0F 3A 62
3434 estri 66 0F 3A 61
3435 estrm 66 0F 3A 60
3436
3437 gstOffL and gstOffR are the guest state offsets for the two XMM
3438 register inputs. We never have to deal with the memory case since
3439 that is handled by pre-loading the relevant value into the fake
3440 XMM16 register.
3441
3442 For ESTRx variants, edxIN and eaxIN hold the values of those two
3443 registers.
3444
3445 In all cases, the bottom 16 bits of the result contain the new
3446 OSZACP %rflags values. For xSTRI variants, bits[31:16] of the
3447 result hold the new %ecx value. For xSTRM variants, the helper
3448 writes the result directly to the guest XMM0.
3449
3450 Declarable side effects: in all cases, reads guest state at
3451 [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes
3452 guest_XMM0.
3453
3454 Is expected to be called with opc_and_imm combinations which have
3455 actually been validated, and will assert if otherwise. The front
3456 end should ensure we're only called with verified values.
sewardj0b2d3fe2010-08-06 07:59:38 +00003457*/
ULong amd64g_dirtyhelper_PCMPxSTRx (
          VexGuestAMD64State* gst,
          HWord opc4_and_imm,
          HWord gstOffL, HWord gstOffR,
          HWord edxIN, HWord eaxIN
       )
{
   /* Unpack the packed argument: bits 15:8 hold the 4th opcode byte
      (0x60 .. 0x63, see the comment above), bits 7:0 the immediate. */
   HWord opc4 = (opc4_and_imm >> 8) & 0xFF;
   HWord imm8 = opc4_and_imm & 0xFF;
   /* opc4 bit 1 set => implicit-length (ISTRx, 0x62/0x63);
      opc4 bit 0 clear => mask-producing (xSTRM) variant. */
   HWord isISTRx = opc4 & 2;
   HWord isxSTRM = (opc4 & 1) ^ 1;
   vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */
   /* imm8 bit 0: 0 => sixteen 8-bit elements per vector,
      1 => eight 16-bit elements ("wide"). */
   HWord wide = (imm8 & 1);

   // where the args are
   V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
   V128* argR = (V128*)( ((UChar*)gst) + gstOffR );

   /* Create the arg validity masks, either from the vectors
      themselves (ISTRx: zero-element terminated) or from the
      supplied edx/eax explicit lengths (ESTRx). */
   UInt zmaskL, zmaskR;

   // temp spot for the resulting flags and vector.
   V128 resV;
   UInt resOSZACP;

   // for checking whether the case was handled
   Bool ok = False;

   if (wide) {
      /* 16-bit element case: at most 8 elements, so explicit lengths
         are clamped to [-8 .. 8] and then their magnitude taken. */
      if (isISTRx) {
         zmaskL = zmask_from_V128_wide(argL);
         zmaskR = zmask_from_V128_wide(argR);
      } else {
         Int tmp;
         tmp = edxIN & 0xFFFFFFFF;
         if (tmp < -8) tmp = -8;
         if (tmp > 8)  tmp = 8;
         if (tmp < 0)  tmp = -tmp;
         vassert(tmp >= 0 && tmp <= 8);
         /* One bit set at the index of the first invalid element
            (no bits set if all 8 elements are valid), mimicking a
            zero terminator at that position. */
         zmaskL = (1 << tmp) & 0xFF;
         tmp = eaxIN & 0xFFFFFFFF;
         if (tmp < -8) tmp = -8;
         if (tmp > 8)  tmp = 8;
         if (tmp < 0)  tmp = -tmp;
         vassert(tmp >= 0 && tmp <= 8);
         zmaskR = (1 << tmp) & 0xFF;
      }
      // do the computation
      ok = compute_PCMPxSTRx_wide (
              &resV, &resOSZACP, argL, argR,
              zmaskL, zmaskR, imm8, (Bool)isxSTRM
           );
   } else {
      /* 8-bit element case: 16 elements, clamp to [-16 .. 16]. */
      if (isISTRx) {
         zmaskL = zmask_from_V128(argL);
         zmaskR = zmask_from_V128(argR);
      } else {
         Int tmp;
         tmp = edxIN & 0xFFFFFFFF;
         if (tmp < -16) tmp = -16;
         if (tmp > 16)  tmp = 16;
         if (tmp < 0)   tmp = -tmp;
         vassert(tmp >= 0 && tmp <= 16);
         zmaskL = (1 << tmp) & 0xFFFF;
         tmp = eaxIN & 0xFFFFFFFF;
         if (tmp < -16) tmp = -16;
         if (tmp > 16)  tmp = 16;
         if (tmp < 0)   tmp = -tmp;
         vassert(tmp >= 0 && tmp <= 16);
         zmaskR = (1 << tmp) & 0xFFFF;
      }
      // do the computation
      ok = compute_PCMPxSTRx (
              &resV, &resOSZACP, argL, argR,
              zmaskL, zmaskR, imm8, (Bool)isxSTRM
           );
   }

   // front end shouldn't pass us any imm8 variants we can't
   // handle.  Hence:
   vassert(ok);

   // So, finally we need to get the results back to the caller.
   // In all cases, the new OSZACP value is the lowest 16 of
   // the return value.
   if (isxSTRM) {
      /* xSTRM: the result vector goes directly to guest XMM0
         (the low 128 bits of YMM0); only the flags are returned. */
      gst->guest_YMM0[0] = resV.w32[0];
      gst->guest_YMM0[1] = resV.w32[1];
      gst->guest_YMM0[2] = resV.w32[2];
      gst->guest_YMM0[3] = resV.w32[3];
      return resOSZACP & 0x8D5; /* mask of the OSZACP rflags bits */
   } else {
      /* xSTRI: the new %ecx value is returned in bits 31:16. */
      UInt newECX = resV.w32[0] & 0xFFFF;
      return (newECX << 16) | (resOSZACP & 0x8D5);
   }
}
3557
philippeff4d6be2012-02-14 21:34:56 +00003558/*---------------------------------------------------------------*/
3559/*--- AES primitives and helpers ---*/
3560/*---------------------------------------------------------------*/
/* The AES forward S-box (FIPS-197), laid out as a 16 x 16 matrix and
   indexed directly by byte value. */
static const UChar sbox[256] = {                   // row nr
   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, // 1
   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, // 2
   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, // 3
   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, // 4
   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, // 5
   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, // 6
   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, // 7
   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, // 8
   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, // 9
   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, //10
   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, //11
   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, //12
   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, //13
   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, //14
   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, //15
   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, //16
   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
};
3596static void SubBytes (V128* v)
3597{
3598 V128 r;
3599 UInt i;
3600 for (i = 0; i < 16; i++)
3601 r.w8[i] = sbox[v->w8[i]];
3602 *v = r;
3603}
3604
/* The AES inverse S-box (FIPS-197), laid out as a 16 x 16 matrix and
   indexed directly by byte value.  Inverse of sbox above. */
static const UChar invsbox[256] = {                // row nr
   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, // 1
   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, // 2
   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, // 3
   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, // 4
   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, // 5
   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, // 6
   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, // 7
   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, // 8
   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, // 9
   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, //10
   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, //11
   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, //12
   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, //13
   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, //14
   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, //15
   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, //16
   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
};
3640static void InvSubBytes (V128* v)
3641{
3642 V128 r;
3643 UInt i;
3644 for (i = 0; i < 16; i++)
3645 r.w8[i] = invsbox[v->w8[i]];
3646 *v = r;
3647}
3648
/* Byte-selection table for ShiftRows: result byte (15-i) is taken
   from input byte ShiftRows_op[i] (see ShiftRows below). */
static const UChar ShiftRows_op[16] =
   {11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0};
3651static void ShiftRows (V128* v)
3652{
3653 V128 r;
3654 UInt i;
3655 for (i = 0; i < 16; i++)
3656 r.w8[i] = v->w8[ShiftRows_op[15-i]];
3657 *v = r;
3658}
3659
/* Byte-selection table for InvShiftRows: result byte (15-i) is taken
   from input byte InvShiftRows_op[i] (see InvShiftRows below). */
static const UChar InvShiftRows_op[16] =
   {3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0};
3662static void InvShiftRows (V128* v)
3663{
3664 V128 r;
3665 UInt i;
3666 for (i = 0; i < 16; i++)
3667 r.w8[i] = v->w8[InvShiftRows_op[15-i]];
3668 *v = r;
3669}
3670
3671/* Multiplication of the finite fields elements of AES.
3672 See "A Specification for The AES Algorithm Rijndael
3673 (by Joan Daemen & Vincent Rijmen)"
3674 Dr. Brian Gladman, v3.1, 3rd March 2001. */
3675/* N values so that (hex) xy = 0x03^N.
3676 0x00 cannot be used. We put 0xff for this value.*/
/* Log table for GF(2^8): Nxy[xy] = N such that 0x03^N == xy, as a
   16 x 16 matrix.  0x00 has no logarithm; its slot holds 0xff and
   ff_mul never consults it (it special-cases zero operands). */
static const UChar Nxy[256] = {                    // row nr
   0xff, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, // 1
   0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
   0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, // 2
   0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
   0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, // 3
   0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
   0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, // 4
   0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
   0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, // 5
   0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
   0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, // 6
   0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
   0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, // 7
   0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
   0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, // 8
   0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
   0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, // 9
   0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
   0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, //10
   0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
   0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, //11
   0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
   0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, //12
   0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
   0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, //13
   0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
   0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, //14
   0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
   0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, //15
   0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
   0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, //16
   0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07
};
3712
/* Antilog table for GF(2^8): Exy[xy] = E such that E == 0x03^xy.
   Inverse of Nxy above (for nonzero values). */
static const UChar Exy[256] = {                    // row nr
   0x01, 0x03, 0x05, 0x0f, 0x11, 0x33, 0x55, 0xff, // 1
   0x1a, 0x2e, 0x72, 0x96, 0xa1, 0xf8, 0x13, 0x35,
   0x5f, 0xe1, 0x38, 0x48, 0xd8, 0x73, 0x95, 0xa4, // 2
   0xf7, 0x02, 0x06, 0x0a, 0x1e, 0x22, 0x66, 0xaa,
   0xe5, 0x34, 0x5c, 0xe4, 0x37, 0x59, 0xeb, 0x26, // 3
   0x6a, 0xbe, 0xd9, 0x70, 0x90, 0xab, 0xe6, 0x31,
   0x53, 0xf5, 0x04, 0x0c, 0x14, 0x3c, 0x44, 0xcc, // 4
   0x4f, 0xd1, 0x68, 0xb8, 0xd3, 0x6e, 0xb2, 0xcd,
   0x4c, 0xd4, 0x67, 0xa9, 0xe0, 0x3b, 0x4d, 0xd7, // 5
   0x62, 0xa6, 0xf1, 0x08, 0x18, 0x28, 0x78, 0x88,
   0x83, 0x9e, 0xb9, 0xd0, 0x6b, 0xbd, 0xdc, 0x7f, // 6
   0x81, 0x98, 0xb3, 0xce, 0x49, 0xdb, 0x76, 0x9a,
   0xb5, 0xc4, 0x57, 0xf9, 0x10, 0x30, 0x50, 0xf0, // 7
   0x0b, 0x1d, 0x27, 0x69, 0xbb, 0xd6, 0x61, 0xa3,
   0xfe, 0x19, 0x2b, 0x7d, 0x87, 0x92, 0xad, 0xec, // 8
   0x2f, 0x71, 0x93, 0xae, 0xe9, 0x20, 0x60, 0xa0,
   0xfb, 0x16, 0x3a, 0x4e, 0xd2, 0x6d, 0xb7, 0xc2, // 9
   0x5d, 0xe7, 0x32, 0x56, 0xfa, 0x15, 0x3f, 0x41,
   0xc3, 0x5e, 0xe2, 0x3d, 0x47, 0xc9, 0x40, 0xc0, //10
   0x5b, 0xed, 0x2c, 0x74, 0x9c, 0xbf, 0xda, 0x75,
   0x9f, 0xba, 0xd5, 0x64, 0xac, 0xef, 0x2a, 0x7e, //11
   0x82, 0x9d, 0xbc, 0xdf, 0x7a, 0x8e, 0x89, 0x80,
   0x9b, 0xb6, 0xc1, 0x58, 0xe8, 0x23, 0x65, 0xaf, //12
   0xea, 0x25, 0x6f, 0xb1, 0xc8, 0x43, 0xc5, 0x54,
   0xfc, 0x1f, 0x21, 0x63, 0xa5, 0xf4, 0x07, 0x09, //13
   0x1b, 0x2d, 0x77, 0x99, 0xb0, 0xcb, 0x46, 0xca,
   0x45, 0xcf, 0x4a, 0xde, 0x79, 0x8b, 0x86, 0x91, //14
   0xa8, 0xe3, 0x3e, 0x42, 0xc6, 0x51, 0xf3, 0x0e,
   0x12, 0x36, 0x5a, 0xee, 0x29, 0x7b, 0x8d, 0x8c, //15
   0x8f, 0x8a, 0x85, 0x94, 0xa7, 0xf2, 0x0d, 0x17,
   0x39, 0x4b, 0xdd, 0x7c, 0x84, 0x97, 0xa2, 0xfd, //16
   0x1c, 0x24, 0x6c, 0xb4, 0xc7, 0x52, 0xf6, 0x01};
3747
3748static inline UChar ff_mul(UChar u1, UChar u2)
3749{
3750 if ((u1 > 0) && (u2 > 0)) {
3751 UInt ui = Nxy[u1] + Nxy[u2];
3752 if (ui >= 255)
3753 ui = ui - 255;
3754 return Exy[ui];
3755 } else {
3756 return 0;
3757 };
3758}
3759
3760static void MixColumns (V128* v)
3761{
3762 V128 r;
3763 Int j;
3764#define P(x,row,col) (x)->w8[((row)*4+(col))]
3765 for (j = 0; j < 4; j++) {
3766 P(&r,j,0) = ff_mul(0x02, P(v,j,0)) ^ ff_mul(0x03, P(v,j,1))
3767 ^ P(v,j,2) ^ P(v,j,3);
3768 P(&r,j,1) = P(v,j,0) ^ ff_mul( 0x02, P(v,j,1) )
3769 ^ ff_mul(0x03, P(v,j,2) ) ^ P(v,j,3);
3770 P(&r,j,2) = P(v,j,0) ^ P(v,j,1) ^ ff_mul( 0x02, P(v,j,2) )
3771 ^ ff_mul(0x03, P(v,j,3) );
3772 P(&r,j,3) = ff_mul(0x03, P(v,j,0) ) ^ P(v,j,1) ^ P(v,j,2)
3773 ^ ff_mul( 0x02, P(v,j,3) );
3774 }
3775 *v = r;
3776#undef P
3777}
3778
3779static void InvMixColumns (V128* v)
3780{
3781 V128 r;
3782 Int j;
3783#define P(x,row,col) (x)->w8[((row)*4+(col))]
3784 for (j = 0; j < 4; j++) {
3785 P(&r,j,0) = ff_mul(0x0e, P(v,j,0) ) ^ ff_mul(0x0b, P(v,j,1) )
3786 ^ ff_mul(0x0d,P(v,j,2) ) ^ ff_mul(0x09, P(v,j,3) );
3787 P(&r,j,1) = ff_mul(0x09, P(v,j,0) ) ^ ff_mul(0x0e, P(v,j,1) )
3788 ^ ff_mul(0x0b,P(v,j,2) ) ^ ff_mul(0x0d, P(v,j,3) );
3789 P(&r,j,2) = ff_mul(0x0d, P(v,j,0) ) ^ ff_mul(0x09, P(v,j,1) )
3790 ^ ff_mul(0x0e,P(v,j,2) ) ^ ff_mul(0x0b, P(v,j,3) );
3791 P(&r,j,3) = ff_mul(0x0b, P(v,j,0) ) ^ ff_mul(0x0d, P(v,j,1) )
3792 ^ ff_mul(0x09,P(v,j,2) ) ^ ff_mul(0x0e, P(v,j,3) );
3793 }
3794 *v = r;
3795#undef P
3796
3797}
3798
3799/* For description, see definition in guest_amd64_defs.h */
3800void amd64g_dirtyhelper_AES (
3801 VexGuestAMD64State* gst,
sewardj1407a362012-06-24 15:11:38 +00003802 HWord opc4, HWord gstOffD,
philippeff4d6be2012-02-14 21:34:56 +00003803 HWord gstOffL, HWord gstOffR
3804 )
3805{
3806 // where the args are
sewardj1407a362012-06-24 15:11:38 +00003807 V128* argD = (V128*)( ((UChar*)gst) + gstOffD );
philippeff4d6be2012-02-14 21:34:56 +00003808 V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
3809 V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
sewardj1407a362012-06-24 15:11:38 +00003810 V128 r;
philippeff4d6be2012-02-14 21:34:56 +00003811
3812 switch (opc4) {
3813 case 0xDC: /* AESENC */
3814 case 0xDD: /* AESENCLAST */
sewardj1407a362012-06-24 15:11:38 +00003815 r = *argR;
3816 ShiftRows (&r);
3817 SubBytes (&r);
philippeff4d6be2012-02-14 21:34:56 +00003818 if (opc4 == 0xDC)
sewardj1407a362012-06-24 15:11:38 +00003819 MixColumns (&r);
3820 argD->w64[0] = r.w64[0] ^ argL->w64[0];
3821 argD->w64[1] = r.w64[1] ^ argL->w64[1];
philippeff4d6be2012-02-14 21:34:56 +00003822 break;
3823
3824 case 0xDE: /* AESDEC */
3825 case 0xDF: /* AESDECLAST */
sewardj1407a362012-06-24 15:11:38 +00003826 r = *argR;
3827 InvShiftRows (&r);
3828 InvSubBytes (&r);
philippeff4d6be2012-02-14 21:34:56 +00003829 if (opc4 == 0xDE)
sewardj1407a362012-06-24 15:11:38 +00003830 InvMixColumns (&r);
3831 argD->w64[0] = r.w64[0] ^ argL->w64[0];
3832 argD->w64[1] = r.w64[1] ^ argL->w64[1];
philippeff4d6be2012-02-14 21:34:56 +00003833 break;
3834
3835 case 0xDB: /* AESIMC */
sewardj1407a362012-06-24 15:11:38 +00003836 *argD = *argL;
3837 InvMixColumns (argD);
philippeff4d6be2012-02-14 21:34:56 +00003838 break;
3839 default: vassert(0);
3840 }
3841}
3842
3843static inline UInt RotWord (UInt w32)
3844{
3845 return ((w32 >> 8) | (w32 << 24));
3846}
3847
3848static inline UInt SubWord (UInt w32)
3849{
3850 UChar *w8;
3851 UChar *r8;
3852 UInt res;
3853 w8 = (UChar*) &w32;
3854 r8 = (UChar*) &res;
3855 r8[0] = sbox[w8[0]];
3856 r8[1] = sbox[w8[1]];
3857 r8[2] = sbox[w8[2]];
3858 r8[3] = sbox[w8[3]];
3859 return res;
3860}
3861
3862/* For description, see definition in guest_amd64_defs.h */
3863extern void amd64g_dirtyhelper_AESKEYGENASSIST (
3864 VexGuestAMD64State* gst,
3865 HWord imm8,
3866 HWord gstOffL, HWord gstOffR
3867 )
3868{
3869 // where the args are
3870 V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
3871 V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
3872
sewardja35a6db2014-12-09 21:01:28 +00003873 // We have to create the result in a temporary in the
3874 // case where the src and dst regs are the same. See #341698.
3875 V128 tmp;
3876
3877 tmp.w32[3] = RotWord (SubWord (argL->w32[3])) ^ imm8;
3878 tmp.w32[2] = SubWord (argL->w32[3]);
3879 tmp.w32[1] = RotWord (SubWord (argL->w32[1])) ^ imm8;
3880 tmp.w32[0] = SubWord (argL->w32[1]);
3881
3882 argR->w32[3] = tmp.w32[3];
3883 argR->w32[2] = tmp.w32[2];
3884 argR->w32[1] = tmp.w32[1];
3885 argR->w32[0] = tmp.w32[0];
philippeff4d6be2012-02-14 21:34:56 +00003886}
3887
3888
sewardj0b2d3fe2010-08-06 07:59:38 +00003889
3890/*---------------------------------------------------------------*/
sewardjf8c37f72005-02-07 18:55:29 +00003891/*--- Helpers for dealing with, and describing, ---*/
3892/*--- guest state as a whole. ---*/
3893/*---------------------------------------------------------------*/
3894
3895/* Initialise the entire amd64 guest state. */
3896/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state )
{
   /* Event-check fields used by the translation-chaining machinery. */
   vex_state->host_EvC_FAILADDR = 0;
   vex_state->host_EvC_COUNTER = 0;
   vex_state->pad0 = 0;

   /* Integer registers all start at zero. */
   vex_state->guest_RAX = 0;
   vex_state->guest_RCX = 0;
   vex_state->guest_RDX = 0;
   vex_state->guest_RBX = 0;
   vex_state->guest_RSP = 0;
   vex_state->guest_RBP = 0;
   vex_state->guest_RSI = 0;
   vex_state->guest_RDI = 0;
   vex_state->guest_R8 = 0;
   vex_state->guest_R9 = 0;
   vex_state->guest_R10 = 0;
   vex_state->guest_R11 = 0;
   vex_state->guest_R12 = 0;
   vex_state->guest_R13 = 0;
   vex_state->guest_R14 = 0;
   vex_state->guest_R15 = 0;

   /* Condition-code thunk: COPY op with all-zero operands. */
   vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = 0;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;

   vex_state->guest_DFLAG = 1; /* forwards */
   vex_state->guest_IDFLAG = 0;
   vex_state->guest_ACFLAG = 0;

   /* HACK: represent the offset associated with a constant %fs.
      Typically, on linux, this assumes that %fs is only ever zero (main
      thread) or 0x63. */
   vex_state->guest_FS_CONST = 0;

   vex_state->guest_RIP = 0;

   /* Initialise the simulated FPU */
   amd64g_dirtyhelper_FINIT( vex_state );

   /* Initialise the AVX state: clear all 8 32-bit lanes of each YMM
      register. */
#  define AVXZERO(_ymm) \
      do { _ymm[0]=_ymm[1]=_ymm[2]=_ymm[3] = 0; \
           _ymm[4]=_ymm[5]=_ymm[6]=_ymm[7] = 0; \
      } while (0)
   vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST;
   AVXZERO(vex_state->guest_YMM0);
   AVXZERO(vex_state->guest_YMM1);
   AVXZERO(vex_state->guest_YMM2);
   AVXZERO(vex_state->guest_YMM3);
   AVXZERO(vex_state->guest_YMM4);
   AVXZERO(vex_state->guest_YMM5);
   AVXZERO(vex_state->guest_YMM6);
   AVXZERO(vex_state->guest_YMM7);
   AVXZERO(vex_state->guest_YMM8);
   AVXZERO(vex_state->guest_YMM9);
   AVXZERO(vex_state->guest_YMM10);
   AVXZERO(vex_state->guest_YMM11);
   AVXZERO(vex_state->guest_YMM12);
   AVXZERO(vex_state->guest_YMM13);
   AVXZERO(vex_state->guest_YMM14);
   AVXZERO(vex_state->guest_YMM15);
   /* YMM16 is the fake register used by helpers (e.g. PCMPxSTRx) to
      hold pre-loaded memory operands. */
   AVXZERO(vex_state->guest_YMM16);

#  undef AVXZERO

   vex_state->guest_EMNOTE = EmNote_NONE;

   /* These should not ever be either read or written, but we
      initialise them anyway. */
   vex_state->guest_CMSTART = 0;
   vex_state->guest_CMLEN = 0;

   vex_state->guest_NRADDR = 0;
   vex_state->guest_SC_CLASS = 0;
   vex_state->guest_GS_CONST = 0;

   vex_state->guest_IP_AT_SYSCALL = 0;
   vex_state->pad1 = 0;
}
3979
3980
sewardj2f959cc2005-01-26 01:19:35 +00003981/* Figure out if any part of the guest state contained in minoff
3982 .. maxoff requires precise memory exceptions. If in doubt return
philippe6c46bef2012-08-14 22:29:01 +00003983 True (but this generates significantly slower code).
sewardj2f959cc2005-01-26 01:19:35 +00003984
sewardj4cca75c2005-03-16 11:52:25 +00003985 By default we enforce precise exns for guest %RSP, %RBP and %RIP
3986 only. These are the minimum needed to extract correct stack
3987 backtraces from amd64 code.
philippe6c46bef2012-08-14 22:29:01 +00003988
3989 Only %RSP is needed in mode VexRegUpdSpAtMemAccess.
sewardj2f959cc2005-01-26 01:19:35 +00003990*/
sewardjca2c3c72015-02-05 12:53:20 +00003991Bool guest_amd64_state_requires_precise_mem_exns (
3992 Int minoff, Int maxoff, VexRegisterUpdates pxControl
3993 )
sewardj44d494d2005-01-20 20:26:33 +00003994{
sewardj4cca75c2005-03-16 11:52:25 +00003995 Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP);
3996 Int rbp_max = rbp_min + 8 - 1;
sewardj2f959cc2005-01-26 01:19:35 +00003997 Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP);
3998 Int rsp_max = rsp_min + 8 - 1;
3999 Int rip_min = offsetof(VexGuestAMD64State, guest_RIP);
4000 Int rip_max = rip_min + 8 - 1;
4001
philippe6c46bef2012-08-14 22:29:01 +00004002 if (maxoff < rsp_min || minoff > rsp_max) {
4003 /* no overlap with rsp */
sewardjca2c3c72015-02-05 12:53:20 +00004004 if (pxControl == VexRegUpdSpAtMemAccess)
philippe6c46bef2012-08-14 22:29:01 +00004005 return False; // We only need to check stack pointer.
sewardj4cca75c2005-03-16 11:52:25 +00004006 } else {
4007 return True;
4008 }
4009
philippe6c46bef2012-08-14 22:29:01 +00004010 if (maxoff < rbp_min || minoff > rbp_max) {
4011 /* no overlap with rbp */
sewardj2f959cc2005-01-26 01:19:35 +00004012 } else {
4013 return True;
4014 }
4015
4016 if (maxoff < rip_min || minoff > rip_max) {
4017 /* no overlap with eip */
4018 } else {
4019 return True;
4020 }
4021
4022 return False;
sewardj44d494d2005-01-20 20:26:33 +00004023}
sewardj2f959cc2005-01-26 01:19:35 +00004024
4025
/* Expands to an { offset, size } pair describing one guest-state
   field, for the alwaysDefd table below. */
#define ALWAYSDEFD(field)                           \
    { offsetof(VexGuestAMD64State, field),          \
      (sizeof ((VexGuestAMD64State*)0)->field) }

VexGuestLayout
   amd64guest_layout
   = {
        /* Total size of the guest state, in bytes. */
        .total_sizeB = sizeof(VexGuestAMD64State),

        /* Describe the stack pointer. */
        .offset_SP = offsetof(VexGuestAMD64State,guest_RSP),
        .sizeof_SP = 8,

        /* Describe the frame pointer. */
        .offset_FP = offsetof(VexGuestAMD64State,guest_RBP),
        .sizeof_FP = 8,

        /* Describe the instruction pointer. */
        .offset_IP = offsetof(VexGuestAMD64State,guest_RIP),
        .sizeof_IP = 8,

        /* Describe any sections to be regarded by Memcheck as
           'always-defined'. */
        .n_alwaysDefd = 16,

        /* flags thunk: OP and NDEP are always defd, whereas DEP1
           and DEP2 have to be tracked.  See detailed comment in
           gdefs.h on meaning of thunk fields. */
        .alwaysDefd
           = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
               /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
               /*  2 */ ALWAYSDEFD(guest_DFLAG),
               /*  3 */ ALWAYSDEFD(guest_IDFLAG),
               /*  4 */ ALWAYSDEFD(guest_RIP),
               /*  5 */ ALWAYSDEFD(guest_FS_CONST),
               /*  6 */ ALWAYSDEFD(guest_FTOP),
               /*  7 */ ALWAYSDEFD(guest_FPTAG),
               /*  8 */ ALWAYSDEFD(guest_FPROUND),
               /*  9 */ ALWAYSDEFD(guest_FC3210),
               // /*  */ ALWAYSDEFD(guest_CS),
               // /*  */ ALWAYSDEFD(guest_DS),
               // /*  */ ALWAYSDEFD(guest_ES),
               // /*  */ ALWAYSDEFD(guest_FS),
               // /*  */ ALWAYSDEFD(guest_GS),
               // /*  */ ALWAYSDEFD(guest_SS),
               // /*  */ ALWAYSDEFD(guest_LDT),
               // /*  */ ALWAYSDEFD(guest_GDT),
               /* 10 */ ALWAYSDEFD(guest_EMNOTE),
               /* 11 */ ALWAYSDEFD(guest_SSEROUND),
               /* 12 */ ALWAYSDEFD(guest_CMSTART),
               /* 13 */ ALWAYSDEFD(guest_CMLEN),
               /* 14 */ ALWAYSDEFD(guest_SC_CLASS),
               /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
             }
     };
njn9c6acb02004-11-30 15:56:47 +00004082
4083
njn9c6acb02004-11-30 15:56:47 +00004084/*---------------------------------------------------------------*/
sewardjcef7d3e2009-07-02 12:21:59 +00004085/*--- end guest_amd64_helpers.c ---*/
njn9c6acb02004-11-30 15:56:47 +00004086/*---------------------------------------------------------------*/