
/*--------------------------------------------------------------------*/
/*--- begin                                     guest_amd64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2011 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

/* Translates AMD64 code to IR. */

/* TODO:

   All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
   to ensure a 64-bit value is being written.

   x87 FP Limitations:

   * all arithmetic done at 64 bits

   * no FP exceptions, except for handling stack over/underflow

   * FP rounding mode observed only for float->int conversions and
     int->float conversions which could lose accuracy, and for
     float-to-float rounding.  For all other operations,
     round-to-nearest is used, regardless.

   * FP sin/cos/tan/sincos: C2 flag is always cleared.  IOW the
     simulation claims the argument is in-range (-2^63 <= arg <= 2^63)
     even when it isn't.

   * some of the FCOM cases could do with testing -- not convinced
     that the args are the right way round.

   * FSAVE does not re-initialise the FPU; it should do

   * FINIT not only initialises the FPU environment, it also zeroes
     all the FP registers.  It should leave the registers unchanged.

   RDTSC returns zero, always.

   SAHF should cause eflags[1] == 1, and in fact it produces 0.  As
   per Intel docs this bit has no meaning anyway.  Since PUSHF is the
   only way to observe eflags[1], a proper fix would be to make that
   bit be set by PUSHF.

   This module uses global variables and so is not MT-safe (if that
   should ever become relevant).
*/

/* Notes re address size overrides (0x67).

   According to the AMD documentation (24594 Rev 3.09, Sept 2003,
   "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
   and System Instructions"), Section 1.2.3 ("Address-Size Override
   Prefix"):

   0x67 applies to all explicit memory references, causing the top
   32 bits of the effective address to become zero.

   0x67 has no effect on stack references (push/pop); these always
   use a 64-bit address.

   0x67 changes the interpretation of instructions which implicitly
   reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
   instead.  These are:

      cmp{s,sb,sw,sd,sq}
      in{s,sb,sw,sd}
      jcxz, jecxz, jrcxz
      lod{s,sb,sw,sd,sq}
      loop{,e,bz,be,z}
      mov{s,sb,sw,sd,sq}
      out{s,sb,sw,sd}
      rep{,e,ne,nz}
      sca{s,sb,sw,sd,sq}
      sto{s,sb,sw,sd,sq}
      xlat{,b} */

/* "Special" instructions.

   This instruction decoder can decode three special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
   48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
   $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
   Following that, one of the following 3 are allowed (standard
   interpretation in parentheses):

      4887DB (xchgq %rbx,%rbx)   %RDX = client_request ( %RAX )
      4887C9 (xchgq %rcx,%rcx)   %RAX = guest_NRADDR
      4887D2 (xchgq %rdx,%rdx)   call-noredir *%RAX

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.

   No prefixes may precede a "Special" instruction.
*/
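
/* Purely as an illustration of the preamble just described, a check
   for it could look something like the sketch below.  This is not the
   decoder's actual code path; the helper name and its 'code' parameter
   are hypothetical and are not used anywhere else in this file. */
#if 0
static Bool looksLikeSpecialPreamble ( UChar* code )
{
   /* 48C1C703 48C1C70D 48C1C73D 48C1C733
      == rolq $3,%rdi; rolq $13,%rdi; rolq $61,%rdi; rolq $51,%rdi */
   return toBool(
      code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7
      && code[ 3] == 0x03
      && code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7
      && code[ 7] == 0x0D
      && code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7
      && code[11] == 0x3D
      && code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7
      && code[15] == 0x33 );
}
#endif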

/* casLE (implementation of lock-prefixed insns) and rep-prefixed
   insns: the side-exit back to the start of the insn is done with
   Ijk_Boring.  This is quite wrong, it should be done with
   Ijk_NoRedir, since otherwise the side exit, which is intended to
   restart the instruction for whatever reason, could go somewhere
   entirely else.  Doing it right (with Ijk_NoRedir jumps) would make
   no-redir jumps performance critical, at least for rep-prefixed
   instructions, since all iterations thereof would involve such a
   jump.  It's not such a big deal with casLE since the side exit is
   only taken if the CAS fails, that is, the location is contended,
   which is relatively unlikely.

   Note also, the test for CAS success vs failure is done using
   Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
   Iop_Cmp{EQ,NE} equivalents.  This is so as to tell Memcheck that it
   shouldn't definedness-check these comparisons.  See
   COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
   background/rationale.
*/

/* LOCK prefixed instructions.  These are translated using IR-level
   CAS statements (IRCAS) and are believed to preserve atomicity, even
   from the point of view of some other process racing against a
   simulated one (presumably they communicate via a shared memory
   segment).

   Handlers which are aware of LOCK prefixes are:
      dis_op2_G_E      (add, or, adc, sbb, and, sub, xor)
      dis_cmpxchg_G_E  (cmpxchg)
      dis_Grp1         (add, or, adc, sbb, and, sub, xor)
      dis_Grp3         (not, neg)
      dis_Grp4         (inc, dec)
      dis_Grp5         (inc, dec)
      dis_Grp8_Imm     (bts, btc, btr)
      dis_bt_G_E       (bts, btc, btr)
      dis_xadd_G_E     (xadd)
*/


#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_amd64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_generic_x87.h"
#include "guest_amd64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an insn, right
   down in disInstr_AMD64, so that we don't have to pass them around
   endlessly.  They are all constant during the translation of any
   given insn. */

/* These are set at the start of the translation of a BB, so
   that we don't have to pass them around endlessly. */

/* We need to know this to do sub-register accesses correctly. */
static Bool host_is_bigendian;

/* Pointer to the guest code area (points to start of BB, not to the
   insn being processed). */
static UChar* guest_code;

/* The guest address corresponding to guest_code[0]. */
static Addr64 guest_RIP_bbstart;

/* The guest address for the instruction currently being
   translated. */
static Addr64 guest_RIP_curr_instr;

/* The IRSB* into which we're generating code. */
static IRSB* irsb;

/* For ensuring that %rip-relative addressing is done right.  A read
   of %rip generates the address of the next instruction.  It may be
   that we don't conveniently know that inside disAMode().  For sanity
   checking, if the next insn %rip is needed, we make a guess at what
   it is, record that guess here, and set the accompanying Bool to
   indicate that -- after this insn's decode is finished -- that guess
   needs to be checked. */

/* At the start of each insn decode, is set to (0, False).
   After the decode, if _mustcheck is now True, _assumed is
   checked. */

static Addr64 guest_RIP_next_assumed;
static Bool   guest_RIP_next_mustcheck;


/*------------------------------------------------------------*/
/*--- Helpers for constructing IR.                         ---*/
/*------------------------------------------------------------*/

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

/* Generate a statement "dst := e". */
static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* mkU8 ( ULong i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkU16 ( ULong i )
{
   vassert(i < 0x10000ULL);
   return IRExpr_Const(IRConst_U16( (UShort)i ));
}

static IRExpr* mkU32 ( ULong i )
{
   vassert(i < 0x100000000ULL);
   return IRExpr_Const(IRConst_U32( (UInt)i ));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU ( IRType ty, ULong i )
{
   switch (ty) {
      case Ity_I8:  return mkU8(i);
      case Ity_I16: return mkU16(i);
      case Ity_I32: return mkU32(i);
      case Ity_I64: return mkU64(i);
      default: vpanic("mkU(amd64)");
   }
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

static IROp mkSizedOp ( IRType ty, IROp op8 )
{
   vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
           || op8 == Iop_Mul8
           || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
           || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
           || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
           || op8 == Iop_CasCmpNE8
           || op8 == Iop_Not8 );
   switch (ty) {
      case Ity_I8:  return 0 +op8;
      case Ity_I16: return 1 +op8;
      case Ity_I32: return 2 +op8;
      case Ity_I64: return 3 +op8;
      default: vpanic("mkSizedOp(amd64)");
   }
}

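/* For instance, mkSizedOp(Ity_I32, Iop_Add8) yields Iop_Add32.  The
   "0/1/2/3 + op8" arithmetic above relies on the 8/16/32/64-bit
   variants of each listed opcode being declared consecutively in
   libvex_ir.h. */
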
static
IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
{
   if (szSmall == 1 && szBig == 4) {
      return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
   }
   if (szSmall == 1 && szBig == 2) {
      return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
   }
   if (szSmall == 2 && szBig == 4) {
      return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
   }
   if (szSmall == 1 && szBig == 8 && !signd) {
      return unop(Iop_8Uto64, src);
   }
   if (szSmall == 1 && szBig == 8 && signd) {
      return unop(Iop_8Sto64, src);
   }
   if (szSmall == 2 && szBig == 8 && !signd) {
      return unop(Iop_16Uto64, src);
   }
   if (szSmall == 2 && szBig == 8 && signd) {
      return unop(Iop_16Sto64, src);
   }
   vpanic("doScalarWidening(amd64)");
}



/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

/* Bomb out if we can't handle something. */
__attribute__ ((noreturn))
static void unimplemented ( HChar* str )
{
   vex_printf("amd64toIR: unimplemented feature\n");
   vpanic(str);
}

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)


/*------------------------------------------------------------*/
/*--- Offsets of various parts of the amd64 guest state.   ---*/
/*------------------------------------------------------------*/

#define OFFB_RAX       offsetof(VexGuestAMD64State,guest_RAX)
#define OFFB_RBX       offsetof(VexGuestAMD64State,guest_RBX)
#define OFFB_RCX       offsetof(VexGuestAMD64State,guest_RCX)
#define OFFB_RDX       offsetof(VexGuestAMD64State,guest_RDX)
#define OFFB_RSP       offsetof(VexGuestAMD64State,guest_RSP)
#define OFFB_RBP       offsetof(VexGuestAMD64State,guest_RBP)
#define OFFB_RSI       offsetof(VexGuestAMD64State,guest_RSI)
#define OFFB_RDI       offsetof(VexGuestAMD64State,guest_RDI)
#define OFFB_R8        offsetof(VexGuestAMD64State,guest_R8)
#define OFFB_R9        offsetof(VexGuestAMD64State,guest_R9)
#define OFFB_R10       offsetof(VexGuestAMD64State,guest_R10)
#define OFFB_R11       offsetof(VexGuestAMD64State,guest_R11)
#define OFFB_R12       offsetof(VexGuestAMD64State,guest_R12)
#define OFFB_R13       offsetof(VexGuestAMD64State,guest_R13)
#define OFFB_R14       offsetof(VexGuestAMD64State,guest_R14)
#define OFFB_R15       offsetof(VexGuestAMD64State,guest_R15)

#define OFFB_RIP       offsetof(VexGuestAMD64State,guest_RIP)

#define OFFB_FS_ZERO   offsetof(VexGuestAMD64State,guest_FS_ZERO)
#define OFFB_GS_0x60   offsetof(VexGuestAMD64State,guest_GS_0x60)

#define OFFB_CC_OP     offsetof(VexGuestAMD64State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestAMD64State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestAMD64State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestAMD64State,guest_CC_NDEP)

#define OFFB_FPREGS    offsetof(VexGuestAMD64State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestAMD64State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestAMD64State,guest_DFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestAMD64State,guest_ACFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestAMD64State,guest_IDFLAG)
#define OFFB_FTOP      offsetof(VexGuestAMD64State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestAMD64State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestAMD64State,guest_FPROUND)

#define OFFB_SSEROUND  offsetof(VexGuestAMD64State,guest_SSEROUND)
#define OFFB_YMM0      offsetof(VexGuestAMD64State,guest_YMM0)
#define OFFB_YMM1      offsetof(VexGuestAMD64State,guest_YMM1)
#define OFFB_YMM2      offsetof(VexGuestAMD64State,guest_YMM2)
#define OFFB_YMM3      offsetof(VexGuestAMD64State,guest_YMM3)
#define OFFB_YMM4      offsetof(VexGuestAMD64State,guest_YMM4)
#define OFFB_YMM5      offsetof(VexGuestAMD64State,guest_YMM5)
#define OFFB_YMM6      offsetof(VexGuestAMD64State,guest_YMM6)
#define OFFB_YMM7      offsetof(VexGuestAMD64State,guest_YMM7)
#define OFFB_YMM8      offsetof(VexGuestAMD64State,guest_YMM8)
#define OFFB_YMM9      offsetof(VexGuestAMD64State,guest_YMM9)
#define OFFB_YMM10     offsetof(VexGuestAMD64State,guest_YMM10)
#define OFFB_YMM11     offsetof(VexGuestAMD64State,guest_YMM11)
#define OFFB_YMM12     offsetof(VexGuestAMD64State,guest_YMM12)
#define OFFB_YMM13     offsetof(VexGuestAMD64State,guest_YMM13)
#define OFFB_YMM14     offsetof(VexGuestAMD64State,guest_YMM14)
#define OFFB_YMM15     offsetof(VexGuestAMD64State,guest_YMM15)
#define OFFB_YMM16     offsetof(VexGuestAMD64State,guest_YMM16)

#define OFFB_EMWARN    offsetof(VexGuestAMD64State,guest_EMWARN)
#define OFFB_TISTART   offsetof(VexGuestAMD64State,guest_TISTART)
#define OFFB_TILEN     offsetof(VexGuestAMD64State,guest_TILEN)

#define OFFB_NRADDR    offsetof(VexGuestAMD64State,guest_NRADDR)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- amd64 insn stream.                                   ---*/
/*------------------------------------------------------------*/

/* This is the AMD64 register encoding -- integer regs. */
#define R_RAX 0
#define R_RCX 1
#define R_RDX 2
#define R_RBX 3
#define R_RSP 4
#define R_RBP 5
#define R_RSI 6
#define R_RDI 7
#define R_R8  8
#define R_R9  9
#define R_R10 10
#define R_R11 11
#define R_R12 12
#define R_R13 13
#define R_R14 14
#define R_R15 15

/* This is the Intel register encoding -- segment regs. */
#define R_ES 0
#define R_CS 1
#define R_SS 2
#define R_DS 3
#define R_FS 4
#define R_GS 5


/* Various simple conversions */

static ULong extend_s_8to64 ( UChar x )
{
   return (ULong)((((Long)x) << 56) >> 56);
}

static ULong extend_s_16to64 ( UShort x )
{
   return (ULong)((((Long)x) << 48) >> 48);
}

static ULong extend_s_32to64 ( UInt x )
{
   return (ULong)((((Long)x) << 32) >> 32);
}

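/* For example, extend_s_8to64(0x80) gives 0xFFFFFFFFFFFFFF80ULL and
   extend_s_8to64(0x7F) gives 0x000000000000007FULL: the value is
   shifted up to the top of a signed 64-bit quantity and shifted back
   down arithmetically, which duplicates the sign bit. */
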
/* Figure out whether the mod and rm parts of a modRM byte refer to a
   register or memory.  If so, the byte will have the form 11XXXYYY,
   where YYY is the register number. */
inline
static Bool epartIsReg ( UChar mod_reg_rm )
{
   return toBool(0xC0 == (mod_reg_rm & 0xC0));
}

/* Extract the 'g' field from a modRM byte.  This only produces 3
   bits, which is not a complete register number.  You should avoid
   this function if at all possible. */
inline
static Int gregLO3ofRM ( UChar mod_reg_rm )
{
   return (Int)( (mod_reg_rm >> 3) & 7 );
}

/* Ditto the 'e' field of a modRM byte. */
inline
static Int eregLO3ofRM ( UChar mod_reg_rm )
{
   return (Int)(mod_reg_rm & 0x7);
}

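/* For example, the modRM byte 0xD1 is 11 010 001 in binary: mod is 3
   (so epartIsReg is True), gregLO3ofRM gives 2 and eregLO3ofRM gives
   1.  A full register number additionally needs the relevant REX bit;
   see gregOfRexRM/eregOfRexRM further below. */
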
/* Get an 8/16/32-bit unsigned value out of the insn stream. */

static inline UChar getUChar ( Long delta )
{
   UChar v = guest_code[delta+0];
   return v;
}

static UInt getUDisp16 ( Long delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v & 0xFFFF;
}

//.. static UInt getUDisp ( Int size, Long delta )
//.. {
//..    switch (size) {
//..       case 4: return getUDisp32(delta);
//..       case 2: return getUDisp16(delta);
//..       case 1: return getUChar(delta);
//..       default: vpanic("getUDisp(x86)");
//..    }
//..    return 0; /*notreached*/
//.. }


/* Get a byte value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp8 ( Long delta )
{
   return extend_s_8to64( guest_code[delta] );
}

/* Get a 16-bit value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp16 ( Long delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return extend_s_16to64( (UShort)v );
}

/* Get a 32-bit value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp32 ( Long delta )
{
   UInt v = guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return extend_s_32to64( v );
}

/* Get a 64-bit value out of the insn stream. */
static Long getDisp64 ( Long delta )
{
   ULong v = 0;
   v |= guest_code[delta+7]; v <<= 8;
   v |= guest_code[delta+6]; v <<= 8;
   v |= guest_code[delta+5]; v <<= 8;
   v |= guest_code[delta+4]; v <<= 8;
   v |= guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v;
}

/* Note: because AMD64 doesn't allow 64-bit literals, it is an error
   if this is called with size==8.  Should not happen. */
static Long getSDisp ( Int size, Long delta )
{
   switch (size) {
      case 4: return getSDisp32(delta);
      case 2: return getSDisp16(delta);
      case 1: return getSDisp8(delta);
      default: vpanic("getSDisp(amd64)");
   }
}

static ULong mkSizeMask ( Int sz )
{
   switch (sz) {
      case 1: return 0x00000000000000FFULL;
      case 2: return 0x000000000000FFFFULL;
      case 4: return 0x00000000FFFFFFFFULL;
      case 8: return 0xFFFFFFFFFFFFFFFFULL;
      default: vpanic("mkSzMask(amd64)");
   }
}

static Int imin ( Int a, Int b )
{
   return (a < b) ? a : b;
}

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: vex_printf("\nszToITy(%d)\n", n);
               vpanic("szToITy(amd64)");
   }
}


/*------------------------------------------------------------*/
/*--- For dealing with prefixes.                           ---*/
/*------------------------------------------------------------*/

/* The idea is to pass around an int holding a bitmask summarising
   info from the prefixes seen on the current instruction, including
   info from the REX byte.  This info is used in various places, but
   most especially when making sense of register fields in
   instructions.

   The top 8 bits of the prefix are 0x55, just as a hacky way to
   ensure it really is a valid prefix.

   Things you can safely assume about a well-formed prefix:
   * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
   * if REX is not present then REXW,REXR,REXX,REXB will read
     as zero.
   * F2 and F3 will not both be 1.
*/

typedef UInt  Prefix;

#define PFX_ASO    (1<<0)     /* address-size override present (0x67) */
#define PFX_66     (1<<1)     /* operand-size override-to-16 present (0x66) */
#define PFX_REX    (1<<2)     /* REX byte present (0x40 to 0x4F) */
#define PFX_REXW   (1<<3)     /* REX W bit, if REX present, else 0 */
#define PFX_REXR   (1<<4)     /* REX R bit, if REX present, else 0 */
#define PFX_REXX   (1<<5)     /* REX X bit, if REX present, else 0 */
#define PFX_REXB   (1<<6)     /* REX B bit, if REX present, else 0 */
#define PFX_LOCK   (1<<7)     /* bus LOCK prefix present (0xF0) */
#define PFX_F2     (1<<8)     /* REPNE/REPNZ prefix present (0xF2) */
#define PFX_F3     (1<<9)     /* REP/REPE/REPZ prefix present (0xF3) */
#define PFX_CS     (1<<10)    /* CS segment prefix present (0x2E) */
#define PFX_DS     (1<<11)    /* DS segment prefix present (0x3E) */
#define PFX_ES     (1<<12)    /* ES segment prefix present (0x26) */
#define PFX_FS     (1<<13)    /* FS segment prefix present (0x64) */
#define PFX_GS     (1<<14)    /* GS segment prefix present (0x65) */
#define PFX_SS     (1<<15)    /* SS segment prefix present (0x36) */
#define PFX_VEX    (1<<16)    /* VEX prefix present (0xC4 or 0xC5) */
#define PFX_VEXL   (1<<17)    /* VEX L bit, if VEX present, else 0 */
/* The extra register field VEX.vvvv is encoded (after not-ing it) as
   PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit
   positions. */
#define PFX_VEXnV0 (1<<18)    /* ~VEX vvvv[0], if VEX present, else 0 */
#define PFX_VEXnV1 (1<<19)    /* ~VEX vvvv[1], if VEX present, else 0 */
#define PFX_VEXnV2 (1<<20)    /* ~VEX vvvv[2], if VEX present, else 0 */
#define PFX_VEXnV3 (1<<21)    /* ~VEX vvvv[3], if VEX present, else 0 */


#define PFX_EMPTY 0x55000000

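/* Purely as an illustration of how the REX bits fit into this scheme,
   a REX byte in the range 0x40 .. 0x4F could be folded into a Prefix
   roughly as sketched below.  The helper name is hypothetical and is
   not used anywhere else in this file. */
#if 0
static Prefix foldInREX ( Prefix pfx, UChar rex )
{
   vassert(rex >= 0x40 && rex <= 0x4F);
   pfx |= PFX_REX;
   if (rex & 8) pfx |= PFX_REXW;
   if (rex & 4) pfx |= PFX_REXR;
   if (rex & 2) pfx |= PFX_REXX;
   if (rex & 1) pfx |= PFX_REXB;
   return pfx;
}
#endif
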
static Bool IS_VALID_PFX ( Prefix pfx ) {
   return toBool((pfx & 0xFF000000) == PFX_EMPTY);
}

static Bool haveREX ( Prefix pfx ) {
   return toBool(pfx & PFX_REX);
}

static Int getRexW ( Prefix pfx ) {
   return (pfx & PFX_REXW) ? 1 : 0;
}
static Int getRexR ( Prefix pfx ) {
   return (pfx & PFX_REXR) ? 1 : 0;
}
static Int getRexX ( Prefix pfx ) {
   return (pfx & PFX_REXX) ? 1 : 0;
}
static Int getRexB ( Prefix pfx ) {
   return (pfx & PFX_REXB) ? 1 : 0;
}

/* Check a prefix doesn't have F2 or F3 set in it, since usually that
   completely changes what instruction it really is. */
static Bool haveF2orF3 ( Prefix pfx ) {
   return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
}
static Bool haveF2 ( Prefix pfx ) {
   return toBool((pfx & PFX_F2) > 0);
}
static Bool haveF3 ( Prefix pfx ) {
   return toBool((pfx & PFX_F3) > 0);
}

static Bool have66 ( Prefix pfx ) {
   return toBool((pfx & PFX_66) > 0);
}
static Bool haveASO ( Prefix pfx ) {
   return toBool((pfx & PFX_ASO) > 0);
}

/* Return True iff pfx has 66 set and F2 and F3 clear */
static Bool have66noF2noF3 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
}

/* Return True iff pfx has F2 set and 66 and F3 clear */
static Bool haveF2no66noF3 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
}

/* Return True iff pfx has F3 set and 66 and F2 clear */
static Bool haveF3no66noF2 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
}

/* Return True iff pfx has F3 set and F2 clear */
static Bool haveF3noF2 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3);
}

/* Return True iff pfx has F2 set and F3 clear */
static Bool haveF2noF3 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2);
}

/* Return True iff pfx has 66, F2 and F3 clear */
static Bool haveNo66noF2noF3 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
}

/* Return True iff pfx has any of 66, F2 and F3 set */
static Bool have66orF2orF3 ( Prefix pfx )
{
   return toBool( ! haveNo66noF2noF3(pfx) );
}

/* Return True iff pfx has 66 or F2 set */
static Bool have66orF2 ( Prefix pfx )
{
   return toBool((pfx & (PFX_66|PFX_F2)) > 0);
}

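/* Illustration of how these predicates are meant to be read: for an
   SSE-style encoding beginning 66 0F .. (operand-size override, no
   F2/F3) have66noF2noF3(pfx) holds; for one beginning F3 0F .. it is
   haveF3no66noF2(pfx) that holds, and for F2 0F ..,
   haveF2no66noF3(pfx).  They exist so that such conditions can be
   stated directly at decode points. */
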
/* Clear all the segment-override bits in a prefix. */
static Prefix clearSegBits ( Prefix p )
{
   return
      p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
}

/* Get the (inverted, hence back to "normal") VEX.vvvv field. */
static UInt getVexNvvvv ( Prefix pfx ) {
   UInt r = (UInt)pfx;
   r /= (UInt)PFX_VEXnV0; /* pray this turns into a shift */
   return r & 0xF;
}

static Bool haveVEX ( Prefix pfx ) {
   return toBool(pfx & PFX_VEX);
}

static Int getVexL ( Prefix pfx ) {
   return (pfx & PFX_VEXL) ? 1 : 0;
}


/*------------------------------------------------------------*/
/*--- For dealing with escapes                             ---*/
/*------------------------------------------------------------*/


/* Escapes come after the prefixes, but before the primary opcode
   byte.  They escape the primary opcode byte into a bigger space.
   The 0xF0000000 isn't significant, except so as to make it not
   overlap valid Prefix values, for sanity checking.
*/

typedef
   enum {
      ESC_NONE=0xF0000000, // none
      ESC_0F,              // 0F
      ESC_0F38,            // 0F 38
      ESC_0F3A             // 0F 3A
   }
   Escape;


/*------------------------------------------------------------*/
/*--- For dealing with integer registers                   ---*/
/*------------------------------------------------------------*/

/* This is somewhat complex.  The rules are:

   For 64, 32 and 16 bit register references, the e or g fields in the
   modrm bytes supply the low 3 bits of the register number.  The
   fourth (most-significant) bit of the register number is supplied by
   the REX byte, if it is present; else that bit is taken to be zero.

   The REX.R bit supplies the high bit corresponding to the g register
   field, and the REX.B bit supplies the high bit corresponding to the
   e register field (when the mod part of modrm indicates that modrm's
   e component refers to a register and not to memory).

   The REX.X bit supplies a high register bit for certain registers
   in SIB address modes, and is generally rarely used.

   For 8 bit register references, the presence of the REX byte itself
   has significance.  If there is no REX present, then the 3-bit
   number extracted from the modrm e or g field is treated as an index
   into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
   old x86 encoding scheme.

   But if there is a REX present, the register reference is
   interpreted in the same way as for 64/32/16-bit references: a high
   bit is extracted from REX, giving a 4-bit number, and the denoted
   register is the lowest 8 bits of the 16 integer registers denoted
   by the number.  In particular, values 4 through 7 of this sequence
   do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
   %rsp %rbp %rsi %rdi.

   The REX.W bit has no bearing at all on register numbers.  Instead
   its presence indicates that the operand size is to be overridden
   from its default value (32 bits) to 64 bits instead.  This is in
   the same fashion that an 0x66 prefix indicates the operand size is
   to be overridden from 32 bits down to 16 bits.  When both REX.W and
   0x66 are present there is a conflict, and REX.W takes precedence.

   Rather than try to handle this complexity using a single huge
   function, several smaller ones are provided.  The aim is to make it
   as difficult as possible to screw up register decoding in a subtle
   and hard-to-track-down way.

   Because these routines fish around in the host's memory (that is,
   in the guest state area) for sub-parts of guest registers, their
   correctness depends on the host's endianness.  So far these
   routines only work for little-endian hosts.  Those for which
   endianness is important have assertions to ensure sanity.
*/

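/* Concretely, for sz==1 and register number 4: with no REX byte
   present (the "irregular" case) the reference means %ah, but with a
   REX byte present it means %spl.  The same pattern applies to
   numbers 5, 6 and 7 (%ch/%dh/%bh versus %bpl/%sil/%dil). */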

/* About the simplest question you can ask: where do the 64-bit
   integer registers live (in the guest state) ? */

static Int integerGuestReg64Offset ( UInt reg )
{
   switch (reg) {
      case R_RAX: return OFFB_RAX;
      case R_RCX: return OFFB_RCX;
      case R_RDX: return OFFB_RDX;
      case R_RBX: return OFFB_RBX;
      case R_RSP: return OFFB_RSP;
      case R_RBP: return OFFB_RBP;
      case R_RSI: return OFFB_RSI;
      case R_RDI: return OFFB_RDI;
      case R_R8:  return OFFB_R8;
      case R_R9:  return OFFB_R9;
      case R_R10: return OFFB_R10;
      case R_R11: return OFFB_R11;
      case R_R12: return OFFB_R12;
      case R_R13: return OFFB_R13;
      case R_R14: return OFFB_R14;
      case R_R15: return OFFB_R15;
      default: vpanic("integerGuestReg64Offset(amd64)");
   }
}


/* Produce the name of an integer register, for printing purposes.
   reg is a number in the range 0 .. 15 that has been generated from a
   3-bit reg-field number and a REX extension bit.  irregular denotes
   the case where sz==1 and no REX byte is present. */

static
HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
{
   static HChar* ireg64_names[16]
     = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
         "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
   static HChar* ireg32_names[16]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
         "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
   static HChar* ireg16_names[16]
     = { "%ax",  "%cx",  "%dx",  "%bx",  "%sp",  "%bp",  "%si",  "%di",
         "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
   static HChar* ireg8_names[16]
     = { "%al",  "%cl",  "%dl",  "%bl",  "%spl", "%bpl", "%sil", "%dil",
         "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
   static HChar* ireg8_irregular[8]
     = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" };

   vassert(reg < 16);
   if (sz == 1) {
      if (irregular)
         vassert(reg < 8);
   } else {
      vassert(irregular == False);
   }

   switch (sz) {
      case 8: return ireg64_names[reg];
      case 4: return ireg32_names[reg];
      case 2: return ireg16_names[reg];
      case 1: if (irregular) {
                 return ireg8_irregular[reg];
              } else {
                 return ireg8_names[reg];
              }
      default: vpanic("nameIReg(amd64)");
   }
}

/* Using the same argument conventions as nameIReg, produce the
   guest state offset of an integer register. */

static
Int offsetIReg ( Int sz, UInt reg, Bool irregular )
{
   vassert(reg < 16);
   if (sz == 1) {
      if (irregular)
         vassert(reg < 8);
   } else {
      vassert(irregular == False);
   }

   /* Deal with irregular case -- sz==1 and no REX present */
   if (sz == 1 && irregular) {
      switch (reg) {
         case R_RSP: return 1+ OFFB_RAX;
         case R_RBP: return 1+ OFFB_RCX;
         case R_RSI: return 1+ OFFB_RDX;
         case R_RDI: return 1+ OFFB_RBX;
         default:    break; /* use the normal case */
      }
   }

   /* Normal case */
   return integerGuestReg64Offset(reg);
}


/* Read the %CL register :: Ity_I8, for shift/rotate operations. */

static IRExpr* getIRegCL ( void )
{
   vassert(!host_is_bigendian);
   return IRExpr_Get( OFFB_RCX, Ity_I8 );
}


/* Write to the %AH register. */

static void putIRegAH ( IRExpr* e )
{
   vassert(!host_is_bigendian);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
   stmt( IRStmt_Put( OFFB_RAX+1, e ) );
}


/* Read/write various widths of %RAX, as it has various
   special-purpose uses. */

static HChar* nameIRegRAX ( Int sz )
{
   switch (sz) {
      case 1: return "%al";
      case 2: return "%ax";
      case 4: return "%eax";
      case 8: return "%rax";
      default: vpanic("nameIRegRAX(amd64)");
   }
}

static IRExpr* getIRegRAX ( Int sz )
{
   vassert(!host_is_bigendian);
   switch (sz) {
      case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 );
      case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 );
      case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
      case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
      default: vpanic("getIRegRAX(amd64)");
   }
}

static void putIRegRAX ( Int sz, IRExpr* e )
{
   IRType ty = typeOfIRExpr(irsb->tyenv, e);
   vassert(!host_is_bigendian);
   switch (sz) {
      case 8: vassert(ty == Ity_I64);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      case 4: vassert(ty == Ity_I32);
              stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
              break;
      case 2: vassert(ty == Ity_I16);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      case 1: vassert(ty == Ity_I8);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      default: vpanic("putIRegRAX(amd64)");
   }
}


/* Read/write various widths of %RDX, as it has various
   special-purpose uses. */

static HChar* nameIRegRDX ( Int sz )
{
   switch (sz) {
      case 1: return "%dl";
      case 2: return "%dx";
      case 4: return "%edx";
      case 8: return "%rdx";
      default: vpanic("nameIRegRDX(amd64)");
   }
}

static IRExpr* getIRegRDX ( Int sz )
{
   vassert(!host_is_bigendian);
   switch (sz) {
      case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 );
      case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 );
      case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
      case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
      default: vpanic("getIRegRDX(amd64)");
   }
}

static void putIRegRDX ( Int sz, IRExpr* e )
{
   vassert(!host_is_bigendian);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   switch (sz) {
      case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
              break;
      case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      default: vpanic("putIRegRDX(amd64)");
   }
}


/* Simplistic functions to deal with the integer registers as a
   straightforward bank of 16 64-bit regs. */

static IRExpr* getIReg64 ( UInt regno )
{
   return IRExpr_Get( integerGuestReg64Offset(regno),
                      Ity_I64 );
}

static void putIReg64 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
}

static HChar* nameIReg64 ( UInt regno )
{
   return nameIReg( 8, regno, False );
}


/* Simplistic functions to deal with the lower halves of integer
   registers as a straightforward bank of 16 32-bit regs. */

static IRExpr* getIReg32 ( UInt regno )
{
   vassert(!host_is_bigendian);
   return unop(Iop_64to32,
               IRExpr_Get( integerGuestReg64Offset(regno),
                           Ity_I64 ));
}

static void putIReg32 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno),
                     unop(Iop_32Uto64,e) ) );
}

static HChar* nameIReg32 ( UInt regno )
{
   return nameIReg( 4, regno, False );
}


/* Simplistic functions to deal with the lower quarters of integer
   registers as a straightforward bank of 16 16-bit regs. */

static IRExpr* getIReg16 ( UInt regno )
{
   vassert(!host_is_bigendian);
   return IRExpr_Get( integerGuestReg64Offset(regno),
                      Ity_I16 );
}

static void putIReg16 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno),
                     unop(Iop_16Uto64,e) ) );
}

static HChar* nameIReg16 ( UInt regno )
{
   return nameIReg( 2, regno, False );
}


/* Sometimes what we know is a 3-bit register number, a REX byte, and
   which field of the REX byte is to be used to extend to a 4-bit
   number.  These functions cater for that situation.
*/
static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return getIReg64( lo3bits | (getRexX(pfx) << 3) );
}

static HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
}

static HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   return nameIReg( sz, lo3bits | (getRexB(pfx) << 3),
                        toBool(sz==1 && !haveREX(pfx)) );
}

static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get(
                     offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                                     toBool(sz==1 && !haveREX(pfx)) ),
                     szToITy(sz)
                  )
             );
   } else {
      return IRExpr_Get(
                offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                                toBool(sz==1 && !haveREX(pfx)) ),
                szToITy(sz)
             );
   }
}

static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   stmt( IRStmt_Put(
            offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                            toBool(sz==1 && !haveREX(pfx)) ),
            sz==4 ? unop(Iop_32Uto64,e) : e
   ));
}

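/* As an illustrative reading of the above: getIRegRexB(8, pfx, 5)
   denotes %rbp when REX.B is clear and %r13 when REX.B is set, while
   getIRegRexB(1, pfx, 5) denotes %ch when no REX byte is present at
   all, but %bpl when one is. */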

/* Functions for getting register numbers from modrm bytes and REX
   when we don't have to consider the complexities of integer subreg
   accesses.
*/
/* Extract the g reg field from a modRM byte, and augment it using the
   REX.R bit from the supplied REX byte.  The R bit usually is
   associated with the g register field.
*/
static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
{
   Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
   reg += (pfx & PFX_REXR) ? 8 : 0;
   return reg;
}

/* Extract the e reg field from a modRM byte, and augment it using the
   REX.B bit from the supplied REX byte.  The B bit usually is
   associated with the e register field (when modrm indicates e is a
   register, that is).
*/
static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
{
   Int rm;
   vassert(epartIsReg(mod_reg_rm));
   rm = (Int)(mod_reg_rm & 0x7);
   rm += (pfx & PFX_REXB) ? 8 : 0;
   return rm;
}

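/* Example: for the modRM byte 0xD1 (mod=3, g=2, e=1), gregOfRexRM
   gives 2, or 10 if PFX_REXR is set; eregOfRexRM gives 1, or 9 if
   PFX_REXB is set.  These are whole-register numbers in 0 .. 15 and
   carry no information about operand size. */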

/* General functions for dealing with integer register access. */

/* Produce the guest state offset for a reference to the 'g' register
   field in a modrm byte, taking into account REX (or its absence),
   and the size of the access.
*/
static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(!host_is_bigendian);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   reg = gregOfRexRM( pfx, mod_reg_rm );
   return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
}

static
IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
                         szToITy(sz) );
   }
}

static
void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
}

static
HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm),
                        toBool(sz==1 && !haveREX(pfx)) );
}


/* Produce the guest state offset for a reference to the 'e' register
   field in a modrm byte, taking into account REX (or its absence),
   and the size of the access.  eregOfRexRM will assert if mod_reg_rm
   denotes a memory access rather than a register access.
*/
static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(!host_is_bigendian);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   reg = eregOfRexRM( pfx, mod_reg_rm );
   return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
}

static
IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
                         szToITy(sz) );
   }
}

static
void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
}

static
HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm),
                        toBool(sz==1 && !haveREX(pfx)) );
}


/*------------------------------------------------------------*/
/*--- For dealing with XMM registers                       ---*/
/*------------------------------------------------------------*/

static Int ymmGuestRegOffset ( UInt ymmreg )
{
   switch (ymmreg) {
      case 0:  return OFFB_YMM0;
      case 1:  return OFFB_YMM1;
      case 2:  return OFFB_YMM2;
      case 3:  return OFFB_YMM3;
      case 4:  return OFFB_YMM4;
      case 5:  return OFFB_YMM5;
      case 6:  return OFFB_YMM6;
      case 7:  return OFFB_YMM7;
      case 8:  return OFFB_YMM8;
      case 9:  return OFFB_YMM9;
      case 10: return OFFB_YMM10;
      case 11: return OFFB_YMM11;
      case 12: return OFFB_YMM12;
      case 13: return OFFB_YMM13;
      case 14: return OFFB_YMM14;
      case 15: return OFFB_YMM15;
      default: vpanic("ymmGuestRegOffset(amd64)");
   }
}

static Int xmmGuestRegOffset ( UInt xmmreg )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   return ymmGuestRegOffset( xmmreg );
}

/* Lanes of vector registers are always numbered from zero being the
   least significant lane (rightmost in the register). */

static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 8);
   return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
}

static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 4);
   return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
}

static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 2);
   return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
}

static Int ymmGuestRegLane128offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 2);
   return ymmGuestRegOffset( ymmreg ) + 16 * laneno;
}

static Int ymmGuestRegLane64offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 4);
   return ymmGuestRegOffset( ymmreg ) + 8 * laneno;
}

static Int ymmGuestRegLane32offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 8);
   return ymmGuestRegOffset( ymmreg ) + 4 * laneno;
}

sewardj1001dc42005-02-21 08:25:55 +00001426static IRExpr* getXMMReg ( UInt xmmreg )
1427{
1428 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
1429}
1430
1431static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
1432{
1433 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
1434}
1435
sewardj18303862005-02-21 12:36:54 +00001436static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
1437{
1438 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
1439}
1440
sewardj8d965312005-02-25 02:48:47 +00001441static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
1442{
1443 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
1444}
1445
sewardjc49ce232005-02-25 13:03:03 +00001446static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
1447{
1448 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
1449}
sewardj1001dc42005-02-21 08:25:55 +00001450
de5a70f5c2010-04-01 23:08:59 +00001451static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
1452{
1453 return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
1454}
1455
sewardj1001dc42005-02-21 08:25:55 +00001456static void putXMMReg ( UInt xmmreg, IRExpr* e )
1457{
sewardjdd40fdf2006-12-24 02:20:24 +00001458 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
sewardj1001dc42005-02-21 08:25:55 +00001459 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
1460}
1461
1462static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
1463{
sewardjdd40fdf2006-12-24 02:20:24 +00001464 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
sewardj1001dc42005-02-21 08:25:55 +00001465 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1466}
1467
sewardj1a01e652005-02-23 11:39:21 +00001468static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
1469{
sewardjdd40fdf2006-12-24 02:20:24 +00001470 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
sewardj1a01e652005-02-23 11:39:21 +00001471 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1472}
1473
sewardj8d965312005-02-25 02:48:47 +00001474static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
1475{
sewardjdd40fdf2006-12-24 02:20:24 +00001476 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
sewardj8d965312005-02-25 02:48:47 +00001477 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1478}
1479
1480static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
1481{
sewardjdd40fdf2006-12-24 02:20:24 +00001482 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
sewardj8d965312005-02-25 02:48:47 +00001483 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1484}
1485
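/* Illustrative usage sketch (not part of the translator): copying the
   low F32 lane of %xmm1 into lane 1 of %xmm0 with the accessors above
   would be

      putXMMRegLane32F( 0, 1, getXMMRegLane32F( 1, 0 ) );

   which, given the little-endian lane numbering described earlier,
   reads guest offset xmmGuestRegOffset(1) + 0 and writes guest offset
   xmmGuestRegOffset(0) + 4. */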
sewardjc4530ae2012-05-21 10:18:49 +00001486static IRExpr* getYMMReg ( UInt ymmreg )
1487{
1488   return IRExpr_Get( ymmGuestRegOffset(ymmreg), Ity_V256 );
1489}
1490
1491static IRExpr* getYMMRegLane128 ( UInt ymmreg, Int laneno )
1492{
1493 return IRExpr_Get( ymmGuestRegLane128offset(ymmreg,laneno), Ity_V128 );
1494}
1495
sewardj82096922012-06-24 14:57:59 +00001496static IRExpr* getYMMRegLane64 ( UInt ymmreg, Int laneno )
1497{
1498 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_I64 );
1499}
1500
sewardj8eb7ae82012-06-24 14:00:27 +00001501static IRExpr* getYMMRegLane32 ( UInt ymmreg, Int laneno )
1502{
1503 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_I32 );
1504}
1505
sewardjc4530ae2012-05-21 10:18:49 +00001506static void putYMMReg ( UInt ymmreg, IRExpr* e )
1507{
1508 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V256);
1509 stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg), e ) );
1510}
1511
1512static void putYMMRegLane128 ( UInt ymmreg, Int laneno, IRExpr* e )
1513{
1514 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
1515 stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg,laneno), e ) );
1516}
1517
sewardj66becf32012-06-18 23:15:16 +00001518static void putYMMRegLane64F ( UInt ymmreg, Int laneno, IRExpr* e )
1519{
1520 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
1521 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
1522}
1523
sewardj82096922012-06-24 14:57:59 +00001524static void putYMMRegLane64 ( UInt ymmreg, Int laneno, IRExpr* e )
1525{
1526 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1527 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
1528}
1529
sewardj66becf32012-06-18 23:15:16 +00001530static void putYMMRegLane32F ( UInt ymmreg, Int laneno, IRExpr* e )
1531{
1532 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
1533 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
1534}
1535
1536static void putYMMRegLane32 ( UInt ymmreg, Int laneno, IRExpr* e )
1537{
1538 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1539 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
1540}
1541
sewardj1001dc42005-02-21 08:25:55 +00001542static IRExpr* mkV128 ( UShort mask )
1543{
1544 return IRExpr_Const(IRConst_V128(mask));
1545}
sewardjdf0e0022005-01-25 15:48:43 +00001546
sewardjc4530ae2012-05-21 10:18:49 +00001547/* Write the low half of a YMM reg and zero out the upper half. */
1548static void putYMMRegLoAndZU ( UInt ymmreg, IRExpr* e )
1549{
1550 putYMMRegLane128( ymmreg, 0, e );
1551 putYMMRegLane128( ymmreg, 1, mkV128(0) );
1552}
1553
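/* Illustrative note: putYMMRegLoAndZU models the AVX (VEX-encoded)
   convention that a 128-bit destination write also clears bits
   255:128 of the ymm register, e.g. (sketch only)

      putYMMRegLoAndZU( rG, mkexpr(res128) );

   whereas a legacy SSE write would use putYMMRegLane128( rG, 0, .. )
   and leave the upper lane untouched.  'rG' and 'res128' are
   hypothetical names. */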
sewardje8f65252005-08-23 23:44:35 +00001554static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
1555{
sewardjdd40fdf2006-12-24 02:20:24 +00001556 vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
1557 vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
sewardje8f65252005-08-23 23:44:35 +00001558 return unop(Iop_64to1,
1559 binop(Iop_And64,
1560 unop(Iop_1Uto64,x),
1561 unop(Iop_1Uto64,y)));
1562}
1563
sewardje9d8a262009-07-01 08:06:34 +00001564/* Generate a compare-and-swap operation, operating on memory at
1565 'addr'. The expected value is 'expVal' and the new value is
1566 'newVal'. If the operation fails, then transfer control (with a
1567 no-redir jump (XXX no -- see comment at top of this file)) to
1568 'restart_point', which is presumably the address of the guest
1569 instruction again -- retrying, essentially. */
1570static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
1571 Addr64 restart_point )
1572{
1573 IRCAS* cas;
1574 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
1575 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
1576 IRTemp oldTmp = newTemp(tyE);
1577 IRTemp expTmp = newTemp(tyE);
1578 vassert(tyE == tyN);
1579 vassert(tyE == Ity_I64 || tyE == Ity_I32
1580 || tyE == Ity_I16 || tyE == Ity_I8);
1581 assign(expTmp, expVal);
1582 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
1583 NULL, mkexpr(expTmp), NULL, newVal );
1584 stmt( IRStmt_CAS(cas) );
1585 stmt( IRStmt_Exit(
sewardj1fb8c922009-07-12 12:56:53 +00001586 binop( mkSizedOp(tyE,Iop_CasCmpNE8),
1587 mkexpr(oldTmp), mkexpr(expTmp) ),
sewardje9d8a262009-07-01 08:06:34 +00001588 Ijk_Boring, /*Ijk_NoRedir*/
sewardjc6f970f2012-04-02 21:54:49 +00001589 IRConst_U64( restart_point ),
1590 OFFB_RIP
sewardje9d8a262009-07-01 08:06:34 +00001591 ));
1592}
1593
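/* A minimal sketch of how a locked read-modify-write might use casLE
   (illustrative only; the real callers appear later in this file, and
   'addr' here is a hypothetical I64 temp holding the effective
   address):

      IRTemp tOld = newTemp(Ity_I32);
      IRTemp tNew = newTemp(Ity_I32);
      assign( tOld, loadLE(Ity_I32, mkexpr(addr)) );
      assign( tNew, binop(Iop_Add32, mkexpr(tOld), mkU32(1)) );
      casLE( mkexpr(addr), mkexpr(tOld), mkexpr(tNew),
             guest_RIP_curr_instr );

   If memory no longer holds tOld when the CAS executes, the side exit
   restarts the guest instruction at guest_RIP_curr_instr. */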
sewardj5b470602005-02-27 13:10:48 +00001594
sewardj118b23e2005-01-29 02:14:44 +00001595/*------------------------------------------------------------*/
sewardje8f65252005-08-23 23:44:35 +00001596/*--- Helpers for %rflags. ---*/
sewardj118b23e2005-01-29 02:14:44 +00001597/*------------------------------------------------------------*/
1598
1599/* -------------- Evaluating the flags-thunk. -------------- */
1600
1601/* Build IR to calculate all the eflags from stored
1602 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1603 Ity_I64. */
1604static IRExpr* mk_amd64g_calculate_rflags_all ( void )
1605{
1606 IRExpr** args
1607 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1608 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1609 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1610 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1611 IRExpr* call
1612 = mkIRExprCCall(
1613 Ity_I64,
1614 0/*regparm*/,
1615 "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
1616 args
1617 );
1618 /* Exclude OP and NDEP from definedness checking. We're only
1619 interested in DEP1 and DEP2. */
1620 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1621 return call;
1622}
sewardj3ca55a12005-01-27 16:06:23 +00001623
1624/* Build IR to calculate some particular condition from stored
1625 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1626   Ity_I1. */
1627static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
1628{
1629 IRExpr** args
1630 = mkIRExprVec_5( mkU64(cond),
1631 IRExpr_Get(OFFB_CC_OP, Ity_I64),
1632 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1633 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1634 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1635 IRExpr* call
1636 = mkIRExprCCall(
1637 Ity_I64,
1638 0/*regparm*/,
1639 "amd64g_calculate_condition", &amd64g_calculate_condition,
1640 args
1641 );
1642 /* Exclude the requested condition, OP and NDEP from definedness
1643 checking. We're only interested in DEP1 and DEP2. */
1644 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
sewardje58967e2005-04-27 11:50:56 +00001645 return unop(Iop_64to1, call);
sewardj3ca55a12005-01-27 16:06:23 +00001646}
sewardjdf0e0022005-01-25 15:48:43 +00001647
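/* Illustrative sketch (not part of the translator): a conditional
   side exit on the Z flag could be built from the thunk as

      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(AMD64CondZ),
                         Ijk_Boring,
                         IRConst_U64(d64_taken),
                         OFFB_RIP ) );

   where 'd64_taken' is a hypothetical branch-target constant; the
   real conditional-jump handling is in jcc_01 further down. */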
1648/* Build IR to calculate just the carry flag from stored
1649 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I64. */
1650static IRExpr* mk_amd64g_calculate_rflags_c ( void )
1651{
1652 IRExpr** args
1653 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1654 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1655 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1656 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1657 IRExpr* call
1658 = mkIRExprCCall(
1659 Ity_I64,
1660 0/*regparm*/,
1661 "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
1662 args
1663 );
1664 /* Exclude OP and NDEP from definedness checking. We're only
1665 interested in DEP1 and DEP2. */
1666 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1667 return call;
1668}
1669
1670
1671/* -------------- Building the flags-thunk. -------------- */
1672
1673/* The machinery in this section builds the flag-thunk following a
1674 flag-setting operation. Hence the various setFlags_* functions.
1675*/
1676
1677static Bool isAddSub ( IROp op8 )
1678{
sewardj7a240552005-01-28 21:37:12 +00001679 return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
sewardjdf0e0022005-01-25 15:48:43 +00001680}
1681
sewardj3ca55a12005-01-27 16:06:23 +00001682static Bool isLogic ( IROp op8 )
1683{
sewardj7a240552005-01-28 21:37:12 +00001684 return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
sewardj3ca55a12005-01-27 16:06:23 +00001685}
sewardjdf0e0022005-01-25 15:48:43 +00001686
1687/* U-widen 8/16/32/64 bit int expr to 64. */
1688static IRExpr* widenUto64 ( IRExpr* e )
1689{
sewardjdd40fdf2006-12-24 02:20:24 +00001690 switch (typeOfIRExpr(irsb->tyenv,e)) {
sewardjdf0e0022005-01-25 15:48:43 +00001691 case Ity_I64: return e;
1692 case Ity_I32: return unop(Iop_32Uto64, e);
sewardje58967e2005-04-27 11:50:56 +00001693 case Ity_I16: return unop(Iop_16Uto64, e);
1694 case Ity_I8: return unop(Iop_8Uto64, e);
sewardj118b23e2005-01-29 02:14:44 +00001695 default: vpanic("widenUto64");
sewardjdf0e0022005-01-25 15:48:43 +00001696 }
1697}
1698
sewardj118b23e2005-01-29 02:14:44 +00001699/* S-widen 8/16/32/64 bit int expr to 64. */
1700static IRExpr* widenSto64 ( IRExpr* e )
1701{
sewardjdd40fdf2006-12-24 02:20:24 +00001702 switch (typeOfIRExpr(irsb->tyenv,e)) {
sewardj118b23e2005-01-29 02:14:44 +00001703 case Ity_I64: return e;
1704 case Ity_I32: return unop(Iop_32Sto64, e);
sewardje58967e2005-04-27 11:50:56 +00001705 case Ity_I16: return unop(Iop_16Sto64, e);
1706 case Ity_I8: return unop(Iop_8Sto64, e);
sewardj118b23e2005-01-29 02:14:44 +00001707 default: vpanic("widenSto64");
1708 }
1709}
sewardjdf0e0022005-01-25 15:48:43 +00001710
1711/* Narrow 8/16/32/64 bit int expr to 8/16/32/64. Clearly only some
1712 of these combinations make sense. */
1713static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
1714{
sewardjdd40fdf2006-12-24 02:20:24 +00001715 IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
sewardjdf0e0022005-01-25 15:48:43 +00001716 if (src_ty == dst_ty)
1717 return e;
1718 if (src_ty == Ity_I32 && dst_ty == Ity_I16)
1719 return unop(Iop_32to16, e);
1720 if (src_ty == Ity_I32 && dst_ty == Ity_I8)
1721 return unop(Iop_32to8, e);
sewardj118b23e2005-01-29 02:14:44 +00001722 if (src_ty == Ity_I64 && dst_ty == Ity_I32)
1723 return unop(Iop_64to32, e);
1724 if (src_ty == Ity_I64 && dst_ty == Ity_I16)
sewardje58967e2005-04-27 11:50:56 +00001725 return unop(Iop_64to16, e);
sewardj03b07cc2005-01-31 18:09:43 +00001726 if (src_ty == Ity_I64 && dst_ty == Ity_I8)
sewardje58967e2005-04-27 11:50:56 +00001727 return unop(Iop_64to8, e);
sewardjdf0e0022005-01-25 15:48:43 +00001728
1729 vex_printf("\nsrc, dst tys are: ");
1730 ppIRType(src_ty);
1731 vex_printf(", ");
1732 ppIRType(dst_ty);
1733 vex_printf("\n");
1734 vpanic("narrowTo(amd64)");
1735}
1736
1737
1738/* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
1739 auto-sized up to the real op. */
1740
1741static
1742void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
1743{
1744 Int ccOp = 0;
1745 switch (ty) {
1746 case Ity_I8: ccOp = 0; break;
1747 case Ity_I16: ccOp = 1; break;
1748 case Ity_I32: ccOp = 2; break;
1749 case Ity_I64: ccOp = 3; break;
1750 default: vassert(0);
1751 }
1752 switch (op8) {
1753 case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
1754 case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
1755 default: ppIROp(op8);
1756 vpanic("setFlags_DEP1_DEP2(amd64)");
1757 }
1758 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1759 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1760 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
1761}
1762
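/* Worked example (illustrative): for a 32-bit add, a caller would do
   something like

      assign( dst1, binop(Iop_Add32, mkexpr(dst0), mkexpr(src)) );
      setFlags_DEP1_DEP2( Iop_Add8, dst0, src, Ity_I32 );

   leaving CC_OP = AMD64G_CC_OP_ADDL (that is, AMD64G_CC_OP_ADDB + 2),
   CC_DEP1 = the first operand and CC_DEP2 = the second, both
   zero-widened to 64 bits.  'dst0', 'src' and 'dst1' are hypothetical
   temps. */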
1763
1764/* Set the OP and DEP1 fields only, and write zero to DEP2. */
1765
1766static
1767void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
1768{
1769 Int ccOp = 0;
1770 switch (ty) {
1771 case Ity_I8: ccOp = 0; break;
1772 case Ity_I16: ccOp = 1; break;
1773 case Ity_I32: ccOp = 2; break;
1774 case Ity_I64: ccOp = 3; break;
1775 default: vassert(0);
1776 }
1777 switch (op8) {
1778 case Iop_Or8:
1779 case Iop_And8:
1780 case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
1781 default: ppIROp(op8);
1782 vpanic("setFlags_DEP1(amd64)");
1783 }
1784 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1785 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1786 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
1787}
1788
1789
sewardj118b23e2005-01-29 02:14:44 +00001790/* For shift operations, we put in the result and the undershifted
1791   result.  However, if the shift amount is zero, the thunk is left
1792 unchanged. */
1793
1794static void setFlags_DEP1_DEP2_shift ( IROp op64,
1795 IRTemp res,
1796 IRTemp resUS,
1797 IRType ty,
1798 IRTemp guard )
1799{
1800 Int ccOp = 0;
1801 switch (ty) {
1802 case Ity_I8: ccOp = 0; break;
1803 case Ity_I16: ccOp = 1; break;
1804 case Ity_I32: ccOp = 2; break;
1805 case Ity_I64: ccOp = 3; break;
1806 default: vassert(0);
1807 }
1808
1809 vassert(guard);
1810
1811 /* Both kinds of right shifts are handled by the same thunk
1812 operation. */
1813 switch (op64) {
1814 case Iop_Shr64:
1815 case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
1816 case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
1817 default: ppIROp(op64);
1818 vpanic("setFlags_DEP1_DEP2_shift(amd64)");
1819 }
1820
1821 /* DEP1 contains the result, DEP2 contains the undershifted value. */
1822 stmt( IRStmt_Put( OFFB_CC_OP,
1823 IRExpr_Mux0X( mkexpr(guard),
1824 IRExpr_Get(OFFB_CC_OP,Ity_I64),
1825 mkU64(ccOp))) );
1826 stmt( IRStmt_Put( OFFB_CC_DEP1,
1827 IRExpr_Mux0X( mkexpr(guard),
1828 IRExpr_Get(OFFB_CC_DEP1,Ity_I64),
1829 widenUto64(mkexpr(res)))) );
1830 stmt( IRStmt_Put( OFFB_CC_DEP2,
1831 IRExpr_Mux0X( mkexpr(guard),
1832 IRExpr_Get(OFFB_CC_DEP2,Ity_I64),
1833 widenUto64(mkexpr(resUS)))) );
1834}
sewardj354e5c62005-01-27 20:12:52 +00001835
1836
1837/* For the inc/dec case, we store in DEP1 the result value and in NDEP
1838 the former value of the carry flag, which unfortunately we have to
1839 compute. */
1840
1841static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
1842{
1843 Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;
1844
1845 switch (ty) {
1846 case Ity_I8: ccOp += 0; break;
1847 case Ity_I16: ccOp += 1; break;
1848 case Ity_I32: ccOp += 2; break;
1849 case Ity_I64: ccOp += 3; break;
1850 default: vassert(0);
1851 }
1852
1853 /* This has to come first, because calculating the C flag
1854 may require reading all four thunk fields. */
1855 stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
1856 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
sewardj478646f2008-05-01 20:13:04 +00001857 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
sewardj354e5c62005-01-27 20:12:52 +00001858 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
1859}
1860
1861
sewardj32b2bbe2005-01-28 00:50:10 +00001862/* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
1863 two arguments. */
1864
1865static
1866void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
1867{
1868 switch (ty) {
1869 case Ity_I8:
1870 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) );
1871 break;
1872 case Ity_I16:
1873 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) );
1874 break;
1875 case Ity_I32:
1876 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) );
1877 break;
1878 case Ity_I64:
1879 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) );
1880 break;
1881 default:
1882 vpanic("setFlags_MUL(amd64)");
1883 }
1884 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
1885 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
1886}
sewardj3ca55a12005-01-27 16:06:23 +00001887
1888
1889/* -------------- Condition codes. -------------- */
1890
1891/* Condition codes, using the AMD encoding. */
1892
sewardj8c332e22005-01-28 01:36:56 +00001893static HChar* name_AMD64Condcode ( AMD64Condcode cond )
sewardj3ca55a12005-01-27 16:06:23 +00001894{
1895 switch (cond) {
1896 case AMD64CondO: return "o";
1897 case AMD64CondNO: return "no";
1898 case AMD64CondB: return "b";
sewardje941eea2005-01-30 19:52:28 +00001899 case AMD64CondNB: return "ae"; /*"nb";*/
1900 case AMD64CondZ: return "e"; /*"z";*/
1901 case AMD64CondNZ: return "ne"; /*"nz";*/
sewardj3ca55a12005-01-27 16:06:23 +00001902 case AMD64CondBE: return "be";
sewardje941eea2005-01-30 19:52:28 +00001903 case AMD64CondNBE: return "a"; /*"nbe";*/
sewardj3ca55a12005-01-27 16:06:23 +00001904 case AMD64CondS: return "s";
1905 case AMD64CondNS: return "ns";
1906 case AMD64CondP: return "p";
1907 case AMD64CondNP: return "np";
1908 case AMD64CondL: return "l";
sewardje941eea2005-01-30 19:52:28 +00001909 case AMD64CondNL: return "ge"; /*"nl";*/
sewardj3ca55a12005-01-27 16:06:23 +00001910 case AMD64CondLE: return "le";
sewardje941eea2005-01-30 19:52:28 +00001911 case AMD64CondNLE: return "g"; /*"nle";*/
sewardj3ca55a12005-01-27 16:06:23 +00001912 case AMD64CondAlways: return "ALWAYS";
1913 default: vpanic("name_AMD64Condcode");
1914 }
1915}
1916
sewardj1389d4d2005-01-28 13:46:29 +00001917static
1918AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode cond,
1919 /*OUT*/Bool* needInvert )
1920{
1921 vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
1922 if (cond & 1) {
1923 *needInvert = True;
1924 return cond-1;
1925 } else {
1926 *needInvert = False;
1927 return cond;
1928 }
1929}
sewardjdf0e0022005-01-25 15:48:43 +00001930
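/* Example (illustrative): positiveIse_AMD64Condcode(AMD64CondNZ, &inv)
   returns AMD64CondZ and sets inv to True, since each odd-numbered
   condition code is the negation of the even-numbered one before it.
   Callers evaluate the positive condition and then account for the
   inversion, e.g. by swapping branch targets as jcc_01 does below. */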
1931
1932/* -------------- Helpers for ADD/SUB with carry. -------------- */
1933
1934/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
1935 appropriately.
sewardje9d8a262009-07-01 08:06:34 +00001936
1937 Optionally, generate a store for the 'tres' value. This can either
1938 be a normal store, or it can be a cas-with-possible-failure style
1939 store:
1940
1941 if taddr is IRTemp_INVALID, then no store is generated.
1942
1943 if taddr is not IRTemp_INVALID, then a store (using taddr as
1944 the address) is generated:
1945
1946 if texpVal is IRTemp_INVALID then a normal store is
1947 generated, and restart_point must be zero (it is irrelevant).
1948
1949 if texpVal is not IRTemp_INVALID then a cas-style store is
1950 generated. texpVal is the expected value, restart_point
1951 is the restart point if the store fails, and texpVal must
1952 have the same type as tres.
1953
sewardjdf0e0022005-01-25 15:48:43 +00001954*/
1955static void helper_ADC ( Int sz,
sewardje9d8a262009-07-01 08:06:34 +00001956 IRTemp tres, IRTemp ta1, IRTemp ta2,
1957 /* info about optional store: */
1958 IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
sewardjdf0e0022005-01-25 15:48:43 +00001959{
1960 UInt thunkOp;
1961 IRType ty = szToITy(sz);
1962 IRTemp oldc = newTemp(Ity_I64);
1963 IRTemp oldcn = newTemp(ty);
1964 IROp plus = mkSizedOp(ty, Iop_Add8);
1965 IROp xor = mkSizedOp(ty, Iop_Xor8);
1966
sewardje9d8a262009-07-01 08:06:34 +00001967 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
1968
sewardjdf0e0022005-01-25 15:48:43 +00001969 switch (sz) {
1970 case 8: thunkOp = AMD64G_CC_OP_ADCQ; break;
1971 case 4: thunkOp = AMD64G_CC_OP_ADCL; break;
1972 case 2: thunkOp = AMD64G_CC_OP_ADCW; break;
1973 case 1: thunkOp = AMD64G_CC_OP_ADCB; break;
1974 default: vassert(0);
1975 }
1976
1977 /* oldc = old carry flag, 0 or 1 */
1978 assign( oldc, binop(Iop_And64,
1979 mk_amd64g_calculate_rflags_c(),
1980 mkU64(1)) );
1981
1982 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
1983
1984 assign( tres, binop(plus,
1985 binop(plus,mkexpr(ta1),mkexpr(ta2)),
1986 mkexpr(oldcn)) );
1987
sewardje9d8a262009-07-01 08:06:34 +00001988 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
1989 start of this function. */
1990 if (taddr != IRTemp_INVALID) {
1991 if (texpVal == IRTemp_INVALID) {
1992 vassert(restart_point == 0);
1993 storeLE( mkexpr(taddr), mkexpr(tres) );
1994 } else {
1995 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
1996 /* .. and hence 'texpVal' has the same type as 'tres'. */
1997 casLE( mkexpr(taddr),
1998 mkexpr(texpVal), mkexpr(tres), restart_point );
1999 }
2000 }
2001
sewardjdf0e0022005-01-25 15:48:43 +00002002 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
sewardj820611e2005-08-24 10:56:01 +00002003 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
2004 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2005 mkexpr(oldcn)) )) );
sewardjdf0e0022005-01-25 15:48:43 +00002006 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
2007}
2008
2009
2010/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
sewardje9d8a262009-07-01 08:06:34 +00002011 appropriately. As with helper_ADC, possibly generate a store of
2012 the result -- see comments on helper_ADC for details.
sewardjdf0e0022005-01-25 15:48:43 +00002013*/
2014static void helper_SBB ( Int sz,
sewardje9d8a262009-07-01 08:06:34 +00002015 IRTemp tres, IRTemp ta1, IRTemp ta2,
2016 /* info about optional store: */
2017 IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
sewardjdf0e0022005-01-25 15:48:43 +00002018{
2019 UInt thunkOp;
2020 IRType ty = szToITy(sz);
2021 IRTemp oldc = newTemp(Ity_I64);
2022 IRTemp oldcn = newTemp(ty);
2023 IROp minus = mkSizedOp(ty, Iop_Sub8);
2024 IROp xor = mkSizedOp(ty, Iop_Xor8);
2025
sewardje9d8a262009-07-01 08:06:34 +00002026 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2027
sewardjdf0e0022005-01-25 15:48:43 +00002028 switch (sz) {
2029 case 8: thunkOp = AMD64G_CC_OP_SBBQ; break;
2030 case 4: thunkOp = AMD64G_CC_OP_SBBL; break;
2031 case 2: thunkOp = AMD64G_CC_OP_SBBW; break;
2032 case 1: thunkOp = AMD64G_CC_OP_SBBB; break;
2033 default: vassert(0);
2034 }
2035
2036 /* oldc = old carry flag, 0 or 1 */
2037 assign( oldc, binop(Iop_And64,
2038 mk_amd64g_calculate_rflags_c(),
2039 mkU64(1)) );
2040
2041 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
2042
2043 assign( tres, binop(minus,
2044 binop(minus,mkexpr(ta1),mkexpr(ta2)),
2045 mkexpr(oldcn)) );
2046
sewardje9d8a262009-07-01 08:06:34 +00002047 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2048 start of this function. */
2049 if (taddr != IRTemp_INVALID) {
2050 if (texpVal == IRTemp_INVALID) {
2051 vassert(restart_point == 0);
2052 storeLE( mkexpr(taddr), mkexpr(tres) );
2053 } else {
2054 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
2055 /* .. and hence 'texpVal' has the same type as 'tres'. */
2056 casLE( mkexpr(taddr),
2057 mkexpr(texpVal), mkexpr(tres), restart_point );
2058 }
2059 }
2060
sewardjdf0e0022005-01-25 15:48:43 +00002061 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
sewardj820611e2005-08-24 10:56:01 +00002062 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
2063 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2064 mkexpr(oldcn)) )) );
sewardjdf0e0022005-01-25 15:48:43 +00002065 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
2066}
2067
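/* Illustrative sketch of the two store modes described above (not
   part of the translator; 'tdst', 'tsrc', 'taddr' and 'texpVal' are
   hypothetical temps).  A register-destination "adc" requests no
   store:

      helper_ADC( sz, tres, tdst, tsrc,
                  IRTemp_INVALID, IRTemp_INVALID, 0 );

   whereas a locked memory-destination "adc" supplies the address, the
   expected old value and a restart point, so that helper_ADC emits
   the cas-style store via casLE:

      helper_ADC( sz, tres, tdst, tsrc,
                  taddr, texpVal, guest_RIP_curr_instr );
*/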
2068
sewardj3ca55a12005-01-27 16:06:23 +00002069/* -------------- Helpers for disassembly printing. -------------- */
2070
2071static HChar* nameGrp1 ( Int opc_aux )
2072{
2073 static HChar* grp1_names[8]
2074 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
2075 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
2076 return grp1_names[opc_aux];
2077}
2078
sewardj118b23e2005-01-29 02:14:44 +00002079static HChar* nameGrp2 ( Int opc_aux )
2080{
2081 static HChar* grp2_names[8]
2082 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
sewardje941eea2005-01-30 19:52:28 +00002083 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
sewardj118b23e2005-01-29 02:14:44 +00002084 return grp2_names[opc_aux];
2085}
2086
sewardj03b07cc2005-01-31 18:09:43 +00002087static HChar* nameGrp4 ( Int opc_aux )
2088{
2089 static HChar* grp4_names[8]
2090 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
2091 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
2092 return grp4_names[opc_aux];
2093}
sewardj354e5c62005-01-27 20:12:52 +00002094
2095static HChar* nameGrp5 ( Int opc_aux )
2096{
2097 static HChar* grp5_names[8]
2098 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
2099 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
2100 return grp5_names[opc_aux];
2101}
2102
sewardj1d511802005-03-27 17:59:45 +00002103static HChar* nameGrp8 ( Int opc_aux )
2104{
2105 static HChar* grp8_names[8]
2106 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
2107 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
2108 return grp8_names[opc_aux];
2109}
2110
sewardjd20c8852005-01-20 20:04:07 +00002111//.. static HChar* nameSReg ( UInt sreg )
2112//.. {
2113//.. switch (sreg) {
2114//.. case R_ES: return "%es";
2115//.. case R_CS: return "%cs";
2116//.. case R_SS: return "%ss";
2117//.. case R_DS: return "%ds";
2118//.. case R_FS: return "%fs";
2119//.. case R_GS: return "%gs";
2120//.. default: vpanic("nameSReg(x86)");
2121//.. }
2122//.. }
sewardj8711f662005-05-09 17:52:56 +00002123
2124static HChar* nameMMXReg ( Int mmxreg )
2125{
2126 static HChar* mmx_names[8]
2127 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
2128 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
2129 return mmx_names[mmxreg];
2130}
sewardj1001dc42005-02-21 08:25:55 +00002131
2132static HChar* nameXMMReg ( Int xmmreg )
2133{
2134 static HChar* xmm_names[16]
2135 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
2136 "%xmm4", "%xmm5", "%xmm6", "%xmm7",
2137 "%xmm8", "%xmm9", "%xmm10", "%xmm11",
2138 "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
2139 if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
2140 return xmm_names[xmmreg];
2141}
2142
sewardjca673ab2005-05-11 10:03:08 +00002143static HChar* nameMMXGran ( Int gran )
sewardj8711f662005-05-09 17:52:56 +00002144{
2145 switch (gran) {
2146 case 0: return "b";
2147 case 1: return "w";
2148 case 2: return "d";
2149 case 3: return "q";
2150 default: vpanic("nameMMXGran(amd64,guest)");
2151 }
2152}
sewardjdf0e0022005-01-25 15:48:43 +00002153
sewardj8c332e22005-01-28 01:36:56 +00002154static HChar nameISize ( Int size )
sewardjdf0e0022005-01-25 15:48:43 +00002155{
2156 switch (size) {
2157 case 8: return 'q';
2158 case 4: return 'l';
2159 case 2: return 'w';
2160 case 1: return 'b';
2161 default: vpanic("nameISize(amd64)");
2162 }
2163}
2164
sewardjc4530ae2012-05-21 10:18:49 +00002165static HChar* nameYMMReg ( Int ymmreg )
2166{
2167 static HChar* ymm_names[16]
2168 = { "%ymm0", "%ymm1", "%ymm2", "%ymm3",
2169 "%ymm4", "%ymm5", "%ymm6", "%ymm7",
2170 "%ymm8", "%ymm9", "%ymm10", "%ymm11",
2171 "%ymm12", "%ymm13", "%ymm14", "%ymm15" };
2172 if (ymmreg < 0 || ymmreg > 15) vpanic("nameYMMReg(amd64)");
2173 return ymm_names[ymmreg];
2174}
2175
sewardjdf0e0022005-01-25 15:48:43 +00002176
2177/*------------------------------------------------------------*/
2178/*--- JMP helpers ---*/
2179/*------------------------------------------------------------*/
2180
sewardjc6f970f2012-04-02 21:54:49 +00002181static void jmp_lit( /*MOD*/DisResult* dres,
2182 IRJumpKind kind, Addr64 d64 )
sewardjdf0e0022005-01-25 15:48:43 +00002183{
sewardjc6f970f2012-04-02 21:54:49 +00002184 vassert(dres->whatNext == Dis_Continue);
2185 vassert(dres->len == 0);
2186 vassert(dres->continueAt == 0);
2187 vassert(dres->jk_StopHere == Ijk_INVALID);
2188 dres->whatNext = Dis_StopHere;
2189 dres->jk_StopHere = kind;
2190 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) );
sewardjdf0e0022005-01-25 15:48:43 +00002191}
2192
sewardjc6f970f2012-04-02 21:54:49 +00002193static void jmp_treg( /*MOD*/DisResult* dres,
2194 IRJumpKind kind, IRTemp t )
sewardj2f959cc2005-01-26 01:19:35 +00002195{
sewardjc6f970f2012-04-02 21:54:49 +00002196 vassert(dres->whatNext == Dis_Continue);
2197 vassert(dres->len == 0);
2198 vassert(dres->continueAt == 0);
2199 vassert(dres->jk_StopHere == Ijk_INVALID);
2200 dres->whatNext = Dis_StopHere;
2201 dres->jk_StopHere = kind;
2202 stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) );
sewardj2f959cc2005-01-26 01:19:35 +00002203}
2204
sewardj1389d4d2005-01-28 13:46:29 +00002205static
sewardjc6f970f2012-04-02 21:54:49 +00002206void jcc_01 ( /*MOD*/DisResult* dres,
2207 AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
sewardj1389d4d2005-01-28 13:46:29 +00002208{
2209 Bool invert;
2210 AMD64Condcode condPos;
sewardjc6f970f2012-04-02 21:54:49 +00002211 vassert(dres->whatNext == Dis_Continue);
2212 vassert(dres->len == 0);
2213 vassert(dres->continueAt == 0);
2214 vassert(dres->jk_StopHere == Ijk_INVALID);
2215 dres->whatNext = Dis_StopHere;
2216 dres->jk_StopHere = Ijk_Boring;
sewardj1389d4d2005-01-28 13:46:29 +00002217 condPos = positiveIse_AMD64Condcode ( cond, &invert );
2218 if (invert) {
2219 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2220 Ijk_Boring,
sewardjc6f970f2012-04-02 21:54:49 +00002221 IRConst_U64(d64_false),
2222 OFFB_RIP ) );
2223 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) );
sewardj1389d4d2005-01-28 13:46:29 +00002224 } else {
2225 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2226 Ijk_Boring,
sewardjc6f970f2012-04-02 21:54:49 +00002227 IRConst_U64(d64_true),
2228 OFFB_RIP ) );
2229 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) );
sewardj1389d4d2005-01-28 13:46:29 +00002230 }
2231}
sewardjb3a04292005-01-21 20:33:44 +00002232
sewardj478646f2008-05-01 20:13:04 +00002233/* Let new_rsp be the %rsp value after a call/return. Let nia be the
2234 guest address of the next instruction to be executed.
2235
2236 This function generates an AbiHint to say that -128(%rsp)
2237 .. -1(%rsp) should now be regarded as uninitialised.
sewardj5a9ffab2005-05-12 17:55:01 +00002238*/
sewardjaca070a2006-10-17 00:28:22 +00002239static
sewardj478646f2008-05-01 20:13:04 +00002240void make_redzone_AbiHint ( VexAbiInfo* vbi,
2241 IRTemp new_rsp, IRTemp nia, HChar* who )
sewardj5a9ffab2005-05-12 17:55:01 +00002242{
sewardjdd40fdf2006-12-24 02:20:24 +00002243 Int szB = vbi->guest_stack_redzone_size;
sewardjaca070a2006-10-17 00:28:22 +00002244 vassert(szB >= 0);
2245
2246   /* A bit of a kludge. Currently the only ABI we've guested AMD64
2247 for is ELF. So just check it's the expected 128 value
2248 (paranoia). */
2249 vassert(szB == 128);
2250
sewardj5a9ffab2005-05-12 17:55:01 +00002251 if (0) vex_printf("AbiHint: %s\n", who);
sewardjdd40fdf2006-12-24 02:20:24 +00002252 vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
sewardj478646f2008-05-01 20:13:04 +00002253 vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
sewardjaca070a2006-10-17 00:28:22 +00002254 if (szB > 0)
2255 stmt( IRStmt_AbiHint(
2256 binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)),
sewardj478646f2008-05-01 20:13:04 +00002257 szB,
2258 mkexpr(nia)
sewardjaca070a2006-10-17 00:28:22 +00002259 ));
sewardj5a9ffab2005-05-12 17:55:01 +00002260}
2261
sewardjb3a04292005-01-21 20:33:44 +00002262
2263/*------------------------------------------------------------*/
2264/*--- Disassembling addressing modes ---*/
2265/*------------------------------------------------------------*/
2266
2267static
sewardjc4356f02007-11-09 21:15:04 +00002268HChar* segRegTxt ( Prefix pfx )
sewardjb3a04292005-01-21 20:33:44 +00002269{
2270 if (pfx & PFX_CS) return "%cs:";
2271 if (pfx & PFX_DS) return "%ds:";
2272 if (pfx & PFX_ES) return "%es:";
2273 if (pfx & PFX_FS) return "%fs:";
2274 if (pfx & PFX_GS) return "%gs:";
2275 if (pfx & PFX_SS) return "%ss:";
2276 return ""; /* no override */
2277}
2278
2279
2280/* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
2281 linear address by adding any required segment override as indicated
sewardj42561ef2005-11-04 14:18:31 +00002282   by pfx, and also dealing with any address size override
2283 present. */
sewardjb3a04292005-01-21 20:33:44 +00002284static
sewardj2e28ac42008-12-04 00:05:12 +00002285IRExpr* handleAddrOverrides ( VexAbiInfo* vbi,
2286 Prefix pfx, IRExpr* virtual )
sewardjb3a04292005-01-21 20:33:44 +00002287{
sewardj42561ef2005-11-04 14:18:31 +00002288 /* --- segment overrides --- */
sewardja6b93d12005-02-17 09:28:28 +00002289 if (pfx & PFX_FS) {
sewardj2e28ac42008-12-04 00:05:12 +00002290 if (vbi->guest_amd64_assume_fs_is_zero) {
2291 /* Note that this is a linux-kernel specific hack that relies
2292 on the assumption that %fs is always zero. */
2293 /* return virtual + guest_FS_ZERO. */
2294 virtual = binop(Iop_Add64, virtual,
2295 IRExpr_Get(OFFB_FS_ZERO, Ity_I64));
2296 } else {
2297 unimplemented("amd64 %fs segment override");
2298 }
sewardja6b93d12005-02-17 09:28:28 +00002299 }
sewardjb3a04292005-01-21 20:33:44 +00002300
sewardja6b93d12005-02-17 09:28:28 +00002301 if (pfx & PFX_GS) {
sewardj2e28ac42008-12-04 00:05:12 +00002302 if (vbi->guest_amd64_assume_gs_is_0x60) {
2303 /* Note that this is a darwin-kernel specific hack that relies
2304 on the assumption that %gs is always 0x60. */
2305 /* return virtual + guest_GS_0x60. */
2306 virtual = binop(Iop_Add64, virtual,
2307 IRExpr_Get(OFFB_GS_0x60, Ity_I64));
2308 } else {
2309 unimplemented("amd64 %gs segment override");
2310 }
sewardja6b93d12005-02-17 09:28:28 +00002311 }
2312
2313 /* cs, ds, es and ss are simply ignored in 64-bit mode. */
sewardj42561ef2005-11-04 14:18:31 +00002314
2315 /* --- address size override --- */
2316 if (haveASO(pfx))
2317 virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));
2318
sewardja6b93d12005-02-17 09:28:28 +00002319 return virtual;
sewardjb3a04292005-01-21 20:33:44 +00002320}
sewardja6b93d12005-02-17 09:28:28 +00002321
sewardjd20c8852005-01-20 20:04:07 +00002322//.. {
2323//.. Int sreg;
2324//.. IRType hWordTy;
2325//.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
2326//..
2327//.. if (sorb == 0)
2328//.. /* the common case - no override */
2329//.. return virtual;
2330//..
2331//.. switch (sorb) {
2332//.. case 0x3E: sreg = R_DS; break;
2333//.. case 0x26: sreg = R_ES; break;
2334//.. case 0x64: sreg = R_FS; break;
2335//.. case 0x65: sreg = R_GS; break;
sewardj42561ef2005-11-04 14:18:31 +00002336//.. default: vpanic("handleAddrOverrides(x86,guest)");
sewardjd20c8852005-01-20 20:04:07 +00002337//.. }
2338//..
2339//.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
2340//..
2341//.. seg_selector = newTemp(Ity_I32);
2342//.. ldt_ptr = newTemp(hWordTy);
2343//.. gdt_ptr = newTemp(hWordTy);
2344//.. r64 = newTemp(Ity_I64);
2345//..
2346//.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
2347//.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
2348//.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
2349//..
2350//.. /*
2351//.. Call this to do the translation and limit checks:
2352//.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2353//.. UInt seg_selector, UInt virtual_addr )
2354//.. */
2355//.. assign(
2356//.. r64,
2357//.. mkIRExprCCall(
2358//.. Ity_I64,
2359//.. 0/*regparms*/,
2360//.. "x86g_use_seg_selector",
2361//.. &x86g_use_seg_selector,
2362//.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
2363//.. mkexpr(seg_selector), virtual)
2364//.. )
2365//.. );
2366//..
2367//.. /* If the high 32 of the result are non-zero, there was a
2368//.. failure in address translation. In which case, make a
2369//.. quick exit.
2370//.. */
2371//.. stmt(
2372//.. IRStmt_Exit(
2373//.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
2374//.. Ijk_MapFail,
2375//.. IRConst_U32( guest_eip_curr_instr )
2376//.. )
2377//.. );
2378//..
2379//.. /* otherwise, here's the translated result. */
2380//.. return unop(Iop_64to32, mkexpr(r64));
2381//.. }
sewardjb3a04292005-01-21 20:33:44 +00002382
2383
2384/* Generate IR to calculate an address indicated by a ModRM and
2385 following SIB bytes. The expression, and the number of bytes in
2386 the address mode, are returned (the latter in *len). Note that
2387 this fn should not be called if the R/M part of the address denotes
2388   a register instead of memory.  If front-end tracing is enabled,
2389   text of the addressing mode is placed in buf.
2390
2391 The computed address is stored in a new tempreg, and the
sewardje1698952005-02-08 15:02:39 +00002392 identity of the tempreg is returned.
2393
2394 extra_bytes holds the number of bytes after the amode, as supplied
2395 by the caller. This is needed to make sense of %rip-relative
2396 addresses. Note that the value that *len is set to is only the
2397 length of the amode itself and does not include the value supplied
sewardj09717342005-05-05 21:34:02 +00002398 in extra_bytes.
sewardje1698952005-02-08 15:02:39 +00002399 */
sewardjb3a04292005-01-21 20:33:44 +00002400
2401static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
2402{
2403 IRTemp tmp = newTemp(Ity_I64);
2404 assign( tmp, addr64 );
2405 return tmp;
2406}
2407
2408static
sewardj2e28ac42008-12-04 00:05:12 +00002409IRTemp disAMode ( /*OUT*/Int* len,
2410 VexAbiInfo* vbi, Prefix pfx, Long delta,
2411 /*OUT*/HChar* buf, Int extra_bytes )
sewardjb3a04292005-01-21 20:33:44 +00002412{
sewardj8c332e22005-01-28 01:36:56 +00002413 UChar mod_reg_rm = getUChar(delta);
sewardjb3a04292005-01-21 20:33:44 +00002414 delta++;
2415
2416 buf[0] = (UChar)0;
sewardje1698952005-02-08 15:02:39 +00002417 vassert(extra_bytes >= 0 && extra_bytes < 10);
sewardjb3a04292005-01-21 20:33:44 +00002418
2419 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2420 jump table seems a bit excessive.
2421 */
sewardj7a240552005-01-28 21:37:12 +00002422 mod_reg_rm &= 0xC7; /* is now XX000YYY */
sewardj1027dc22005-02-26 01:55:02 +00002423 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2424 /* is now XX0XXYYY */
sewardj7a240552005-01-28 21:37:12 +00002425 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
sewardjb3a04292005-01-21 20:33:44 +00002426 switch (mod_reg_rm) {
2427
2428 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2429 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2430 */
2431 case 0x00: case 0x01: case 0x02: case 0x03:
2432 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
sewardj1027dc22005-02-26 01:55:02 +00002433 { UChar rm = toUChar(mod_reg_rm & 7);
sewardjc4356f02007-11-09 21:15:04 +00002434 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
sewardjb3a04292005-01-21 20:33:44 +00002435 *len = 1;
2436 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002437 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
sewardjb3a04292005-01-21 20:33:44 +00002438 }
2439
2440 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2441 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2442 */
2443 case 0x08: case 0x09: case 0x0A: case 0x0B:
2444 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
sewardj1027dc22005-02-26 01:55:02 +00002445 { UChar rm = toUChar(mod_reg_rm & 7);
sewardj8c332e22005-01-28 01:36:56 +00002446 Long d = getSDisp8(delta);
sewardj7eaa7cf2005-01-31 18:55:22 +00002447 if (d == 0) {
sewardjc4356f02007-11-09 21:15:04 +00002448 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
sewardj7eaa7cf2005-01-31 18:55:22 +00002449 } else {
sewardjc4356f02007-11-09 21:15:04 +00002450 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
sewardj7eaa7cf2005-01-31 18:55:22 +00002451 }
sewardjb3a04292005-01-21 20:33:44 +00002452 *len = 2;
2453 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002454 handleAddrOverrides(vbi, pfx,
sewardj5b470602005-02-27 13:10:48 +00002455 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
sewardjb3a04292005-01-21 20:33:44 +00002456 }
2457
2458 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2459 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2460 */
2461 case 0x10: case 0x11: case 0x12: case 0x13:
2462 /* ! 14 */ case 0x15: case 0x16: case 0x17:
sewardj1027dc22005-02-26 01:55:02 +00002463 { UChar rm = toUChar(mod_reg_rm & 7);
sewardj227458e2005-01-31 19:04:50 +00002464 Long d = getSDisp32(delta);
sewardjc4356f02007-11-09 21:15:04 +00002465 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
sewardjb3a04292005-01-21 20:33:44 +00002466 *len = 5;
2467 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002468 handleAddrOverrides(vbi, pfx,
sewardj5b470602005-02-27 13:10:48 +00002469 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
sewardjb3a04292005-01-21 20:33:44 +00002470 }
2471
2472 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2473   /* REX.B==1: a register, %r8 .. %r15. This shouldn't happen. */
2474 case 0x18: case 0x19: case 0x1A: case 0x1B:
2475 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
sewardj354e5c62005-01-27 20:12:52 +00002476 vpanic("disAMode(amd64): not an addr!");
sewardjb3a04292005-01-21 20:33:44 +00002477
sewardj9e6491a2005-07-02 19:24:10 +00002478 /* RIP + disp32. This assumes that guest_RIP_curr_instr is set
sewardjb3a04292005-01-21 20:33:44 +00002479 correctly at the start of handling each instruction. */
2480 case 0x05:
sewardj227458e2005-01-31 19:04:50 +00002481 { Long d = getSDisp32(delta);
sewardjb3a04292005-01-21 20:33:44 +00002482 *len = 5;
sewardjc4356f02007-11-09 21:15:04 +00002483 DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
sewardj4b744762005-02-07 15:02:25 +00002484 /* We need to know the next instruction's start address.
2485 Try and figure out what it is, record the guess, and ask
2486 the top-level driver logic (bbToIR_AMD64) to check we
2487 guessed right, after the instruction is completely
2488 decoded. */
sewardj9e6491a2005-07-02 19:24:10 +00002489 guest_RIP_next_mustcheck = True;
2490 guest_RIP_next_assumed = guest_RIP_bbstart
sewardje1698952005-02-08 15:02:39 +00002491 + delta+4 + extra_bytes;
sewardjb3a04292005-01-21 20:33:44 +00002492 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002493 handleAddrOverrides(vbi, pfx,
sewardj9e6491a2005-07-02 19:24:10 +00002494 binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
sewardjb3a04292005-01-21 20:33:44 +00002495 mkU64(d))));
2496 }
sewardj3ca55a12005-01-27 16:06:23 +00002497
sewardj2f959cc2005-01-26 01:19:35 +00002498 case 0x04: {
2499 /* SIB, with no displacement. Special cases:
sewardj3ca55a12005-01-27 16:06:23 +00002500 -- %rsp cannot act as an index value.
2501 If index_r indicates %rsp, zero is used for the index.
2502 -- when mod is zero and base indicates RBP or R13, base is
2503 instead a 32-bit sign-extended literal.
sewardj2f959cc2005-01-26 01:19:35 +00002504 It's all madness, I tell you. Extract %index, %base and
2505 scale from the SIB byte. The value denoted is then:
sewardj3ca55a12005-01-27 16:06:23 +00002506 | %index == %RSP && (%base == %RBP || %base == %R13)
sewardj2f959cc2005-01-26 01:19:35 +00002507 = d32 following SIB byte
sewardj3ca55a12005-01-27 16:06:23 +00002508 | %index == %RSP && !(%base == %RBP || %base == %R13)
sewardj2f959cc2005-01-26 01:19:35 +00002509 = %base
sewardj3ca55a12005-01-27 16:06:23 +00002510 | %index != %RSP && (%base == %RBP || %base == %R13)
sewardj2f959cc2005-01-26 01:19:35 +00002511 = d32 following SIB byte + (%index << scale)
sewardj3ca55a12005-01-27 16:06:23 +00002512 | %index != %RSP && !(%base == %RBP || %base == %R13)
sewardj2f959cc2005-01-26 01:19:35 +00002513 = %base + (%index << scale)
2514 */
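         /* Worked example (illustrative): modrm byte 0x04 followed by
            SIB byte 0x9D (scale=2, index=3=%rbx, base=5=%rbp), with
            REX.X==0 and REX.B==0, falls into the third case above:
            the base encodes RBP with mod==00, so the amode is
            d32 + (%rbx << 2) and a further 4 displacement bytes
            follow the SIB byte. */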
sewardj8c332e22005-01-28 01:36:56 +00002515 UChar sib = getUChar(delta);
sewardj1027dc22005-02-26 01:55:02 +00002516 UChar scale = toUChar((sib >> 6) & 3);
2517 UChar index_r = toUChar((sib >> 3) & 7);
2518 UChar base_r = toUChar(sib & 7);
sewardj3ca55a12005-01-27 16:06:23 +00002519 /* correct since #(R13) == 8 + #(RBP) */
sewardj7a240552005-01-28 21:37:12 +00002520 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2521 Bool index_is_SP = toBool(index_r == R_RSP && 0==getRexX(pfx));
sewardj2f959cc2005-01-26 01:19:35 +00002522 delta++;
sewardjb3a04292005-01-21 20:33:44 +00002523
sewardj3ca55a12005-01-27 16:06:23 +00002524 if ((!index_is_SP) && (!base_is_BPor13)) {
sewardje941eea2005-01-30 19:52:28 +00002525 if (scale == 0) {
sewardjc4356f02007-11-09 21:15:04 +00002526 DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
sewardj5b470602005-02-27 13:10:48 +00002527 nameIRegRexB(8,pfx,base_r),
2528 nameIReg64rexX(pfx,index_r));
sewardje941eea2005-01-30 19:52:28 +00002529 } else {
sewardjc4356f02007-11-09 21:15:04 +00002530 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
sewardj5b470602005-02-27 13:10:48 +00002531 nameIRegRexB(8,pfx,base_r),
2532 nameIReg64rexX(pfx,index_r), 1<<scale);
sewardje941eea2005-01-30 19:52:28 +00002533 }
sewardj2f959cc2005-01-26 01:19:35 +00002534 *len = 2;
sewardj2f959cc2005-01-26 01:19:35 +00002535 return
2536 disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002537 handleAddrOverrides(vbi, pfx,
sewardj3ca55a12005-01-27 16:06:23 +00002538 binop(Iop_Add64,
sewardj5b470602005-02-27 13:10:48 +00002539 getIRegRexB(8,pfx,base_r),
2540 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
sewardj2f959cc2005-01-26 01:19:35 +00002541 mkU8(scale)))));
2542 }
2543
sewardj3ca55a12005-01-27 16:06:23 +00002544 if ((!index_is_SP) && base_is_BPor13) {
sewardj227458e2005-01-31 19:04:50 +00002545 Long d = getSDisp32(delta);
sewardjc4356f02007-11-09 21:15:04 +00002546 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
sewardj5b470602005-02-27 13:10:48 +00002547 nameIReg64rexX(pfx,index_r), 1<<scale);
sewardj2f959cc2005-01-26 01:19:35 +00002548 *len = 6;
sewardj2f959cc2005-01-26 01:19:35 +00002549 return
2550 disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002551 handleAddrOverrides(vbi, pfx,
sewardj3ca55a12005-01-27 16:06:23 +00002552 binop(Iop_Add64,
sewardj5b470602005-02-27 13:10:48 +00002553 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
sewardj3ca55a12005-01-27 16:06:23 +00002554 mkU8(scale)),
2555 mkU64(d))));
sewardj2f959cc2005-01-26 01:19:35 +00002556 }
2557
sewardj3ca55a12005-01-27 16:06:23 +00002558 if (index_is_SP && (!base_is_BPor13)) {
sewardjc4356f02007-11-09 21:15:04 +00002559 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
sewardj2f959cc2005-01-26 01:19:35 +00002560 *len = 2;
sewardj2f959cc2005-01-26 01:19:35 +00002561 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002562 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r)));
sewardj2f959cc2005-01-26 01:19:35 +00002563 }
2564
sewardj3ca55a12005-01-27 16:06:23 +00002565 if (index_is_SP && base_is_BPor13) {
sewardj227458e2005-01-31 19:04:50 +00002566 Long d = getSDisp32(delta);
sewardjc4356f02007-11-09 21:15:04 +00002567 DIS(buf, "%s%lld", segRegTxt(pfx), d);
sewardj2f959cc2005-01-26 01:19:35 +00002568 *len = 6;
sewardj2f959cc2005-01-26 01:19:35 +00002569 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002570 handleAddrOverrides(vbi, pfx, mkU64(d)));
sewardj2f959cc2005-01-26 01:19:35 +00002571 }
2572
2573 vassert(0);
2574 }
sewardj3ca55a12005-01-27 16:06:23 +00002575
sewardj2f959cc2005-01-26 01:19:35 +00002576 /* SIB, with 8-bit displacement. Special cases:
2577         -- %rsp cannot act as an index value.
2578            If index_r indicates %rsp, zero is used for the index.
2579         Denoted value is:
2580           | %index == %RSP
2581           = d8 + %base
2582           | %index != %RSP
2583 = d8 + %base + (%index << scale)
2584 */
2585 case 0x0C: {
sewardj8c332e22005-01-28 01:36:56 +00002586 UChar sib = getUChar(delta);
sewardj1027dc22005-02-26 01:55:02 +00002587 UChar scale = toUChar((sib >> 6) & 3);
2588 UChar index_r = toUChar((sib >> 3) & 7);
2589 UChar base_r = toUChar(sib & 7);
sewardj8c332e22005-01-28 01:36:56 +00002590 Long d = getSDisp8(delta+1);
sewardj2f959cc2005-01-26 01:19:35 +00002591
sewardj3ca55a12005-01-27 16:06:23 +00002592 if (index_r == R_RSP && 0==getRexX(pfx)) {
sewardjc4356f02007-11-09 21:15:04 +00002593 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
sewardj5b470602005-02-27 13:10:48 +00002594 d, nameIRegRexB(8,pfx,base_r));
sewardj2f959cc2005-01-26 01:19:35 +00002595 *len = 3;
sewardj2f959cc2005-01-26 01:19:35 +00002596 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002597 handleAddrOverrides(vbi, pfx,
sewardj5b470602005-02-27 13:10:48 +00002598 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
sewardj2f959cc2005-01-26 01:19:35 +00002599 } else {
sewardje941eea2005-01-30 19:52:28 +00002600 if (scale == 0) {
sewardjc4356f02007-11-09 21:15:04 +00002601 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
sewardj5b470602005-02-27 13:10:48 +00002602 nameIRegRexB(8,pfx,base_r),
2603 nameIReg64rexX(pfx,index_r));
sewardje941eea2005-01-30 19:52:28 +00002604 } else {
sewardjc4356f02007-11-09 21:15:04 +00002605 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
sewardj5b470602005-02-27 13:10:48 +00002606 nameIRegRexB(8,pfx,base_r),
2607 nameIReg64rexX(pfx,index_r), 1<<scale);
sewardje941eea2005-01-30 19:52:28 +00002608 }
sewardj2f959cc2005-01-26 01:19:35 +00002609 *len = 3;
sewardj2f959cc2005-01-26 01:19:35 +00002610 return
2611 disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002612 handleAddrOverrides(vbi, pfx,
sewardj3ca55a12005-01-27 16:06:23 +00002613 binop(Iop_Add64,
2614 binop(Iop_Add64,
sewardj5b470602005-02-27 13:10:48 +00002615 getIRegRexB(8,pfx,base_r),
sewardj3ca55a12005-01-27 16:06:23 +00002616 binop(Iop_Shl64,
sewardj5b470602005-02-27 13:10:48 +00002617 getIReg64rexX(pfx,index_r), mkU8(scale))),
sewardj3ca55a12005-01-27 16:06:23 +00002618 mkU64(d))));
sewardj2f959cc2005-01-26 01:19:35 +00002619 }
sewardj3ca55a12005-01-27 16:06:23 +00002620 vassert(0); /*NOTREACHED*/
sewardj2f959cc2005-01-26 01:19:35 +00002621 }
sewardj3ca55a12005-01-27 16:06:23 +00002622
sewardj2f959cc2005-01-26 01:19:35 +00002623 /* SIB, with 32-bit displacement. Special cases:
2624 -- %rsp cannot act as an index value.
2625 If index_r indicates %rsp, zero is used for the index.
2626 Denoted value is:
2627 | %index == %RSP
2628 = d32 + %base
2629 | %index != %RSP
2630 = d32 + %base + (%index << scale)
2631 */
2632 case 0x14: {
sewardj8c332e22005-01-28 01:36:56 +00002633 UChar sib = getUChar(delta);
sewardj1027dc22005-02-26 01:55:02 +00002634 UChar scale = toUChar((sib >> 6) & 3);
2635 UChar index_r = toUChar((sib >> 3) & 7);
2636 UChar base_r = toUChar(sib & 7);
sewardj8c332e22005-01-28 01:36:56 +00002637 Long d = getSDisp32(delta+1);
sewardj2f959cc2005-01-26 01:19:35 +00002638
2639 if (index_r == R_RSP && 0==getRexX(pfx)) {
sewardjc4356f02007-11-09 21:15:04 +00002640 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
sewardj5b470602005-02-27 13:10:48 +00002641 d, nameIRegRexB(8,pfx,base_r));
sewardj2f959cc2005-01-26 01:19:35 +00002642 *len = 6;
sewardj2f959cc2005-01-26 01:19:35 +00002643 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002644 handleAddrOverrides(vbi, pfx,
sewardj5b470602005-02-27 13:10:48 +00002645 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
sewardj2f959cc2005-01-26 01:19:35 +00002646 } else {
sewardje941eea2005-01-30 19:52:28 +00002647 if (scale == 0) {
sewardjc4356f02007-11-09 21:15:04 +00002648 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
sewardj5b470602005-02-27 13:10:48 +00002649 nameIRegRexB(8,pfx,base_r),
2650 nameIReg64rexX(pfx,index_r));
sewardje941eea2005-01-30 19:52:28 +00002651 } else {
sewardjc4356f02007-11-09 21:15:04 +00002652 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
sewardj5b470602005-02-27 13:10:48 +00002653 nameIRegRexB(8,pfx,base_r),
2654 nameIReg64rexX(pfx,index_r), 1<<scale);
sewardje941eea2005-01-30 19:52:28 +00002655 }
sewardj2f959cc2005-01-26 01:19:35 +00002656 *len = 6;
2657 return
2658 disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002659 handleAddrOverrides(vbi, pfx,
sewardj2f959cc2005-01-26 01:19:35 +00002660 binop(Iop_Add64,
2661 binop(Iop_Add64,
sewardj5b470602005-02-27 13:10:48 +00002662 getIRegRexB(8,pfx,base_r),
sewardj2f959cc2005-01-26 01:19:35 +00002663 binop(Iop_Shl64,
sewardj5b470602005-02-27 13:10:48 +00002664 getIReg64rexX(pfx,index_r), mkU8(scale))),
sewardj2f959cc2005-01-26 01:19:35 +00002665 mkU64(d))));
2666 }
sewardj3ca55a12005-01-27 16:06:23 +00002667 vassert(0); /*NOTREACHED*/
sewardj2f959cc2005-01-26 01:19:35 +00002668 }
2669
sewardjb3a04292005-01-21 20:33:44 +00002670 default:
2671 vpanic("disAMode(amd64)");
2672 return 0; /*notreached*/
2673 }
2674}
2675
2676
sewardj3ca55a12005-01-27 16:06:23 +00002677/* Figure out the number of (insn-stream) bytes constituting the amode
2678 beginning at delta. Is useful for getting hold of literals beyond
2679 the end of the amode before it has been disassembled. */
2680
sewardj270def42005-07-03 01:03:01 +00002681static UInt lengthAMode ( Prefix pfx, Long delta )
sewardj3ca55a12005-01-27 16:06:23 +00002682{
sewardj8c332e22005-01-28 01:36:56 +00002683 UChar mod_reg_rm = getUChar(delta);
sewardj3ca55a12005-01-27 16:06:23 +00002684 delta++;
2685
2686 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2687 jump table seems a bit excessive.
2688 */
sewardj7a240552005-01-28 21:37:12 +00002689 mod_reg_rm &= 0xC7; /* is now XX000YYY */
sewardj1027dc22005-02-26 01:55:02 +00002690 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2691 /* is now XX0XXYYY */
sewardj7a240552005-01-28 21:37:12 +00002692 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
sewardj3ca55a12005-01-27 16:06:23 +00002693 switch (mod_reg_rm) {
2694
2695 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2696 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2697 */
2698 case 0x00: case 0x01: case 0x02: case 0x03:
2699 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
sewardj3ca55a12005-01-27 16:06:23 +00002700 return 1;
2701
2702 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2703 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2704 */
2705 case 0x08: case 0x09: case 0x0A: case 0x0B:
2706 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
sewardj3ca55a12005-01-27 16:06:23 +00002707 return 2;
2708
2709 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2710 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2711 */
2712 case 0x10: case 0x11: case 0x12: case 0x13:
2713 /* ! 14 */ case 0x15: case 0x16: case 0x17:
sewardj3ca55a12005-01-27 16:06:23 +00002714 return 5;
2715
2716 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
 2717 /* REX.B==1: a register, %r8 .. %r15. This shouldn't happen. */
2718 /* Not an address, but still handled. */
2719 case 0x18: case 0x19: case 0x1A: case 0x1B:
2720 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2721 return 1;
2722
2723 /* RIP + disp32. */
2724 case 0x05:
sewardj3ca55a12005-01-27 16:06:23 +00002725 return 5;
2726
2727 case 0x04: {
2728 /* SIB, with no displacement. */
sewardj8c332e22005-01-28 01:36:56 +00002729 UChar sib = getUChar(delta);
sewardj1027dc22005-02-26 01:55:02 +00002730 UChar base_r = toUChar(sib & 7);
sewardj3ca55a12005-01-27 16:06:23 +00002731 /* correct since #(R13) == 8 + #(RBP) */
sewardj7a240552005-01-28 21:37:12 +00002732 Bool base_is_BPor13 = toBool(base_r == R_RBP);
sewardj3ca55a12005-01-27 16:06:23 +00002733
2734 if (base_is_BPor13) {
sewardj3ca55a12005-01-27 16:06:23 +00002735 return 6;
2736 } else {
sewardj3ca55a12005-01-27 16:06:23 +00002737 return 2;
2738 }
2739 }
2740
2741 /* SIB, with 8-bit displacement. */
2742 case 0x0C:
2743 return 3;
2744
2745 /* SIB, with 32-bit displacement. */
2746 case 0x14:
2747 return 6;
2748
2749 default:
2750 vpanic("lengthAMode(amd64)");
2751 return 0; /*notreached*/
2752 }
2753}
2754
2755
sewardjdf0e0022005-01-25 15:48:43 +00002756/*------------------------------------------------------------*/
2757/*--- Disassembling common idioms ---*/
2758/*------------------------------------------------------------*/
2759
sewardjdf0e0022005-01-25 15:48:43 +00002760/* Handle binary integer instructions of the form
2761 op E, G meaning
2762 op reg-or-mem, reg
2763 Is passed the a ptr to the modRM byte, the actual operation, and the
2764 data size. Returns the address advanced completely over this
2765 instruction.
2766
2767 E(src) is reg-or-mem
2768 G(dst) is reg.
2769
2770 If E is reg, --> GET %G, tmp
2771 OP %E, tmp
2772 PUT tmp, %G
2773
2774 If E is mem and OP is not reversible,
2775 --> (getAddr E) -> tmpa
2776 LD (tmpa), tmpa
2777 GET %G, tmp2
2778 OP tmpa, tmp2
2779 PUT tmp2, %G
2780
2781 If E is mem and OP is reversible
2782 --> (getAddr E) -> tmpa
2783 LD (tmpa), tmpa
2784 OP %G, tmpa
2785 PUT tmpa, %G
2786*/
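/* Worked example (illustrative only): for "add (%rcx),%ebx", E is the
   memory operand, G is %ebx, op8 is Iop_Add8 and size is 4.  The
   memory case below computes the address, loads the 32-bit operand,
   adds it to %ebx, sets the flags thunk, and writes the sum back to
   %ebx. */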
2787static
sewardj2e28ac42008-12-04 00:05:12 +00002788ULong dis_op2_E_G ( VexAbiInfo* vbi,
2789 Prefix pfx,
sewardjdf0e0022005-01-25 15:48:43 +00002790 Bool addSubCarry,
2791 IROp op8,
2792 Bool keep,
2793 Int size,
sewardj270def42005-07-03 01:03:01 +00002794 Long delta0,
sewardj8c332e22005-01-28 01:36:56 +00002795 HChar* t_amd64opc )
sewardjdf0e0022005-01-25 15:48:43 +00002796{
2797 HChar dis_buf[50];
2798 Int len;
2799 IRType ty = szToITy(size);
2800 IRTemp dst1 = newTemp(ty);
2801 IRTemp src = newTemp(ty);
2802 IRTemp dst0 = newTemp(ty);
2803 UChar rm = getUChar(delta0);
2804 IRTemp addr = IRTemp_INVALID;
2805
2806 /* addSubCarry == True indicates the intended operation is
2807 add-with-carry or subtract-with-borrow. */
2808 if (addSubCarry) {
2809 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
2810 vassert(keep);
2811 }
2812
2813 if (epartIsReg(rm)) {
sewardjdf0e0022005-01-25 15:48:43 +00002814 /* Specially handle XOR reg,reg, because that doesn't really
2815 depend on reg, and doing the obvious thing potentially
2816 generates a spurious value check failure due to the bogus
 2817 dependency. Ditto SBB reg,reg. */
sewardj5b470602005-02-27 13:10:48 +00002818 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
2819 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
sewardj4f9847d2005-07-25 11:58:34 +00002820 if (False && op8 == Iop_Sub8)
sewardj5b470602005-02-27 13:10:48 +00002821 vex_printf("vex amd64->IR: sbb %%r,%%r optimisation(1)\n");
2822 putIRegG(size,pfx,rm, mkU(ty,0));
sewardjdf0e0022005-01-25 15:48:43 +00002823 }
sewardj5b470602005-02-27 13:10:48 +00002824
2825 assign( dst0, getIRegG(size,pfx,rm) );
2826 assign( src, getIRegE(size,pfx,rm) );
sewardjdf0e0022005-01-25 15:48:43 +00002827
2828 if (addSubCarry && op8 == Iop_Add8) {
sewardje9d8a262009-07-01 08:06:34 +00002829 helper_ADC( size, dst1, dst0, src,
2830 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5b470602005-02-27 13:10:48 +00002831 putIRegG(size, pfx, rm, mkexpr(dst1));
sewardjdf0e0022005-01-25 15:48:43 +00002832 } else
2833 if (addSubCarry && op8 == Iop_Sub8) {
sewardje9d8a262009-07-01 08:06:34 +00002834 helper_SBB( size, dst1, dst0, src,
2835 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5b470602005-02-27 13:10:48 +00002836 putIRegG(size, pfx, rm, mkexpr(dst1));
sewardjdf0e0022005-01-25 15:48:43 +00002837 } else {
2838 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2839 if (isAddSub(op8))
2840 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2841 else
2842 setFlags_DEP1(op8, dst1, ty);
2843 if (keep)
sewardj5b470602005-02-27 13:10:48 +00002844 putIRegG(size, pfx, rm, mkexpr(dst1));
sewardjdf0e0022005-01-25 15:48:43 +00002845 }
2846
2847 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00002848 nameIRegE(size,pfx,rm),
2849 nameIRegG(size,pfx,rm));
sewardjdf0e0022005-01-25 15:48:43 +00002850 return 1+delta0;
2851 } else {
2852 /* E refers to memory */
sewardj2e28ac42008-12-04 00:05:12 +00002853 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardj5b470602005-02-27 13:10:48 +00002854 assign( dst0, getIRegG(size,pfx,rm) );
sewardjdf0e0022005-01-25 15:48:43 +00002855 assign( src, loadLE(szToITy(size), mkexpr(addr)) );
2856
2857 if (addSubCarry && op8 == Iop_Add8) {
sewardje9d8a262009-07-01 08:06:34 +00002858 helper_ADC( size, dst1, dst0, src,
2859 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5b470602005-02-27 13:10:48 +00002860 putIRegG(size, pfx, rm, mkexpr(dst1));
sewardjdf0e0022005-01-25 15:48:43 +00002861 } else
2862 if (addSubCarry && op8 == Iop_Sub8) {
sewardje9d8a262009-07-01 08:06:34 +00002863 helper_SBB( size, dst1, dst0, src,
2864 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5b470602005-02-27 13:10:48 +00002865 putIRegG(size, pfx, rm, mkexpr(dst1));
sewardjdf0e0022005-01-25 15:48:43 +00002866 } else {
2867 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2868 if (isAddSub(op8))
2869 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2870 else
2871 setFlags_DEP1(op8, dst1, ty);
2872 if (keep)
sewardj5b470602005-02-27 13:10:48 +00002873 putIRegG(size, pfx, rm, mkexpr(dst1));
sewardjdf0e0022005-01-25 15:48:43 +00002874 }
2875
2876 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00002877 dis_buf, nameIRegG(size, pfx, rm));
sewardjdf0e0022005-01-25 15:48:43 +00002878 return len+delta0;
2879 }
2880}
2881
2882
2883
sewardj3ca55a12005-01-27 16:06:23 +00002884/* Handle binary integer instructions of the form
2885 op G, E meaning
2886 op reg, reg-or-mem
 2887 Is passed a pointer to the modRM byte, the actual operation, and the
2888 data size. Returns the address advanced completely over this
2889 instruction.
2890
2891 G(src) is reg.
2892 E(dst) is reg-or-mem
2893
2894 If E is reg, --> GET %E, tmp
2895 OP %G, tmp
2896 PUT tmp, %E
2897
2898 If E is mem, --> (getAddr E) -> tmpa
2899 LD (tmpa), tmpv
2900 OP %G, tmpv
2901 ST tmpv, (tmpa)
2902*/
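/* Note on the LOCK prefix (illustrative, e.g. "lock addl %ebx,(%rcx)"):
   in the memory case below the updated value is written back with
   casLE -- a compare-and-swap against the value originally loaded,
   with guest_RIP_curr_instr supplied as the restart point should the
   CAS fail -- rather than with a plain storeLE. */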
2903static
sewardj2e28ac42008-12-04 00:05:12 +00002904ULong dis_op2_G_E ( VexAbiInfo* vbi,
2905 Prefix pfx,
sewardj8c332e22005-01-28 01:36:56 +00002906 Bool addSubCarry,
2907 IROp op8,
2908 Bool keep,
2909 Int size,
sewardj270def42005-07-03 01:03:01 +00002910 Long delta0,
sewardj8c332e22005-01-28 01:36:56 +00002911 HChar* t_amd64opc )
sewardj3ca55a12005-01-27 16:06:23 +00002912{
2913 HChar dis_buf[50];
2914 Int len;
2915 IRType ty = szToITy(size);
2916 IRTemp dst1 = newTemp(ty);
2917 IRTemp src = newTemp(ty);
2918 IRTemp dst0 = newTemp(ty);
sewardj8c332e22005-01-28 01:36:56 +00002919 UChar rm = getUChar(delta0);
sewardj3ca55a12005-01-27 16:06:23 +00002920 IRTemp addr = IRTemp_INVALID;
2921
2922 /* addSubCarry == True indicates the intended operation is
2923 add-with-carry or subtract-with-borrow. */
2924 if (addSubCarry) {
2925 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
2926 vassert(keep);
2927 }
2928
2929 if (epartIsReg(rm)) {
2930 /* Specially handle XOR reg,reg, because that doesn't really
2931 depend on reg, and doing the obvious thing potentially
2932 generates a spurious value check failure due to the bogus
sewardj5b470602005-02-27 13:10:48 +00002933 dependency. Ditto SBB reg,reg. */
2934 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
2935 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
2936 putIRegE(size,pfx,rm, mkU(ty,0));
sewardj3ca55a12005-01-27 16:06:23 +00002937 }
sewardj5b470602005-02-27 13:10:48 +00002938
2939 assign(dst0, getIRegE(size,pfx,rm));
2940 assign(src, getIRegG(size,pfx,rm));
sewardj3ca55a12005-01-27 16:06:23 +00002941
2942 if (addSubCarry && op8 == Iop_Add8) {
sewardje9d8a262009-07-01 08:06:34 +00002943 helper_ADC( size, dst1, dst0, src,
2944 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5b470602005-02-27 13:10:48 +00002945 putIRegE(size, pfx, rm, mkexpr(dst1));
sewardj3ca55a12005-01-27 16:06:23 +00002946 } else
2947 if (addSubCarry && op8 == Iop_Sub8) {
sewardje9d8a262009-07-01 08:06:34 +00002948 helper_SBB( size, dst1, dst0, src,
2949 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5b470602005-02-27 13:10:48 +00002950 putIRegE(size, pfx, rm, mkexpr(dst1));
sewardj3ca55a12005-01-27 16:06:23 +00002951 } else {
2952 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
2953 if (isAddSub(op8))
2954 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2955 else
2956 setFlags_DEP1(op8, dst1, ty);
2957 if (keep)
sewardj5b470602005-02-27 13:10:48 +00002958 putIRegE(size, pfx, rm, mkexpr(dst1));
sewardj3ca55a12005-01-27 16:06:23 +00002959 }
2960
2961 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00002962 nameIRegG(size,pfx,rm),
2963 nameIRegE(size,pfx,rm));
sewardj3ca55a12005-01-27 16:06:23 +00002964 return 1+delta0;
2965 }
2966
2967 /* E refers to memory */
2968 {
sewardj2e28ac42008-12-04 00:05:12 +00002969 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardj3ca55a12005-01-27 16:06:23 +00002970 assign(dst0, loadLE(ty,mkexpr(addr)));
sewardj5b470602005-02-27 13:10:48 +00002971 assign(src, getIRegG(size,pfx,rm));
sewardj3ca55a12005-01-27 16:06:23 +00002972
2973 if (addSubCarry && op8 == Iop_Add8) {
sewardje9d8a262009-07-01 08:06:34 +00002974 if (pfx & PFX_LOCK) {
2975 /* cas-style store */
2976 helper_ADC( size, dst1, dst0, src,
2977 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
2978 } else {
2979 /* normal store */
2980 helper_ADC( size, dst1, dst0, src,
2981 /*store*/addr, IRTemp_INVALID, 0 );
2982 }
sewardj3ca55a12005-01-27 16:06:23 +00002983 } else
2984 if (addSubCarry && op8 == Iop_Sub8) {
sewardje9d8a262009-07-01 08:06:34 +00002985 if (pfx & PFX_LOCK) {
2986 /* cas-style store */
2987 helper_SBB( size, dst1, dst0, src,
2988 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
2989 } else {
2990 /* normal store */
2991 helper_SBB( size, dst1, dst0, src,
2992 /*store*/addr, IRTemp_INVALID, 0 );
2993 }
sewardj3ca55a12005-01-27 16:06:23 +00002994 } else {
2995 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
sewardje9d8a262009-07-01 08:06:34 +00002996 if (keep) {
2997 if (pfx & PFX_LOCK) {
2998 if (0) vex_printf("locked case\n" );
2999 casLE( mkexpr(addr),
3000 mkexpr(dst0)/*expval*/,
3001 mkexpr(dst1)/*newval*/, guest_RIP_curr_instr );
3002 } else {
3003 if (0) vex_printf("nonlocked case\n");
3004 storeLE(mkexpr(addr), mkexpr(dst1));
3005 }
3006 }
sewardj3ca55a12005-01-27 16:06:23 +00003007 if (isAddSub(op8))
3008 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3009 else
3010 setFlags_DEP1(op8, dst1, ty);
sewardj3ca55a12005-01-27 16:06:23 +00003011 }
3012
3013 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003014 nameIRegG(size,pfx,rm), dis_buf);
sewardj3ca55a12005-01-27 16:06:23 +00003015 return len+delta0;
3016 }
3017}
3018
3019
sewardj1389d4d2005-01-28 13:46:29 +00003020/* Handle move instructions of the form
3021 mov E, G meaning
3022 mov reg-or-mem, reg
3023 Is passed the a ptr to the modRM byte, and the data size. Returns
3024 the address advanced completely over this instruction.
3025
3026 E(src) is reg-or-mem
3027 G(dst) is reg.
3028
3029 If E is reg, --> GET %E, tmpv
3030 PUT tmpv, %G
3031
3032 If E is mem --> (getAddr E) -> tmpa
3033 LD (tmpa), tmpb
3034 PUT tmpb, %G
3035*/
3036static
sewardj2e28ac42008-12-04 00:05:12 +00003037ULong dis_mov_E_G ( VexAbiInfo* vbi,
3038 Prefix pfx,
sewardj1389d4d2005-01-28 13:46:29 +00003039 Int size,
sewardj270def42005-07-03 01:03:01 +00003040 Long delta0 )
sewardj1389d4d2005-01-28 13:46:29 +00003041{
3042 Int len;
3043 UChar rm = getUChar(delta0);
3044 HChar dis_buf[50];
3045
3046 if (epartIsReg(rm)) {
sewardj5b470602005-02-27 13:10:48 +00003047 putIRegG(size, pfx, rm, getIRegE(size, pfx, rm));
sewardje941eea2005-01-30 19:52:28 +00003048 DIP("mov%c %s,%s\n", nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003049 nameIRegE(size,pfx,rm),
3050 nameIRegG(size,pfx,rm));
sewardj1389d4d2005-01-28 13:46:29 +00003051 return 1+delta0;
3052 }
3053
3054 /* E refers to memory */
3055 {
sewardj2e28ac42008-12-04 00:05:12 +00003056 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardj5b470602005-02-27 13:10:48 +00003057 putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr)));
sewardj1389d4d2005-01-28 13:46:29 +00003058 DIP("mov%c %s,%s\n", nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003059 dis_buf,
3060 nameIRegG(size,pfx,rm));
sewardj1389d4d2005-01-28 13:46:29 +00003061 return delta0+len;
3062 }
3063}
3064
3065
3066/* Handle move instructions of the form
3067 mov G, E meaning
3068 mov reg, reg-or-mem
 3069 Is passed a pointer to the modRM byte, and the data size. Returns
3070 the address advanced completely over this instruction.
3071
3072 G(src) is reg.
3073 E(dst) is reg-or-mem
3074
3075 If E is reg, --> GET %G, tmp
3076 PUT tmp, %E
3077
3078 If E is mem, --> (getAddr E) -> tmpa
3079 GET %G, tmpv
3080 ST tmpv, (tmpa)
3081*/
3082static
sewardj2e28ac42008-12-04 00:05:12 +00003083ULong dis_mov_G_E ( VexAbiInfo* vbi,
3084 Prefix pfx,
sewardj1389d4d2005-01-28 13:46:29 +00003085 Int size,
sewardj270def42005-07-03 01:03:01 +00003086 Long delta0 )
sewardj1389d4d2005-01-28 13:46:29 +00003087{
3088 Int len;
3089 UChar rm = getUChar(delta0);
3090 HChar dis_buf[50];
3091
3092 if (epartIsReg(rm)) {
sewardj5b470602005-02-27 13:10:48 +00003093 putIRegE(size, pfx, rm, getIRegG(size, pfx, rm));
sewardje941eea2005-01-30 19:52:28 +00003094 DIP("mov%c %s,%s\n", nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003095 nameIRegG(size,pfx,rm),
3096 nameIRegE(size,pfx,rm));
sewardj1389d4d2005-01-28 13:46:29 +00003097 return 1+delta0;
3098 }
3099
3100 /* E refers to memory */
3101 {
sewardj2e28ac42008-12-04 00:05:12 +00003102 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardj5b470602005-02-27 13:10:48 +00003103 storeLE( mkexpr(addr), getIRegG(size, pfx, rm) );
sewardj1389d4d2005-01-28 13:46:29 +00003104 DIP("mov%c %s,%s\n", nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003105 nameIRegG(size,pfx,rm),
3106 dis_buf);
sewardj1389d4d2005-01-28 13:46:29 +00003107 return len+delta0;
3108 }
3109}
sewardj3ca55a12005-01-27 16:06:23 +00003110
3111
3112/* op $immediate, AL/AX/EAX/RAX. */
3113static
sewardj8c332e22005-01-28 01:36:56 +00003114ULong dis_op_imm_A ( Int size,
sewardj41c01092005-07-23 13:50:32 +00003115 Bool carrying,
sewardj8c332e22005-01-28 01:36:56 +00003116 IROp op8,
3117 Bool keep,
sewardj270def42005-07-03 01:03:01 +00003118 Long delta,
sewardj8c332e22005-01-28 01:36:56 +00003119 HChar* t_amd64opc )
sewardj3ca55a12005-01-27 16:06:23 +00003120{
3121 Int size4 = imin(size,4);
3122 IRType ty = szToITy(size);
3123 IRTemp dst0 = newTemp(ty);
3124 IRTemp src = newTemp(ty);
3125 IRTemp dst1 = newTemp(ty);
sewardj8c332e22005-01-28 01:36:56 +00003126 Long lit = getSDisp(size4,delta);
sewardj5b470602005-02-27 13:10:48 +00003127 assign(dst0, getIRegRAX(size));
sewardj1389d4d2005-01-28 13:46:29 +00003128 assign(src, mkU(ty,lit & mkSizeMask(size)));
sewardj41c01092005-07-23 13:50:32 +00003129
3130 if (isAddSub(op8) && !carrying) {
3131 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
sewardj3ca55a12005-01-27 16:06:23 +00003132 setFlags_DEP1_DEP2(op8, dst0, src, ty);
sewardj41c01092005-07-23 13:50:32 +00003133 }
sewardj3ca55a12005-01-27 16:06:23 +00003134 else
sewardj41c01092005-07-23 13:50:32 +00003135 if (isLogic(op8)) {
3136 vassert(!carrying);
3137 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
sewardj3ca55a12005-01-27 16:06:23 +00003138 setFlags_DEP1(op8, dst1, ty);
sewardj41c01092005-07-23 13:50:32 +00003139 }
sewardj3ca55a12005-01-27 16:06:23 +00003140 else
sewardj41c01092005-07-23 13:50:32 +00003141 if (op8 == Iop_Add8 && carrying) {
sewardje9d8a262009-07-01 08:06:34 +00003142 helper_ADC( size, dst1, dst0, src,
3143 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj41c01092005-07-23 13:50:32 +00003144 }
3145 else
sewardj5fadaf92006-05-12 20:45:59 +00003146 if (op8 == Iop_Sub8 && carrying) {
sewardje9d8a262009-07-01 08:06:34 +00003147 helper_SBB( size, dst1, dst0, src,
3148 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5fadaf92006-05-12 20:45:59 +00003149 }
3150 else
sewardj41c01092005-07-23 13:50:32 +00003151 vpanic("dis_op_imm_A(amd64,guest)");
sewardj3ca55a12005-01-27 16:06:23 +00003152
3153 if (keep)
sewardj5b470602005-02-27 13:10:48 +00003154 putIRegRAX(size, mkexpr(dst1));
sewardj3ca55a12005-01-27 16:06:23 +00003155
3156 DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003157 lit, nameIRegRAX(size));
sewardj3ca55a12005-01-27 16:06:23 +00003158 return delta+size4;
3159}
3160
3161
sewardj5e525292005-01-28 15:13:10 +00003162/* Sign- and Zero-extending moves. */
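/* E.g. (illustration): "movswl %ax,%ebx" arrives here with szs == 2,
   szd == 4 and sign_extend == True; the 16-bit source is widened with
   doScalarWidening and the result written to %ebx. */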
3163static
sewardj2e28ac42008-12-04 00:05:12 +00003164ULong dis_movx_E_G ( VexAbiInfo* vbi,
3165 Prefix pfx,
sewardj270def42005-07-03 01:03:01 +00003166 Long delta, Int szs, Int szd, Bool sign_extend )
sewardj5e525292005-01-28 15:13:10 +00003167{
3168 UChar rm = getUChar(delta);
3169 if (epartIsReg(rm)) {
sewardj5b470602005-02-27 13:10:48 +00003170 putIRegG(szd, pfx, rm,
sewardj5e525292005-01-28 15:13:10 +00003171 doScalarWidening(
3172 szs,szd,sign_extend,
sewardj5b470602005-02-27 13:10:48 +00003173 getIRegE(szs,pfx,rm)));
sewardj5e525292005-01-28 15:13:10 +00003174 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3175 nameISize(szs),
3176 nameISize(szd),
sewardj5b470602005-02-27 13:10:48 +00003177 nameIRegE(szs,pfx,rm),
3178 nameIRegG(szd,pfx,rm));
sewardj5e525292005-01-28 15:13:10 +00003179 return 1+delta;
3180 }
3181
3182 /* E refers to memory */
3183 {
3184 Int len;
3185 HChar dis_buf[50];
sewardj2e28ac42008-12-04 00:05:12 +00003186 IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj5b470602005-02-27 13:10:48 +00003187 putIRegG(szd, pfx, rm,
sewardj5e525292005-01-28 15:13:10 +00003188 doScalarWidening(
3189 szs,szd,sign_extend,
3190 loadLE(szToITy(szs),mkexpr(addr))));
3191 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3192 nameISize(szs),
3193 nameISize(szd),
3194 dis_buf,
sewardj5b470602005-02-27 13:10:48 +00003195 nameIRegG(szd,pfx,rm));
sewardj5e525292005-01-28 15:13:10 +00003196 return len+delta;
3197 }
3198}
sewardj32b2bbe2005-01-28 00:50:10 +00003199
3200
sewardj03b07cc2005-01-31 18:09:43 +00003201/* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
3202 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */
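/* For instance (illustration): with sz == 4 and an unsigned divide,
   src64 below is EDX:EAX and Iop_DivModU64to32 produces the quotient
   in its low 32 bits and the remainder in its high 32 bits, which are
   then written back to EAX and EDX respectively. */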
sewardj32b2bbe2005-01-28 00:50:10 +00003203static
3204void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
3205{
sewardj03b07cc2005-01-31 18:09:43 +00003206 /* special-case the 64-bit case */
3207 if (sz == 8) {
3208 IROp op = signed_divide ? Iop_DivModS128to64
3209 : Iop_DivModU128to64;
sewardja6b93d12005-02-17 09:28:28 +00003210 IRTemp src128 = newTemp(Ity_I128);
3211 IRTemp dst128 = newTemp(Ity_I128);
sewardj03b07cc2005-01-31 18:09:43 +00003212 assign( src128, binop(Iop_64HLto128,
sewardja6b93d12005-02-17 09:28:28 +00003213 getIReg64(R_RDX),
3214 getIReg64(R_RAX)) );
sewardj03b07cc2005-01-31 18:09:43 +00003215 assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) );
sewardja6b93d12005-02-17 09:28:28 +00003216 putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) );
3217 putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) );
sewardj03b07cc2005-01-31 18:09:43 +00003218 } else {
3219 IROp op = signed_divide ? Iop_DivModS64to32
3220 : Iop_DivModU64to32;
3221 IRTemp src64 = newTemp(Ity_I64);
3222 IRTemp dst64 = newTemp(Ity_I64);
3223 switch (sz) {
sewardj85520e42005-02-19 15:22:38 +00003224 case 4:
sewardj5b470602005-02-27 13:10:48 +00003225 assign( src64,
3226 binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) );
3227 assign( dst64,
3228 binop(op, mkexpr(src64), mkexpr(t)) );
3229 putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) );
3230 putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) );
sewardj85520e42005-02-19 15:22:38 +00003231 break;
3232 case 2: {
3233 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3234 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3235 assign( src64, unop(widen3264,
3236 binop(Iop_16HLto32,
sewardj5b470602005-02-27 13:10:48 +00003237 getIRegRDX(2),
3238 getIRegRAX(2))) );
sewardj85520e42005-02-19 15:22:38 +00003239 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
sewardj5b470602005-02-27 13:10:48 +00003240 putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
3241 putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
sewardj85520e42005-02-19 15:22:38 +00003242 break;
3243 }
3244 case 1: {
3245 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3246 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3247 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16;
3248 assign( src64, unop(widen3264,
sewardj5b470602005-02-27 13:10:48 +00003249 unop(widen1632, getIRegRAX(2))) );
sewardj85520e42005-02-19 15:22:38 +00003250 assign( dst64,
3251 binop(op, mkexpr(src64),
3252 unop(widen1632, unop(widen816, mkexpr(t)))) );
sewardj5b470602005-02-27 13:10:48 +00003253 putIRegRAX( 1, unop(Iop_16to8,
3254 unop(Iop_32to16,
3255 unop(Iop_64to32,mkexpr(dst64)))) );
3256 putIRegAH( unop(Iop_16to8,
3257 unop(Iop_32to16,
3258 unop(Iop_64HIto32,mkexpr(dst64)))) );
sewardj85520e42005-02-19 15:22:38 +00003259 break;
3260 }
3261 default:
3262 vpanic("codegen_div(amd64)");
sewardj03b07cc2005-01-31 18:09:43 +00003263 }
sewardj32b2bbe2005-01-28 00:50:10 +00003264 }
3265}
sewardj3ca55a12005-01-27 16:06:23 +00003266
3267static
sewardj2e28ac42008-12-04 00:05:12 +00003268ULong dis_Grp1 ( VexAbiInfo* vbi,
3269 Prefix pfx,
sewardj270def42005-07-03 01:03:01 +00003270 Long delta, UChar modrm,
sewardj227458e2005-01-31 19:04:50 +00003271 Int am_sz, Int d_sz, Int sz, Long d64 )
sewardj3ca55a12005-01-27 16:06:23 +00003272{
3273 Int len;
3274 HChar dis_buf[50];
3275 IRType ty = szToITy(sz);
3276 IRTemp dst1 = newTemp(ty);
3277 IRTemp src = newTemp(ty);
3278 IRTemp dst0 = newTemp(ty);
3279 IRTemp addr = IRTemp_INVALID;
3280 IROp op8 = Iop_INVALID;
sewardj1389d4d2005-01-28 13:46:29 +00003281 ULong mask = mkSizeMask(sz);
sewardj3ca55a12005-01-27 16:06:23 +00003282
sewardj901ed122005-02-27 13:25:31 +00003283 switch (gregLO3ofRM(modrm)) {
sewardj3ca55a12005-01-27 16:06:23 +00003284 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break;
3285 case 2: break; // ADC
3286 case 3: break; // SBB
3287 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break;
3288 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break;
sewardjfd4203c2007-03-21 00:21:56 +00003289 /*NOTREACHED*/
sewardj3ca55a12005-01-27 16:06:23 +00003290 default: vpanic("dis_Grp1(amd64): unhandled case");
3291 }
3292
3293 if (epartIsReg(modrm)) {
3294 vassert(am_sz == 1);
3295
sewardj5b470602005-02-27 13:10:48 +00003296 assign(dst0, getIRegE(sz,pfx,modrm));
sewardj227458e2005-01-31 19:04:50 +00003297 assign(src, mkU(ty,d64 & mask));
sewardj3ca55a12005-01-27 16:06:23 +00003298
sewardj901ed122005-02-27 13:25:31 +00003299 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
sewardje9d8a262009-07-01 08:06:34 +00003300 helper_ADC( sz, dst1, dst0, src,
3301 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj3ca55a12005-01-27 16:06:23 +00003302 } else
sewardj901ed122005-02-27 13:25:31 +00003303 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
sewardje9d8a262009-07-01 08:06:34 +00003304 helper_SBB( sz, dst1, dst0, src,
3305 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj3ca55a12005-01-27 16:06:23 +00003306 } else {
3307 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3308 if (isAddSub(op8))
3309 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3310 else
3311 setFlags_DEP1(op8, dst1, ty);
3312 }
3313
sewardj901ed122005-02-27 13:25:31 +00003314 if (gregLO3ofRM(modrm) < 7)
sewardj5b470602005-02-27 13:10:48 +00003315 putIRegE(sz, pfx, modrm, mkexpr(dst1));
sewardj3ca55a12005-01-27 16:06:23 +00003316
3317 delta += (am_sz + d_sz);
sewardje941eea2005-01-30 19:52:28 +00003318 DIP("%s%c $%lld, %s\n",
sewardj901ed122005-02-27 13:25:31 +00003319 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64,
sewardj5b470602005-02-27 13:10:48 +00003320 nameIRegE(sz,pfx,modrm));
sewardj3ca55a12005-01-27 16:06:23 +00003321 } else {
sewardj2e28ac42008-12-04 00:05:12 +00003322 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
sewardj3ca55a12005-01-27 16:06:23 +00003323
3324 assign(dst0, loadLE(ty,mkexpr(addr)));
sewardj227458e2005-01-31 19:04:50 +00003325 assign(src, mkU(ty,d64 & mask));
sewardj3ca55a12005-01-27 16:06:23 +00003326
sewardj901ed122005-02-27 13:25:31 +00003327 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
sewardje9d8a262009-07-01 08:06:34 +00003328 if (pfx & PFX_LOCK) {
3329 /* cas-style store */
3330 helper_ADC( sz, dst1, dst0, src,
3331 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3332 } else {
3333 /* normal store */
3334 helper_ADC( sz, dst1, dst0, src,
3335 /*store*/addr, IRTemp_INVALID, 0 );
3336 }
sewardj3ca55a12005-01-27 16:06:23 +00003337 } else
sewardj901ed122005-02-27 13:25:31 +00003338 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
sewardje9d8a262009-07-01 08:06:34 +00003339 if (pfx & PFX_LOCK) {
3340 /* cas-style store */
3341 helper_SBB( sz, dst1, dst0, src,
3342 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3343 } else {
3344 /* normal store */
3345 helper_SBB( sz, dst1, dst0, src,
3346 /*store*/addr, IRTemp_INVALID, 0 );
3347 }
sewardj3ca55a12005-01-27 16:06:23 +00003348 } else {
3349 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
sewardje9d8a262009-07-01 08:06:34 +00003350 if (gregLO3ofRM(modrm) < 7) {
3351 if (pfx & PFX_LOCK) {
3352 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
3353 mkexpr(dst1)/*newVal*/,
3354 guest_RIP_curr_instr );
3355 } else {
3356 storeLE(mkexpr(addr), mkexpr(dst1));
3357 }
3358 }
sewardj3ca55a12005-01-27 16:06:23 +00003359 if (isAddSub(op8))
3360 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3361 else
3362 setFlags_DEP1(op8, dst1, ty);
3363 }
3364
sewardj3ca55a12005-01-27 16:06:23 +00003365 delta += (len+d_sz);
sewardje941eea2005-01-30 19:52:28 +00003366 DIP("%s%c $%lld, %s\n",
sewardj901ed122005-02-27 13:25:31 +00003367 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz),
sewardj227458e2005-01-31 19:04:50 +00003368 d64, dis_buf);
sewardj3ca55a12005-01-27 16:06:23 +00003369 }
3370 return delta;
3371}
3372
3373
sewardj118b23e2005-01-29 02:14:44 +00003374/* Group 2 extended opcodes. shift_expr must be an 8-bit typed
3375 expression. */
3376
3377static
sewardj2e28ac42008-12-04 00:05:12 +00003378ULong dis_Grp2 ( VexAbiInfo* vbi,
3379 Prefix pfx,
sewardj270def42005-07-03 01:03:01 +00003380 Long delta, UChar modrm,
sewardj118b23e2005-01-29 02:14:44 +00003381 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
sewardjfd4203c2007-03-21 00:21:56 +00003382 HChar* shift_expr_txt, Bool* decode_OK )
sewardj118b23e2005-01-29 02:14:44 +00003383{
3384 /* delta on entry points at the modrm byte. */
3385 HChar dis_buf[50];
3386 Int len;
sewardjb5e5c6d2007-01-12 20:29:01 +00003387 Bool isShift, isRotate, isRotateC;
sewardj118b23e2005-01-29 02:14:44 +00003388 IRType ty = szToITy(sz);
3389 IRTemp dst0 = newTemp(ty);
3390 IRTemp dst1 = newTemp(ty);
3391 IRTemp addr = IRTemp_INVALID;
3392
sewardjfd4203c2007-03-21 00:21:56 +00003393 *decode_OK = True;
3394
sewardj118b23e2005-01-29 02:14:44 +00003395 vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
3396
3397 /* Put value to shift/rotate in dst0. */
3398 if (epartIsReg(modrm)) {
sewardj5b470602005-02-27 13:10:48 +00003399 assign(dst0, getIRegE(sz, pfx, modrm));
sewardj118b23e2005-01-29 02:14:44 +00003400 delta += (am_sz + d_sz);
3401 } else {
sewardj2e28ac42008-12-04 00:05:12 +00003402 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
sewardj118b23e2005-01-29 02:14:44 +00003403 assign(dst0, loadLE(ty,mkexpr(addr)));
3404 delta += len + d_sz;
3405 }
3406
3407 isShift = False;
tomd6b43fd2011-08-19 16:06:52 +00003408 switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }
sewardj118b23e2005-01-29 02:14:44 +00003409
3410 isRotate = False;
sewardj901ed122005-02-27 13:25:31 +00003411 switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; }
sewardj118b23e2005-01-29 02:14:44 +00003412
sewardjb5e5c6d2007-01-12 20:29:01 +00003413 isRotateC = False;
3414 switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; }
sewardj118b23e2005-01-29 02:14:44 +00003415
sewardjb5e5c6d2007-01-12 20:29:01 +00003416 if (!isShift && !isRotate && !isRotateC) {
sewardjfd4203c2007-03-21 00:21:56 +00003417 /*NOTREACHED*/
sewardj118b23e2005-01-29 02:14:44 +00003418 vpanic("dis_Grp2(Reg): unhandled case(amd64)");
3419 }
3420
sewardjb5e5c6d2007-01-12 20:29:01 +00003421 if (isRotateC) {
sewardj112b0992005-07-23 13:19:32 +00003422 /* Call a helper; this insn is so ridiculous it does not deserve
3423 better. One problem is, the helper has to calculate both the
3424 new value and the new flags. This is more than 64 bits, and
3425 there is no way to return more than 64 bits from the helper.
3426 Hence the crude and obvious solution is to call it twice,
3427 using the sign of the sz field to indicate whether it is the
3428 value or rflags result we want.
3429 */
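      /* Example (illustration): for "rclb $1, %al", sz is 1, so the
         helper is called once with a final argument of 1 to compute
         the rotated value and once with -1 to compute the new
         rflags. */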
sewardjb5e5c6d2007-01-12 20:29:01 +00003430 Bool left = toBool(gregLO3ofRM(modrm) == 2);
sewardj112b0992005-07-23 13:19:32 +00003431 IRExpr** argsVALUE;
3432 IRExpr** argsRFLAGS;
3433
3434 IRTemp new_value = newTemp(Ity_I64);
3435 IRTemp new_rflags = newTemp(Ity_I64);
3436 IRTemp old_rflags = newTemp(Ity_I64);
3437
3438 assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) );
3439
3440 argsVALUE
3441 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3442 widenUto64(shift_expr), /* rotate amount */
3443 mkexpr(old_rflags),
3444 mkU64(sz) );
3445 assign( new_value,
3446 mkIRExprCCall(
3447 Ity_I64,
3448 0/*regparm*/,
sewardjb5e5c6d2007-01-12 20:29:01 +00003449 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3450 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
sewardj112b0992005-07-23 13:19:32 +00003451 argsVALUE
3452 )
3453 );
3454
3455 argsRFLAGS
3456 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3457 widenUto64(shift_expr), /* rotate amount */
3458 mkexpr(old_rflags),
3459 mkU64(-sz) );
3460 assign( new_rflags,
3461 mkIRExprCCall(
3462 Ity_I64,
3463 0/*regparm*/,
sewardjb5e5c6d2007-01-12 20:29:01 +00003464 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3465 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
sewardj112b0992005-07-23 13:19:32 +00003466 argsRFLAGS
3467 )
3468 );
3469
3470 assign( dst1, narrowTo(ty, mkexpr(new_value)) );
3471 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
3472 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
3473 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
3474 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
sewardj118b23e2005-01-29 02:14:44 +00003475 }
3476
sewardj112b0992005-07-23 13:19:32 +00003477 else
sewardj118b23e2005-01-29 02:14:44 +00003478 if (isShift) {
3479
3480 IRTemp pre64 = newTemp(Ity_I64);
3481 IRTemp res64 = newTemp(Ity_I64);
3482 IRTemp res64ss = newTemp(Ity_I64);
3483 IRTemp shift_amt = newTemp(Ity_I8);
sewardj1027dc22005-02-26 01:55:02 +00003484 UChar mask = toUChar(sz==8 ? 63 : 31);
sewardj118b23e2005-01-29 02:14:44 +00003485 IROp op64;
3486
sewardj901ed122005-02-27 13:25:31 +00003487 switch (gregLO3ofRM(modrm)) {
sewardj118b23e2005-01-29 02:14:44 +00003488 case 4: op64 = Iop_Shl64; break;
3489 case 5: op64 = Iop_Shr64; break;
tomd6b43fd2011-08-19 16:06:52 +00003490 case 6: op64 = Iop_Shl64; break;
sewardj118b23e2005-01-29 02:14:44 +00003491 case 7: op64 = Iop_Sar64; break;
sewardjfd4203c2007-03-21 00:21:56 +00003492 /*NOTREACHED*/
sewardj118b23e2005-01-29 02:14:44 +00003493 default: vpanic("dis_Grp2:shift"); break;
3494 }
3495
3496 /* Widen the value to be shifted to 64 bits, do the shift, and
3497 narrow back down. This seems surprisingly long-winded, but
3498 unfortunately the AMD semantics requires that 8/16/32-bit
3499 shifts give defined results for shift values all the way up
sewardj03c96e82005-02-19 18:12:45 +00003500 to 32, and this seems the simplest way to do it. It has the
sewardj118b23e2005-01-29 02:14:44 +00003501 advantage that the only IR level shifts generated are of 64
3502 bit values, and the shift amount is guaranteed to be in the
3503 range 0 .. 63, thereby observing the IR semantics requiring
sewardj03c96e82005-02-19 18:12:45 +00003504 all shift values to be in the range 0 .. 2^word_size-1.
sewardj118b23e2005-01-29 02:14:44 +00003505
sewardj03c96e82005-02-19 18:12:45 +00003506 Therefore the shift amount is masked with 63 for 64-bit shifts
3507 and 31 for all others.
3508 */
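      /* Example (illustration): "shrw $17, %ax" gives shift_amt
         = 17 & 31 = 17; %ax is zero-widened to 64 bits, shifted right
         by 17 (yielding zero), and the low 16 bits of the result are
         written back, while the flags thunk is built from res64 and
         res64ss (the shift by 16). */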
3509 /* shift_amt = shift_expr & MASK, regardless of operation size */
3510 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) );
sewardj118b23e2005-01-29 02:14:44 +00003511
sewardj03c96e82005-02-19 18:12:45 +00003512 /* suitably widen the value to be shifted to 64 bits. */
sewardj118b23e2005-01-29 02:14:44 +00003513 assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0))
3514 : widenUto64(mkexpr(dst0)) );
3515
3516 /* res64 = pre64 `shift` shift_amt */
3517 assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) );
3518
sewardj03c96e82005-02-19 18:12:45 +00003519 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
sewardj118b23e2005-01-29 02:14:44 +00003520 assign( res64ss,
3521 binop(op64,
3522 mkexpr(pre64),
3523 binop(Iop_And8,
3524 binop(Iop_Sub8,
3525 mkexpr(shift_amt), mkU8(1)),
sewardj03c96e82005-02-19 18:12:45 +00003526 mkU8(mask))) );
sewardj118b23e2005-01-29 02:14:44 +00003527
3528 /* Build the flags thunk. */
3529 setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt);
3530
3531 /* Narrow the result back down. */
3532 assign( dst1, narrowTo(ty, mkexpr(res64)) );
3533
3534 } /* if (isShift) */
3535
3536 else
3537 if (isRotate) {
3538 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1
3539 : (ty==Ity_I32 ? 2 : 3));
sewardj901ed122005-02-27 13:25:31 +00003540 Bool left = toBool(gregLO3ofRM(modrm) == 0);
sewardj118b23e2005-01-29 02:14:44 +00003541 IRTemp rot_amt = newTemp(Ity_I8);
3542 IRTemp rot_amt64 = newTemp(Ity_I8);
3543 IRTemp oldFlags = newTemp(Ity_I64);
sewardj1027dc22005-02-26 01:55:02 +00003544 UChar mask = toUChar(sz==8 ? 63 : 31);
sewardj118b23e2005-01-29 02:14:44 +00003545
3546 /* rot_amt = shift_expr & mask */
3547 /* By masking the rotate amount thusly, the IR-level Shl/Shr
3548 expressions never shift beyond the word size and thus remain
3549 well defined. */
sewardj03c96e82005-02-19 18:12:45 +00003550 assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask)));
sewardj118b23e2005-01-29 02:14:44 +00003551
3552 if (ty == Ity_I64)
3553 assign(rot_amt, mkexpr(rot_amt64));
3554 else
3555 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1)));
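      /* E.g. (illustration): an 8-bit rotate with a count of 9 gives
         rot_amt64 = 9 (9 & 31) and rot_amt = 1 (9 & 7): the data is
         rotated by one bit, while the flag-thunk guards below still
         see a non-zero rot_amt64 and so update the flags. */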
3556
3557 if (left) {
3558
3559 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
3560 assign(dst1,
3561 binop( mkSizedOp(ty,Iop_Or8),
3562 binop( mkSizedOp(ty,Iop_Shl8),
3563 mkexpr(dst0),
3564 mkexpr(rot_amt)
3565 ),
3566 binop( mkSizedOp(ty,Iop_Shr8),
3567 mkexpr(dst0),
3568 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3569 )
3570 )
3571 );
3572 ccOp += AMD64G_CC_OP_ROLB;
3573
3574 } else { /* right */
3575
3576 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
3577 assign(dst1,
3578 binop( mkSizedOp(ty,Iop_Or8),
3579 binop( mkSizedOp(ty,Iop_Shr8),
3580 mkexpr(dst0),
3581 mkexpr(rot_amt)
3582 ),
3583 binop( mkSizedOp(ty,Iop_Shl8),
3584 mkexpr(dst0),
3585 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3586 )
3587 )
3588 );
3589 ccOp += AMD64G_CC_OP_RORB;
3590
3591 }
3592
3593 /* dst1 now holds the rotated value. Build flag thunk. We
3594 need the resulting value for this, and the previous flags.
3595 Except don't set it if the rotate count is zero. */
3596
3597 assign(oldFlags, mk_amd64g_calculate_rflags_all());
3598
3599 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
3600 stmt( IRStmt_Put( OFFB_CC_OP,
3601 IRExpr_Mux0X( mkexpr(rot_amt64),
3602 IRExpr_Get(OFFB_CC_OP,Ity_I64),
3603 mkU64(ccOp))) );
3604 stmt( IRStmt_Put( OFFB_CC_DEP1,
3605 IRExpr_Mux0X( mkexpr(rot_amt64),
3606 IRExpr_Get(OFFB_CC_DEP1,Ity_I64),
3607 widenUto64(mkexpr(dst1)))) );
3608 stmt( IRStmt_Put( OFFB_CC_DEP2,
3609 IRExpr_Mux0X( mkexpr(rot_amt64),
3610 IRExpr_Get(OFFB_CC_DEP2,Ity_I64),
3611 mkU64(0))) );
3612 stmt( IRStmt_Put( OFFB_CC_NDEP,
3613 IRExpr_Mux0X( mkexpr(rot_amt64),
3614 IRExpr_Get(OFFB_CC_NDEP,Ity_I64),
3615 mkexpr(oldFlags))) );
3616 } /* if (isRotate) */
3617
3618 /* Save result, and finish up. */
3619 if (epartIsReg(modrm)) {
sewardj5b470602005-02-27 13:10:48 +00003620 putIRegE(sz, pfx, modrm, mkexpr(dst1));
sewardj118b23e2005-01-29 02:14:44 +00003621 if (vex_traceflags & VEX_TRACE_FE) {
3622 vex_printf("%s%c ",
sewardj901ed122005-02-27 13:25:31 +00003623 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
sewardj118b23e2005-01-29 02:14:44 +00003624 if (shift_expr_txt)
3625 vex_printf("%s", shift_expr_txt);
3626 else
3627 ppIRExpr(shift_expr);
sewardj5b470602005-02-27 13:10:48 +00003628 vex_printf(", %s\n", nameIRegE(sz,pfx,modrm));
sewardj118b23e2005-01-29 02:14:44 +00003629 }
3630 } else {
3631 storeLE(mkexpr(addr), mkexpr(dst1));
3632 if (vex_traceflags & VEX_TRACE_FE) {
3633 vex_printf("%s%c ",
sewardj901ed122005-02-27 13:25:31 +00003634 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
sewardj118b23e2005-01-29 02:14:44 +00003635 if (shift_expr_txt)
3636 vex_printf("%s", shift_expr_txt);
3637 else
3638 ppIRExpr(shift_expr);
3639 vex_printf(", %s\n", dis_buf);
3640 }
3641 }
3642 return delta;
3643}
3644
3645
sewardj1d511802005-03-27 17:59:45 +00003646/* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
3647static
sewardj2e28ac42008-12-04 00:05:12 +00003648ULong dis_Grp8_Imm ( VexAbiInfo* vbi,
3649 Prefix pfx,
sewardj270def42005-07-03 01:03:01 +00003650 Long delta, UChar modrm,
sewardj1d511802005-03-27 17:59:45 +00003651 Int am_sz, Int sz, ULong src_val,
3652 Bool* decode_OK )
3653{
3654 /* src_val denotes a d8.
3655 And delta on entry points at the modrm byte. */
sewardj118b23e2005-01-29 02:14:44 +00003656
sewardj1d511802005-03-27 17:59:45 +00003657 IRType ty = szToITy(sz);
3658 IRTemp t2 = newTemp(Ity_I64);
3659 IRTemp t2m = newTemp(Ity_I64);
3660 IRTemp t_addr = IRTemp_INVALID;
3661 HChar dis_buf[50];
3662 ULong mask;
sewardj9b967672005-02-08 11:13:09 +00003663
sewardj1d511802005-03-27 17:59:45 +00003664 /* we're optimists :-) */
3665 *decode_OK = True;
3666
3667 /* Limit src_val -- the bit offset -- to something within a word.
3668 The Intel docs say that literal offsets larger than a word are
3669 masked in this way. */
3670 switch (sz) {
3671 case 2: src_val &= 15; break;
3672 case 4: src_val &= 31; break;
sewardj537cab02005-04-07 02:03:52 +00003673 case 8: src_val &= 63; break;
sewardj1d511802005-03-27 17:59:45 +00003674 default: *decode_OK = False; return delta;
3675 }
3676
3677 /* Invent a mask suitable for the operation. */
3678 switch (gregLO3ofRM(modrm)) {
sewardj74b4f892005-05-06 01:43:56 +00003679 case 4: /* BT */ mask = 0; break;
3680 case 5: /* BTS */ mask = 1ULL << src_val; break;
3681 case 6: /* BTR */ mask = ~(1ULL << src_val); break;
3682 case 7: /* BTC */ mask = 1ULL << src_val; break;
sewardj1d511802005-03-27 17:59:45 +00003683 /* If this needs to be extended, probably simplest to make a
3684 new function to handle the other cases (0 .. 3). The
 3685 Intel docs do not, however, indicate any use for 0 .. 3, so
3686 we don't expect this to happen. */
3687 default: *decode_OK = False; return delta;
3688 }
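   /* Example (illustration): "btsw $3, %ax" has sz == 2, so src_val is
      masked to 3 above and mask becomes 1 << 3; the BTS case further
      down ORs that mask into the fetched value, and bit 3 of the
      original value ends up in the carry flag. */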
3689
3690 /* Fetch the value to be tested and modified into t2, which is
3691 64-bits wide regardless of sz. */
3692 if (epartIsReg(modrm)) {
3693 vassert(am_sz == 1);
3694 assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) );
3695 delta += (am_sz + 1);
3696 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
3697 nameISize(sz),
3698 src_val, nameIRegE(sz,pfx,modrm));
3699 } else {
3700 Int len;
sewardj2e28ac42008-12-04 00:05:12 +00003701 t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 );
sewardj1d511802005-03-27 17:59:45 +00003702 delta += (len+1);
3703 assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) );
3704 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
3705 nameISize(sz),
3706 src_val, dis_buf);
3707 }
3708
sewardj1d511802005-03-27 17:59:45 +00003709 /* Compute the new value into t2m, if non-BT. */
3710 switch (gregLO3ofRM(modrm)) {
3711 case 4: /* BT */
3712 break;
3713 case 5: /* BTS */
3714 assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) );
3715 break;
3716 case 6: /* BTR */
3717 assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) );
3718 break;
3719 case 7: /* BTC */
3720 assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
3721 break;
3722 default:
sewardje9d8a262009-07-01 08:06:34 +00003723 /*NOTREACHED*/ /*the previous switch guards this*/
sewardj1d511802005-03-27 17:59:45 +00003724 vassert(0);
3725 }
3726
3727 /* Write the result back, if non-BT. */
3728 if (gregLO3ofRM(modrm) != 4 /* BT */) {
3729 if (epartIsReg(modrm)) {
3730 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
3731 } else {
sewardje9d8a262009-07-01 08:06:34 +00003732 if (pfx & PFX_LOCK) {
3733 casLE( mkexpr(t_addr),
3734 narrowTo(ty, mkexpr(t2))/*expd*/,
3735 narrowTo(ty, mkexpr(t2m))/*new*/,
3736 guest_RIP_curr_instr );
3737 } else {
3738 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
3739 }
sewardj1d511802005-03-27 17:59:45 +00003740 }
3741 }
3742
sewardje9d8a262009-07-01 08:06:34 +00003743 /* Copy relevant bit from t2 into the carry flag. */
3744 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
3745 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
3746 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
3747 stmt( IRStmt_Put(
3748 OFFB_CC_DEP1,
3749 binop(Iop_And64,
3750 binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
3751 mkU64(1))
3752 ));
3753 /* Set NDEP even though it isn't used. This makes redundant-PUT
3754 elimination of previous stores to this field work better. */
3755 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
3756
sewardj1d511802005-03-27 17:59:45 +00003757 return delta;
3758}
sewardj9b967672005-02-08 11:13:09 +00003759
3760
3761/* Signed/unsigned widening multiply. Generate IR to multiply the
3762 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
3763 RDX:RAX/EDX:EAX/DX:AX/AX.
3764*/
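/* E.g. (illustration): with sz == 8 and syned == False, Iop_MullU64
   forms the full 128-bit product; the code parks the high 64 bits in
   RDX and the low 64 bits in RAX, and sets the multiply flags thunk
   via setFlags_MUL. */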
3765static void codegen_mulL_A_D ( Int sz, Bool syned,
sewardj1027dc22005-02-26 01:55:02 +00003766 IRTemp tmp, HChar* tmp_txt )
sewardj9b967672005-02-08 11:13:09 +00003767{
3768 IRType ty = szToITy(sz);
3769 IRTemp t1 = newTemp(ty);
3770
sewardj5b470602005-02-27 13:10:48 +00003771 assign( t1, getIRegRAX(sz) );
sewardj9b967672005-02-08 11:13:09 +00003772
3773 switch (ty) {
3774 case Ity_I64: {
3775 IRTemp res128 = newTemp(Ity_I128);
3776 IRTemp resHi = newTemp(Ity_I64);
3777 IRTemp resLo = newTemp(Ity_I64);
3778 IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64;
sewardj8bdb89a2005-05-05 21:46:50 +00003779 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
sewardj9b967672005-02-08 11:13:09 +00003780 setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp );
3781 assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3782 assign( resHi, unop(Iop_128HIto64,mkexpr(res128)));
3783 assign( resLo, unop(Iop_128to64,mkexpr(res128)));
sewardj5b470602005-02-27 13:10:48 +00003784 putIReg64(R_RDX, mkexpr(resHi));
3785 putIReg64(R_RAX, mkexpr(resLo));
sewardj9b967672005-02-08 11:13:09 +00003786 break;
3787 }
sewardj85520e42005-02-19 15:22:38 +00003788 case Ity_I32: {
3789 IRTemp res64 = newTemp(Ity_I64);
3790 IRTemp resHi = newTemp(Ity_I32);
3791 IRTemp resLo = newTemp(Ity_I32);
3792 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32;
3793 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
3794 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
3795 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3796 assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
3797 assign( resLo, unop(Iop_64to32,mkexpr(res64)));
sewardj5b470602005-02-27 13:10:48 +00003798 putIRegRDX(4, mkexpr(resHi));
3799 putIRegRAX(4, mkexpr(resLo));
sewardj85520e42005-02-19 15:22:38 +00003800 break;
3801 }
3802 case Ity_I16: {
3803 IRTemp res32 = newTemp(Ity_I32);
3804 IRTemp resHi = newTemp(Ity_I16);
3805 IRTemp resLo = newTemp(Ity_I16);
3806 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16;
3807 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
3808 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
3809 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3810 assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
3811 assign( resLo, unop(Iop_32to16,mkexpr(res32)));
sewardj5b470602005-02-27 13:10:48 +00003812 putIRegRDX(2, mkexpr(resHi));
3813 putIRegRAX(2, mkexpr(resLo));
sewardj85520e42005-02-19 15:22:38 +00003814 break;
3815 }
3816 case Ity_I8: {
3817 IRTemp res16 = newTemp(Ity_I16);
3818 IRTemp resHi = newTemp(Ity_I8);
3819 IRTemp resLo = newTemp(Ity_I8);
3820 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8;
3821 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
3822 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
3823 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3824 assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
3825 assign( resLo, unop(Iop_16to8,mkexpr(res16)));
sewardj5b470602005-02-27 13:10:48 +00003826 putIRegRAX(2, mkexpr(res16));
sewardj85520e42005-02-19 15:22:38 +00003827 break;
3828 }
sewardj9b967672005-02-08 11:13:09 +00003829 default:
sewardj85520e42005-02-19 15:22:38 +00003830 ppIRType(ty);
sewardj9b967672005-02-08 11:13:09 +00003831 vpanic("codegen_mulL_A_D(amd64)");
3832 }
3833 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
3834}
sewardj32b2bbe2005-01-28 00:50:10 +00003835
3836
3837/* Group 3 extended opcodes. */
3838static
sewardj2e28ac42008-12-04 00:05:12 +00003839ULong dis_Grp3 ( VexAbiInfo* vbi,
3840 Prefix pfx, Int sz, Long delta, Bool* decode_OK )
sewardj32b2bbe2005-01-28 00:50:10 +00003841{
sewardj227458e2005-01-31 19:04:50 +00003842 Long d64;
sewardj32b2bbe2005-01-28 00:50:10 +00003843 UChar modrm;
3844 HChar dis_buf[50];
3845 Int len;
3846 IRTemp addr;
3847 IRType ty = szToITy(sz);
3848 IRTemp t1 = newTemp(ty);
sewardj55dbb262005-01-28 16:36:51 +00003849 IRTemp dst1, src, dst0;
sewardjfd4203c2007-03-21 00:21:56 +00003850 *decode_OK = True;
sewardj8c332e22005-01-28 01:36:56 +00003851 modrm = getUChar(delta);
sewardj32b2bbe2005-01-28 00:50:10 +00003852 if (epartIsReg(modrm)) {
sewardj901ed122005-02-27 13:25:31 +00003853 switch (gregLO3ofRM(modrm)) {
sewardj118b23e2005-01-29 02:14:44 +00003854 case 0: { /* TEST */
3855 delta++;
3856 d64 = getSDisp(imin(4,sz), delta);
3857 delta += imin(4,sz);
3858 dst1 = newTemp(ty);
3859 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
sewardj5b470602005-02-27 13:10:48 +00003860 getIRegE(sz,pfx,modrm),
sewardj03b07cc2005-01-31 18:09:43 +00003861 mkU(ty, d64 & mkSizeMask(sz))));
sewardj118b23e2005-01-29 02:14:44 +00003862 setFlags_DEP1( Iop_And8, dst1, ty );
sewardj7eaa7cf2005-01-31 18:55:22 +00003863 DIP("test%c $%lld, %s\n",
sewardj118b23e2005-01-29 02:14:44 +00003864 nameISize(sz), d64,
sewardj5b470602005-02-27 13:10:48 +00003865 nameIRegE(sz, pfx, modrm));
sewardj118b23e2005-01-29 02:14:44 +00003866 break;
3867 }
sewardjfd4203c2007-03-21 00:21:56 +00003868 case 1:
3869 *decode_OK = False;
3870 return delta;
sewardj55dbb262005-01-28 16:36:51 +00003871 case 2: /* NOT */
3872 delta++;
sewardj5b470602005-02-27 13:10:48 +00003873 putIRegE(sz, pfx, modrm,
3874 unop(mkSizedOp(ty,Iop_Not8),
3875 getIRegE(sz, pfx, modrm)));
sewardj55dbb262005-01-28 16:36:51 +00003876 DIP("not%c %s\n", nameISize(sz),
sewardj5b470602005-02-27 13:10:48 +00003877 nameIRegE(sz, pfx, modrm));
sewardj55dbb262005-01-28 16:36:51 +00003878 break;
3879 case 3: /* NEG */
3880 delta++;
3881 dst0 = newTemp(ty);
3882 src = newTemp(ty);
3883 dst1 = newTemp(ty);
3884 assign(dst0, mkU(ty,0));
sewardj5b470602005-02-27 13:10:48 +00003885 assign(src, getIRegE(sz, pfx, modrm));
sewardj2e28ac42008-12-04 00:05:12 +00003886 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
3887 mkexpr(src)));
sewardj55dbb262005-01-28 16:36:51 +00003888 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
sewardj5b470602005-02-27 13:10:48 +00003889 putIRegE(sz, pfx, modrm, mkexpr(dst1));
3890 DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm));
sewardj55dbb262005-01-28 16:36:51 +00003891 break;
sewardj9b967672005-02-08 11:13:09 +00003892 case 4: /* MUL (unsigned widening) */
3893 delta++;
3894 src = newTemp(ty);
sewardj5b470602005-02-27 13:10:48 +00003895 assign(src, getIRegE(sz,pfx,modrm));
sewardj9b967672005-02-08 11:13:09 +00003896 codegen_mulL_A_D ( sz, False, src,
sewardj5b470602005-02-27 13:10:48 +00003897 nameIRegE(sz,pfx,modrm) );
sewardj9b967672005-02-08 11:13:09 +00003898 break;
sewardj85520e42005-02-19 15:22:38 +00003899 case 5: /* IMUL (signed widening) */
3900 delta++;
3901 src = newTemp(ty);
sewardj5b470602005-02-27 13:10:48 +00003902 assign(src, getIRegE(sz,pfx,modrm));
sewardj85520e42005-02-19 15:22:38 +00003903 codegen_mulL_A_D ( sz, True, src,
sewardj5b470602005-02-27 13:10:48 +00003904 nameIRegE(sz,pfx,modrm) );
sewardj85520e42005-02-19 15:22:38 +00003905 break;
sewardj03b07cc2005-01-31 18:09:43 +00003906 case 6: /* DIV */
3907 delta++;
sewardj5b470602005-02-27 13:10:48 +00003908 assign( t1, getIRegE(sz, pfx, modrm) );
sewardj03b07cc2005-01-31 18:09:43 +00003909 codegen_div ( sz, t1, False );
3910 DIP("div%c %s\n", nameISize(sz),
sewardj5b470602005-02-27 13:10:48 +00003911 nameIRegE(sz, pfx, modrm));
sewardj03b07cc2005-01-31 18:09:43 +00003912 break;
sewardj32b2bbe2005-01-28 00:50:10 +00003913 case 7: /* IDIV */
3914 delta++;
sewardj5b470602005-02-27 13:10:48 +00003915 assign( t1, getIRegE(sz, pfx, modrm) );
sewardj32b2bbe2005-01-28 00:50:10 +00003916 codegen_div ( sz, t1, True );
3917 DIP("idiv%c %s\n", nameISize(sz),
sewardj5b470602005-02-27 13:10:48 +00003918 nameIRegE(sz, pfx, modrm));
sewardj32b2bbe2005-01-28 00:50:10 +00003919 break;
3920 default:
sewardjfd4203c2007-03-21 00:21:56 +00003921 /*NOTREACHED*/
3922 vpanic("Grp3(amd64,R)");
sewardj32b2bbe2005-01-28 00:50:10 +00003923 }
3924 } else {
sewardj2e28ac42008-12-04 00:05:12 +00003925 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
sewardj7de0d3c2005-02-13 02:26:41 +00003926 /* we have to inform disAMode of any immediate
3927 bytes used */
sewardj901ed122005-02-27 13:25:31 +00003928 gregLO3ofRM(modrm)==0/*TEST*/
sewardj7de0d3c2005-02-13 02:26:41 +00003929 ? imin(4,sz)
3930 : 0
3931 );
sewardj32b2bbe2005-01-28 00:50:10 +00003932 t1 = newTemp(ty);
3933 delta += len;
3934 assign(t1, loadLE(ty,mkexpr(addr)));
sewardj901ed122005-02-27 13:25:31 +00003935 switch (gregLO3ofRM(modrm)) {
sewardj03b07cc2005-01-31 18:09:43 +00003936 case 0: { /* TEST */
3937 d64 = getSDisp(imin(4,sz), delta);
3938 delta += imin(4,sz);
3939 dst1 = newTemp(ty);
3940 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
3941 mkexpr(t1),
3942 mkU(ty, d64 & mkSizeMask(sz))));
3943 setFlags_DEP1( Iop_And8, dst1, ty );
3944 DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf);
3945 break;
3946 }
sewardjfd4203c2007-03-21 00:21:56 +00003947 case 1:
3948 *decode_OK = False;
3949 return delta;
sewardj82c9f2f2005-03-02 16:05:13 +00003950 case 2: /* NOT */
sewardje9d8a262009-07-01 08:06:34 +00003951 dst1 = newTemp(ty);
3952 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
3953 if (pfx & PFX_LOCK) {
3954 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
3955 guest_RIP_curr_instr );
3956 } else {
3957 storeLE( mkexpr(addr), mkexpr(dst1) );
3958 }
sewardj82c9f2f2005-03-02 16:05:13 +00003959 DIP("not%c %s\n", nameISize(sz), dis_buf);
3960 break;
sewardj7de0d3c2005-02-13 02:26:41 +00003961 case 3: /* NEG */
3962 dst0 = newTemp(ty);
3963 src = newTemp(ty);
3964 dst1 = newTemp(ty);
3965 assign(dst0, mkU(ty,0));
3966 assign(src, mkexpr(t1));
sewardj2e28ac42008-12-04 00:05:12 +00003967 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
3968 mkexpr(src)));
sewardje9d8a262009-07-01 08:06:34 +00003969 if (pfx & PFX_LOCK) {
3970 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
3971 guest_RIP_curr_instr );
3972 } else {
3973 storeLE( mkexpr(addr), mkexpr(dst1) );
3974 }
sewardj7de0d3c2005-02-13 02:26:41 +00003975 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
sewardj7de0d3c2005-02-13 02:26:41 +00003976 DIP("neg%c %s\n", nameISize(sz), dis_buf);
3977 break;
sewardj31eecde2005-03-23 03:39:55 +00003978 case 4: /* MUL (unsigned widening) */
3979 codegen_mulL_A_D ( sz, False, t1, dis_buf );
3980 break;
sewardj3aba9eb2005-03-30 23:20:47 +00003981 case 5: /* IMUL */
3982 codegen_mulL_A_D ( sz, True, t1, dis_buf );
3983 break;
sewardj1001dc42005-02-21 08:25:55 +00003984 case 6: /* DIV */
3985 codegen_div ( sz, t1, False );
3986 DIP("div%c %s\n", nameISize(sz), dis_buf);
3987 break;
sewardj82c9f2f2005-03-02 16:05:13 +00003988 case 7: /* IDIV */
3989 codegen_div ( sz, t1, True );
3990 DIP("idiv%c %s\n", nameISize(sz), dis_buf);
3991 break;
sewardj32b2bbe2005-01-28 00:50:10 +00003992 default:
sewardjfd4203c2007-03-21 00:21:56 +00003993 /*NOTREACHED*/
3994 vpanic("Grp3(amd64,M)");
sewardj32b2bbe2005-01-28 00:50:10 +00003995 }
3996 }
3997 return delta;
3998}
3999
4000
sewardj03b07cc2005-01-31 18:09:43 +00004001/* Group 4 extended opcodes. */
4002static
sewardj2e28ac42008-12-04 00:05:12 +00004003ULong dis_Grp4 ( VexAbiInfo* vbi,
4004 Prefix pfx, Long delta, Bool* decode_OK )
sewardj03b07cc2005-01-31 18:09:43 +00004005{
4006 Int alen;
4007 UChar modrm;
4008 HChar dis_buf[50];
4009 IRType ty = Ity_I8;
4010 IRTemp t1 = newTemp(ty);
4011 IRTemp t2 = newTemp(ty);
4012
sewardjfd4203c2007-03-21 00:21:56 +00004013 *decode_OK = True;
4014
sewardj03b07cc2005-01-31 18:09:43 +00004015 modrm = getUChar(delta);
4016 if (epartIsReg(modrm)) {
sewardj5b470602005-02-27 13:10:48 +00004017 assign(t1, getIRegE(1, pfx, modrm));
sewardj901ed122005-02-27 13:25:31 +00004018 switch (gregLO3ofRM(modrm)) {
sewardj85520e42005-02-19 15:22:38 +00004019 case 0: /* INC */
4020 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
sewardj5b470602005-02-27 13:10:48 +00004021 putIRegE(1, pfx, modrm, mkexpr(t2));
sewardj85520e42005-02-19 15:22:38 +00004022 setFlags_INC_DEC( True, t2, ty );
4023 break;
sewardj03b07cc2005-01-31 18:09:43 +00004024 case 1: /* DEC */
4025 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
sewardj5b470602005-02-27 13:10:48 +00004026 putIRegE(1, pfx, modrm, mkexpr(t2));
sewardj03b07cc2005-01-31 18:09:43 +00004027 setFlags_INC_DEC( False, t2, ty );
4028 break;
4029 default:
sewardjfd4203c2007-03-21 00:21:56 +00004030 *decode_OK = False;
4031 return delta;
sewardj03b07cc2005-01-31 18:09:43 +00004032 }
4033 delta++;
sewardj901ed122005-02-27 13:25:31 +00004034 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)),
sewardj5b470602005-02-27 13:10:48 +00004035 nameIRegE(1, pfx, modrm));
sewardj03b07cc2005-01-31 18:09:43 +00004036 } else {
sewardj2e28ac42008-12-04 00:05:12 +00004037 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj03b07cc2005-01-31 18:09:43 +00004038 assign( t1, loadLE(ty, mkexpr(addr)) );
sewardj901ed122005-02-27 13:25:31 +00004039 switch (gregLO3ofRM(modrm)) {
sewardj007e9ec2005-03-23 11:36:48 +00004040 case 0: /* INC */
4041 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
sewardje9d8a262009-07-01 08:06:34 +00004042 if (pfx & PFX_LOCK) {
4043 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
4044 guest_RIP_curr_instr );
4045 } else {
4046 storeLE( mkexpr(addr), mkexpr(t2) );
4047 }
sewardj007e9ec2005-03-23 11:36:48 +00004048 setFlags_INC_DEC( True, t2, ty );
4049 break;
4050 case 1: /* DEC */
4051 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
sewardje9d8a262009-07-01 08:06:34 +00004052 if (pfx & PFX_LOCK) {
4053 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
4054 guest_RIP_curr_instr );
4055 } else {
4056 storeLE( mkexpr(addr), mkexpr(t2) );
4057 }
sewardj007e9ec2005-03-23 11:36:48 +00004058 setFlags_INC_DEC( False, t2, ty );
4059 break;
sewardj03b07cc2005-01-31 18:09:43 +00004060 default:
sewardjfd4203c2007-03-21 00:21:56 +00004061 *decode_OK = False;
4062 return delta;
sewardj03b07cc2005-01-31 18:09:43 +00004063 }
4064 delta += alen;
sewardj901ed122005-02-27 13:25:31 +00004065 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf);
sewardj03b07cc2005-01-31 18:09:43 +00004066 }
4067 return delta;
4068}
sewardj354e5c62005-01-27 20:12:52 +00004069
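/* Illustrative note (not part of the translator): when a LOCK prefix
   is present, the memory-destination INC/DEC cases above express the
   update as a compare-and-swap against the value just loaded (casLE)
   rather than a plain store, and the restart address passed to casLE
   (guest_RIP_curr_instr) arranges for the instruction to be retried
   if the swap fails.  A rough standalone C sketch of that
   read-modify-write, using GCC's __sync builtin purely for
   illustration: */
#if 0
static void lock_incb_model ( unsigned char* p )
{
   unsigned char expd = *p;                          /* t1: value loaded  */
   unsigned char data = (unsigned char)(expd + 1);   /* t2: value + 1     */
   /* casLE: store 'data' only if *p still equals 'expd'. */
   unsigned char seen = __sync_val_compare_and_swap(p, expd, data);
   if (seen != expd) {
      /* in the generated IR, a side-exit restarts the instruction */
   }
}
#endif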
4070
4071/* Group 5 extended opcodes. */
4072static
sewardjdd40fdf2006-12-24 02:20:24 +00004073ULong dis_Grp5 ( VexAbiInfo* vbi,
sewardjfd4203c2007-03-21 00:21:56 +00004074 Prefix pfx, Int sz, Long delta,
sewardjc6f970f2012-04-02 21:54:49 +00004075 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
sewardj354e5c62005-01-27 20:12:52 +00004076{
4077 Int len;
4078 UChar modrm;
4079 HChar dis_buf[50];
4080 IRTemp addr = IRTemp_INVALID;
4081 IRType ty = szToITy(sz);
4082 IRTemp t1 = newTemp(ty);
4083 IRTemp t2 = IRTemp_INVALID;
sewardj03b07cc2005-01-31 18:09:43 +00004084 IRTemp t3 = IRTemp_INVALID;
sewardj7eaa7cf2005-01-31 18:55:22 +00004085 Bool showSz = True;
sewardj354e5c62005-01-27 20:12:52 +00004086
sewardjfd4203c2007-03-21 00:21:56 +00004087 *decode_OK = True;
4088
sewardj8c332e22005-01-28 01:36:56 +00004089 modrm = getUChar(delta);
sewardj354e5c62005-01-27 20:12:52 +00004090 if (epartIsReg(modrm)) {
sewardj5b470602005-02-27 13:10:48 +00004091 assign(t1, getIRegE(sz,pfx,modrm));
sewardj901ed122005-02-27 13:25:31 +00004092 switch (gregLO3ofRM(modrm)) {
sewardj32b2bbe2005-01-28 00:50:10 +00004093 case 0: /* INC */
4094 t2 = newTemp(ty);
4095 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
4096 mkexpr(t1), mkU(ty,1)));
4097 setFlags_INC_DEC( True, t2, ty );
sewardj5b470602005-02-27 13:10:48 +00004098 putIRegE(sz,pfx,modrm, mkexpr(t2));
sewardj32b2bbe2005-01-28 00:50:10 +00004099 break;
4100 case 1: /* DEC */
4101 t2 = newTemp(ty);
4102 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
4103 mkexpr(t1), mkU(ty,1)));
4104 setFlags_INC_DEC( False, t2, ty );
sewardj5b470602005-02-27 13:10:48 +00004105 putIRegE(sz,pfx,modrm, mkexpr(t2));
sewardj32b2bbe2005-01-28 00:50:10 +00004106 break;
sewardj354e5c62005-01-27 20:12:52 +00004107 case 2: /* call Ev */
sewardj03b07cc2005-01-31 18:09:43 +00004108 /* Ignore any sz value and operate as if sz==8. */
sewardjd7a544b2007-11-19 00:39:23 +00004109 if (!(sz == 4 || sz == 8)) goto unhandled;
sewardj03b07cc2005-01-31 18:09:43 +00004110 sz = 8;
4111 t3 = newTemp(Ity_I64);
sewardj5b470602005-02-27 13:10:48 +00004112 assign(t3, getIRegE(sz,pfx,modrm));
sewardj03b07cc2005-01-31 18:09:43 +00004113 t2 = newTemp(Ity_I64);
4114 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
4115 putIReg64(R_RSP, mkexpr(t2));
sewardj9e6491a2005-07-02 19:24:10 +00004116 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1));
sewardj478646f2008-05-01 20:13:04 +00004117 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
sewardjc6f970f2012-04-02 21:54:49 +00004118 jmp_treg(dres, Ijk_Call, t3);
4119 vassert(dres->whatNext == Dis_StopHere);
sewardj7eaa7cf2005-01-31 18:55:22 +00004120 showSz = False;
sewardj354e5c62005-01-27 20:12:52 +00004121 break;
sewardj354e5c62005-01-27 20:12:52 +00004122 case 4: /* jmp Ev */
sewardj03b07cc2005-01-31 18:09:43 +00004123 /* Ignore any sz value and operate as if sz==8. */
sewardjd7a544b2007-11-19 00:39:23 +00004124 if (!(sz == 4 || sz == 8)) goto unhandled;
sewardj03b07cc2005-01-31 18:09:43 +00004125 sz = 8;
4126 t3 = newTemp(Ity_I64);
sewardj5b470602005-02-27 13:10:48 +00004127 assign(t3, getIRegE(sz,pfx,modrm));
sewardjc6f970f2012-04-02 21:54:49 +00004128 jmp_treg(dres, Ijk_Boring, t3);
4129 vassert(dres->whatNext == Dis_StopHere);
sewardj7eaa7cf2005-01-31 18:55:22 +00004130 showSz = False;
sewardj354e5c62005-01-27 20:12:52 +00004131 break;
sewardj354e5c62005-01-27 20:12:52 +00004132 default:
sewardjfd4203c2007-03-21 00:21:56 +00004133 *decode_OK = False;
4134 return delta;
sewardj354e5c62005-01-27 20:12:52 +00004135 }
4136 delta++;
sewardj901ed122005-02-27 13:25:31 +00004137 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
sewardj7eaa7cf2005-01-31 18:55:22 +00004138 showSz ? nameISize(sz) : ' ',
sewardj5b470602005-02-27 13:10:48 +00004139 nameIRegE(sz, pfx, modrm));
sewardj354e5c62005-01-27 20:12:52 +00004140 } else {
sewardj2e28ac42008-12-04 00:05:12 +00004141 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj901ed122005-02-27 13:25:31 +00004142 if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4
4143 && gregLO3ofRM(modrm) != 6) {
sewardj03b07cc2005-01-31 18:09:43 +00004144 assign(t1, loadLE(ty,mkexpr(addr)));
sewardj909c06d2005-02-19 22:47:41 +00004145 }
sewardj901ed122005-02-27 13:25:31 +00004146 switch (gregLO3ofRM(modrm)) {
sewardj354e5c62005-01-27 20:12:52 +00004147 case 0: /* INC */
sewardj354e5c62005-01-27 20:12:52 +00004148 t2 = newTemp(ty);
4149 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
4150 mkexpr(t1), mkU(ty,1)));
sewardje9d8a262009-07-01 08:06:34 +00004151 if (pfx & PFX_LOCK) {
4152 casLE( mkexpr(addr),
4153 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4154 } else {
4155 storeLE(mkexpr(addr),mkexpr(t2));
4156 }
sewardj354e5c62005-01-27 20:12:52 +00004157 setFlags_INC_DEC( True, t2, ty );
sewardj354e5c62005-01-27 20:12:52 +00004158 break;
sewardj354e5c62005-01-27 20:12:52 +00004159 case 1: /* DEC */
4160 t2 = newTemp(ty);
4161 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
4162 mkexpr(t1), mkU(ty,1)));
sewardje9d8a262009-07-01 08:06:34 +00004163 if (pfx & PFX_LOCK) {
4164 casLE( mkexpr(addr),
4165 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4166 } else {
4167 storeLE(mkexpr(addr),mkexpr(t2));
4168 }
sewardj354e5c62005-01-27 20:12:52 +00004169 setFlags_INC_DEC( False, t2, ty );
sewardj354e5c62005-01-27 20:12:52 +00004170 break;
4171 case 2: /* call Ev */
sewardj03b07cc2005-01-31 18:09:43 +00004172 /* Ignore any sz value and operate as if sz==8. */
sewardjd7a544b2007-11-19 00:39:23 +00004173 if (!(sz == 4 || sz == 8)) goto unhandled;
sewardj7eaa7cf2005-01-31 18:55:22 +00004174 sz = 8;
sewardj03b07cc2005-01-31 18:09:43 +00004175 t3 = newTemp(Ity_I64);
4176 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4177 t2 = newTemp(Ity_I64);
4178 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
4179 putIReg64(R_RSP, mkexpr(t2));
sewardj9e6491a2005-07-02 19:24:10 +00004180 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len));
sewardj478646f2008-05-01 20:13:04 +00004181 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
sewardjc6f970f2012-04-02 21:54:49 +00004182 jmp_treg(dres, Ijk_Call, t3);
4183 vassert(dres->whatNext == Dis_StopHere);
sewardj7eaa7cf2005-01-31 18:55:22 +00004184 showSz = False;
sewardj354e5c62005-01-27 20:12:52 +00004185 break;
sewardj354e5c62005-01-27 20:12:52 +00004186 case 4: /* JMP Ev */
sewardj03b07cc2005-01-31 18:09:43 +00004187 /* Ignore any sz value and operate as if sz==8. */
sewardjd7a544b2007-11-19 00:39:23 +00004188 if (!(sz == 4 || sz == 8)) goto unhandled;
sewardj7eaa7cf2005-01-31 18:55:22 +00004189 sz = 8;
sewardj03b07cc2005-01-31 18:09:43 +00004190 t3 = newTemp(Ity_I64);
4191 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
sewardjc6f970f2012-04-02 21:54:49 +00004192 jmp_treg(dres, Ijk_Boring, t3);
4193 vassert(dres->whatNext == Dis_StopHere);
sewardj7eaa7cf2005-01-31 18:55:22 +00004194 showSz = False;
sewardj354e5c62005-01-27 20:12:52 +00004195 break;
sewardj354e5c62005-01-27 20:12:52 +00004196 case 6: /* PUSH Ev */
sewardja6b93d12005-02-17 09:28:28 +00004197 /* There is no encoding for 32-bit operand size; hence ... */
4198 if (sz == 4) sz = 8;
sewardjd7a544b2007-11-19 00:39:23 +00004199 if (!(sz == 8 || sz == 2)) goto unhandled;
sewardj909c06d2005-02-19 22:47:41 +00004200 if (sz == 8) {
4201 t3 = newTemp(Ity_I64);
4202 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4203 t2 = newTemp(Ity_I64);
4204 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
4205 putIReg64(R_RSP, mkexpr(t2) );
4206 storeLE( mkexpr(t2), mkexpr(t3) );
4207 break;
4208 } else {
4209 goto unhandled; /* awaiting test case */
4210 }
sewardj354e5c62005-01-27 20:12:52 +00004211 default:
sewardja6b93d12005-02-17 09:28:28 +00004212 unhandled:
sewardjfd4203c2007-03-21 00:21:56 +00004213 *decode_OK = False;
4214 return delta;
sewardj354e5c62005-01-27 20:12:52 +00004215 }
4216 delta += len;
sewardj901ed122005-02-27 13:25:31 +00004217 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
sewardj7eaa7cf2005-01-31 18:55:22 +00004218 showSz ? nameISize(sz) : ' ',
4219 dis_buf);
sewardj354e5c62005-01-27 20:12:52 +00004220 }
4221 return delta;
4222}
4223
4224
sewardjd0a12df2005-02-10 02:07:43 +00004225/*------------------------------------------------------------*/
4226/*--- Disassembling string ops (including REP prefixes) ---*/
4227/*------------------------------------------------------------*/
4228
4229/* Code shared by all the string ops */
4230static
4231void dis_string_op_increment ( Int sz, IRTemp t_inc )
4232{
4233 UChar logSz;
4234 if (sz == 8 || sz == 4 || sz == 2) {
4235 logSz = 1;
4236 if (sz == 4) logSz = 2;
4237 if (sz == 8) logSz = 3;
4238 assign( t_inc,
4239 binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
4240 mkU8(logSz) ) );
4241 } else {
4242 assign( t_inc,
4243 IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
4244 }
4245}
4246
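/* Illustrative note (not part of the translator): the increment
   computed above is just the direction flag, as held in guest_DFLAG
   (+1 or -1), scaled by the element size.  A standalone C sketch of
   the same computation: */
#if 0
static long long string_increment_model ( int sz, long long dflag )
{
   /* dflag mirrors guest_DFLAG: +1 after CLD, -1 after STD. */
   /* e.g. sz == 4 with DF set gives -4, so RSI/RDI step downwards. */
   return dflag * (long long)sz;
}
#endif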
sewardj909c06d2005-02-19 22:47:41 +00004247static
tom5cdf4e32011-08-12 15:42:56 +00004248void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ),
sewardj909c06d2005-02-19 22:47:41 +00004249 Int sz, HChar* name, Prefix pfx )
4250{
4251 IRTemp t_inc = newTemp(Ity_I64);
4252 /* Really we ought to inspect the override prefixes, but we don't.
4253 The following assertion catches any resulting silliness. */
4254 vassert(pfx == clearSegBits(pfx));
4255 dis_string_op_increment(sz, t_inc);
tom5cdf4e32011-08-12 15:42:56 +00004256 dis_OP( sz, t_inc, pfx );
sewardj909c06d2005-02-19 22:47:41 +00004257 DIP("%s%c\n", name, nameISize(sz));
4258}
4259
4260static
tom5cdf4e32011-08-12 15:42:56 +00004261void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx )
sewardj909c06d2005-02-19 22:47:41 +00004262{
4263 IRType ty = szToITy(sz);
4264 IRTemp td = newTemp(Ity_I64); /* RDI */
4265 IRTemp ts = newTemp(Ity_I64); /* RSI */
tom5cdf4e32011-08-12 15:42:56 +00004266 IRExpr *incd, *incs;
sewardj909c06d2005-02-19 22:47:41 +00004267
tom5cdf4e32011-08-12 15:42:56 +00004268 if (haveASO(pfx)) {
4269 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4270 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4271 } else {
4272 assign( td, getIReg64(R_RDI) );
4273 assign( ts, getIReg64(R_RSI) );
4274 }
sewardj909c06d2005-02-19 22:47:41 +00004275
4276 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
4277
tom5cdf4e32011-08-12 15:42:56 +00004278 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4279 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4280 if (haveASO(pfx)) {
4281 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4282 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4283 }
4284 putIReg64( R_RDI, incd );
4285 putIReg64( R_RSI, incs );
sewardj909c06d2005-02-19 22:47:41 +00004286}
4287
sewardj0f99be62009-07-22 09:29:13 +00004288static
tom5cdf4e32011-08-12 15:42:56 +00004289void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx )
sewardj0f99be62009-07-22 09:29:13 +00004290{
4291 IRType ty = szToITy(sz);
4292 IRTemp ts = newTemp(Ity_I64); /* RSI */
tom5cdf4e32011-08-12 15:42:56 +00004293 IRExpr *incs;
sewardj0f99be62009-07-22 09:29:13 +00004294
tom5cdf4e32011-08-12 15:42:56 +00004295 if (haveASO(pfx))
4296 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4297 else
4298 assign( ts, getIReg64(R_RSI) );
sewardj0f99be62009-07-22 09:29:13 +00004299
4300 putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );
4301
tom5cdf4e32011-08-12 15:42:56 +00004302 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4303 if (haveASO(pfx))
4304 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4305 putIReg64( R_RSI, incs );
sewardj0f99be62009-07-22 09:29:13 +00004306}
sewardja6b93d12005-02-17 09:28:28 +00004307
4308static
tom5cdf4e32011-08-12 15:42:56 +00004309void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx )
sewardja6b93d12005-02-17 09:28:28 +00004310{
4311 IRType ty = szToITy(sz);
4312 IRTemp ta = newTemp(ty); /* rAX */
4313 IRTemp td = newTemp(Ity_I64); /* RDI */
tom5cdf4e32011-08-12 15:42:56 +00004314 IRExpr *incd;
sewardja6b93d12005-02-17 09:28:28 +00004315
sewardj5b470602005-02-27 13:10:48 +00004316 assign( ta, getIRegRAX(sz) );
sewardja6b93d12005-02-17 09:28:28 +00004317
tom5cdf4e32011-08-12 15:42:56 +00004318 if (haveASO(pfx))
4319 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4320 else
4321 assign( td, getIReg64(R_RDI) );
sewardja6b93d12005-02-17 09:28:28 +00004322
4323 storeLE( mkexpr(td), mkexpr(ta) );
4324
tom5cdf4e32011-08-12 15:42:56 +00004325 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4326 if (haveASO(pfx))
4327 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4328 putIReg64( R_RDI, incd );
sewardja6b93d12005-02-17 09:28:28 +00004329}
sewardjd0a12df2005-02-10 02:07:43 +00004330
4331static
tom5cdf4e32011-08-12 15:42:56 +00004332void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx )
sewardjd0a12df2005-02-10 02:07:43 +00004333{
4334 IRType ty = szToITy(sz);
4335 IRTemp tdv = newTemp(ty); /* (RDI) */
4336 IRTemp tsv = newTemp(ty); /* (RSI) */
4337 IRTemp td = newTemp(Ity_I64); /* RDI */
4338 IRTemp ts = newTemp(Ity_I64); /* RSI */
tom5cdf4e32011-08-12 15:42:56 +00004339 IRExpr *incd, *incs;
sewardjd0a12df2005-02-10 02:07:43 +00004340
tom5cdf4e32011-08-12 15:42:56 +00004341 if (haveASO(pfx)) {
4342 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4343 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4344 } else {
4345 assign( td, getIReg64(R_RDI) );
4346 assign( ts, getIReg64(R_RSI) );
4347 }
sewardjd0a12df2005-02-10 02:07:43 +00004348
4349 assign( tdv, loadLE(ty,mkexpr(td)) );
4350
4351 assign( tsv, loadLE(ty,mkexpr(ts)) );
4352
4353 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
4354
tom5cdf4e32011-08-12 15:42:56 +00004355 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4356 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4357 if (haveASO(pfx)) {
4358 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4359 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4360 }
4361 putIReg64( R_RDI, incd );
4362 putIReg64( R_RSI, incs );
sewardjd0a12df2005-02-10 02:07:43 +00004363}
4364
sewardj85520e42005-02-19 15:22:38 +00004365static
tom5cdf4e32011-08-12 15:42:56 +00004366void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx )
sewardj85520e42005-02-19 15:22:38 +00004367{
4368 IRType ty = szToITy(sz);
4369 IRTemp ta = newTemp(ty); /* rAX */
4370 IRTemp td = newTemp(Ity_I64); /* RDI */
4371 IRTemp tdv = newTemp(ty); /* (RDI) */
tom5cdf4e32011-08-12 15:42:56 +00004372 IRExpr *incd;
sewardj85520e42005-02-19 15:22:38 +00004373
sewardj5b470602005-02-27 13:10:48 +00004374 assign( ta, getIRegRAX(sz) );
sewardj85520e42005-02-19 15:22:38 +00004375
tom5cdf4e32011-08-12 15:42:56 +00004376 if (haveASO(pfx))
4377 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4378 else
4379 assign( td, getIReg64(R_RDI) );
sewardj85520e42005-02-19 15:22:38 +00004380
4381 assign( tdv, loadLE(ty,mkexpr(td)) );
4382
4383 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
4384
tom5cdf4e32011-08-12 15:42:56 +00004385 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4386 if (haveASO(pfx))
4387 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4388 putIReg64( R_RDI, incd );
sewardj85520e42005-02-19 15:22:38 +00004389}
sewardjd0a12df2005-02-10 02:07:43 +00004390
4391
4392/* Wrap the appropriate string op inside a REP/REPE/REPNE. We assume
4393 the insn is the last one in the basic block, and so emit a jump to
4394 the next insn, rather than just falling through. */
4395static
sewardjc6f970f2012-04-02 21:54:49 +00004396void dis_REP_op ( /*MOD*/DisResult* dres,
4397 AMD64Condcode cond,
tom5cdf4e32011-08-12 15:42:56 +00004398 void (*dis_OP)(Int, IRTemp, Prefix),
sewardj909c06d2005-02-19 22:47:41 +00004399 Int sz, Addr64 rip, Addr64 rip_next, HChar* name,
4400 Prefix pfx )
sewardjd0a12df2005-02-10 02:07:43 +00004401{
4402 IRTemp t_inc = newTemp(Ity_I64);
tom5cdf4e32011-08-12 15:42:56 +00004403 IRTemp tc;
4404 IRExpr* cmp;
sewardjd0a12df2005-02-10 02:07:43 +00004405
sewardj909c06d2005-02-19 22:47:41 +00004406 /* Really we ought to inspect the override prefixes, but we don't.
4407 The following assertion catches any resulting silliness. */
4408 vassert(pfx == clearSegBits(pfx));
4409
tom5cdf4e32011-08-12 15:42:56 +00004410 if (haveASO(pfx)) {
4411 tc = newTemp(Ity_I32); /* ECX */
4412 assign( tc, getIReg32(R_RCX) );
4413 cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0));
4414 } else {
4415 tc = newTemp(Ity_I64); /* RCX */
4416 assign( tc, getIReg64(R_RCX) );
4417 cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0));
4418 }
sewardjd0a12df2005-02-10 02:07:43 +00004419
sewardjc6f970f2012-04-02 21:54:49 +00004420 stmt( IRStmt_Exit( cmp, Ijk_Boring,
4421 IRConst_U64(rip_next), OFFB_RIP ) );
sewardjd0a12df2005-02-10 02:07:43 +00004422
tom5cdf4e32011-08-12 15:42:56 +00004423 if (haveASO(pfx))
4424 putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
4425 else
4426 putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );
sewardjd0a12df2005-02-10 02:07:43 +00004427
4428 dis_string_op_increment(sz, t_inc);
tom5cdf4e32011-08-12 15:42:56 +00004429 dis_OP (sz, t_inc, pfx);
sewardjd0a12df2005-02-10 02:07:43 +00004430
4431 if (cond == AMD64CondAlways) {
sewardjc6f970f2012-04-02 21:54:49 +00004432 jmp_lit(dres, Ijk_Boring, rip);
4433 vassert(dres->whatNext == Dis_StopHere);
sewardjd0a12df2005-02-10 02:07:43 +00004434 } else {
4435 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
4436 Ijk_Boring,
sewardjc6f970f2012-04-02 21:54:49 +00004437 IRConst_U64(rip),
4438 OFFB_RIP ) );
4439 jmp_lit(dres, Ijk_Boring, rip_next);
4440 vassert(dres->whatNext == Dis_StopHere);
sewardjd0a12df2005-02-10 02:07:43 +00004441 }
4442 DIP("%s%c\n", name, nameISize(sz));
4443}
sewardj32b2bbe2005-01-28 00:50:10 +00004444
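/* Illustrative note (not part of the translator): the IR built above
   performs just one iteration of the REP'd instruction per execution
   of this translation, and relies on the jump back to 'rip' to
   re-enter it while the count is nonzero (the address-size-override
   variant works on ECX instead of RCX).  Roughly, in C -- all names
   here are invented, and do_one_element stands for the MOVS/STOS/
   CMPS/SCAS body: */
#if 0
typedef enum { REP_ALWAYS, REP_E, REP_NE } RepKindModel;

static unsigned long long rep_iteration_model
   ( unsigned long long* rcx, RepKindModel kind, int zf,
     unsigned long long rip, unsigned long long rip_next )
{
   if (*rcx == 0) return rip_next;        /* count exhausted: fall through */
   *rcx -= 1;                             /* decrement the count           */
   /* do_one_element(); */                /* one element of the string op  */
   if (kind == REP_ALWAYS)    return rip; /* unconditional REP: go again   */
   if (kind == REP_E  &&  zf) return rip; /* REPE: repeat while ZF set     */
   if (kind == REP_NE && !zf) return rip; /* REPNE: repeat while ZF clear  */
   return rip_next;                       /* condition failed: fall through */
}
#endif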
4445
4446/*------------------------------------------------------------*/
4447/*--- Arithmetic, etc. ---*/
4448/*------------------------------------------------------------*/
4449
4450/* IMUL E, G. Supplied rip points to the modR/M byte. */
4451static
sewardj2e28ac42008-12-04 00:05:12 +00004452ULong dis_mul_E_G ( VexAbiInfo* vbi,
4453 Prefix pfx,
sewardj32b2bbe2005-01-28 00:50:10 +00004454 Int size,
sewardj270def42005-07-03 01:03:01 +00004455 Long delta0 )
sewardj32b2bbe2005-01-28 00:50:10 +00004456{
4457 Int alen;
4458 HChar dis_buf[50];
sewardj8c332e22005-01-28 01:36:56 +00004459 UChar rm = getUChar(delta0);
sewardj32b2bbe2005-01-28 00:50:10 +00004460 IRType ty = szToITy(size);
4461 IRTemp te = newTemp(ty);
4462 IRTemp tg = newTemp(ty);
4463 IRTemp resLo = newTemp(ty);
4464
sewardj5b470602005-02-27 13:10:48 +00004465 assign( tg, getIRegG(size, pfx, rm) );
sewardj32b2bbe2005-01-28 00:50:10 +00004466 if (epartIsReg(rm)) {
sewardj5b470602005-02-27 13:10:48 +00004467 assign( te, getIRegE(size, pfx, rm) );
sewardj32b2bbe2005-01-28 00:50:10 +00004468 } else {
sewardj2e28ac42008-12-04 00:05:12 +00004469 IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 );
sewardj32b2bbe2005-01-28 00:50:10 +00004470 assign( te, loadLE(ty,mkexpr(addr)) );
4471 }
4472
4473 setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB );
4474
4475 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
4476
sewardj5b470602005-02-27 13:10:48 +00004477 putIRegG(size, pfx, rm, mkexpr(resLo) );
sewardj32b2bbe2005-01-28 00:50:10 +00004478
4479 if (epartIsReg(rm)) {
4480 DIP("imul%c %s, %s\n", nameISize(size),
sewardj901ed122005-02-27 13:25:31 +00004481 nameIRegE(size,pfx,rm),
4482 nameIRegG(size,pfx,rm));
sewardj32b2bbe2005-01-28 00:50:10 +00004483 return 1+delta0;
4484 } else {
4485 DIP("imul%c %s, %s\n", nameISize(size),
4486 dis_buf,
sewardj901ed122005-02-27 13:25:31 +00004487 nameIRegG(size,pfx,rm));
sewardj32b2bbe2005-01-28 00:50:10 +00004488 return alen+delta0;
4489 }
4490}
4491
4492
4493/* IMUL I * E -> G. Supplied rip points to the modR/M byte. */
4494static
sewardj2e28ac42008-12-04 00:05:12 +00004495ULong dis_imul_I_E_G ( VexAbiInfo* vbi,
4496 Prefix pfx,
sewardj32b2bbe2005-01-28 00:50:10 +00004497 Int size,
sewardj270def42005-07-03 01:03:01 +00004498 Long delta,
sewardj32b2bbe2005-01-28 00:50:10 +00004499 Int litsize )
4500{
4501 Long d64;
4502 Int alen;
4503 HChar dis_buf[50];
sewardj8c332e22005-01-28 01:36:56 +00004504 UChar rm = getUChar(delta);
sewardj32b2bbe2005-01-28 00:50:10 +00004505 IRType ty = szToITy(size);
4506 IRTemp te = newTemp(ty);
4507 IRTemp tl = newTemp(ty);
4508 IRTemp resLo = newTemp(ty);
4509
sewardj85520e42005-02-19 15:22:38 +00004510 vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8);
sewardj32b2bbe2005-01-28 00:50:10 +00004511
4512 if (epartIsReg(rm)) {
sewardj5b470602005-02-27 13:10:48 +00004513 assign(te, getIRegE(size, pfx, rm));
sewardj32b2bbe2005-01-28 00:50:10 +00004514 delta++;
4515 } else {
sewardj2e28ac42008-12-04 00:05:12 +00004516 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
sewardj7de0d3c2005-02-13 02:26:41 +00004517 imin(4,litsize) );
sewardj32b2bbe2005-01-28 00:50:10 +00004518 assign(te, loadLE(ty, mkexpr(addr)));
4519 delta += alen;
4520 }
4521 d64 = getSDisp(imin(4,litsize),delta);
4522 delta += imin(4,litsize);
4523
sewardj1389d4d2005-01-28 13:46:29 +00004524 d64 &= mkSizeMask(size);
sewardj32b2bbe2005-01-28 00:50:10 +00004525 assign(tl, mkU(ty,d64));
4526
4527 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));
4528
4529 setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB );
4530
sewardj5b470602005-02-27 13:10:48 +00004531 putIRegG(size, pfx, rm, mkexpr(resLo));
sewardj32b2bbe2005-01-28 00:50:10 +00004532
4533 DIP("imul%c $%lld, %s, %s\n",
4534 nameISize(size), d64,
sewardj5b470602005-02-27 13:10:48 +00004535 ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ),
4536 nameIRegG(size,pfx,rm) );
sewardj32b2bbe2005-01-28 00:50:10 +00004537 return delta;
4538}
4539
4540
sewardjfd181282010-06-14 21:29:35 +00004541/* Generate an IR sequence to do a popcount operation on the supplied
4542 IRTemp, and return a new IRTemp holding the result. 'ty' may be
4543 Ity_I16, Ity_I32 or Ity_I64 only. */
4544static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src )
4545{
4546 Int i;
4547 if (ty == Ity_I16) {
4548 IRTemp old = IRTemp_INVALID;
4549 IRTemp nyu = IRTemp_INVALID;
4550 IRTemp mask[4], shift[4];
4551 for (i = 0; i < 4; i++) {
4552 mask[i] = newTemp(ty);
4553 shift[i] = 1 << i;
4554 }
4555 assign(mask[0], mkU16(0x5555));
4556 assign(mask[1], mkU16(0x3333));
4557 assign(mask[2], mkU16(0x0F0F));
4558 assign(mask[3], mkU16(0x00FF));
4559 old = src;
4560 for (i = 0; i < 4; i++) {
4561 nyu = newTemp(ty);
4562 assign(nyu,
4563 binop(Iop_Add16,
4564 binop(Iop_And16,
4565 mkexpr(old),
4566 mkexpr(mask[i])),
4567 binop(Iop_And16,
4568 binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])),
4569 mkexpr(mask[i]))));
4570 old = nyu;
4571 }
4572 return nyu;
4573 }
4574 if (ty == Ity_I32) {
4575 IRTemp old = IRTemp_INVALID;
4576 IRTemp nyu = IRTemp_INVALID;
4577 IRTemp mask[5], shift[5];
4578 for (i = 0; i < 5; i++) {
4579 mask[i] = newTemp(ty);
4580 shift[i] = 1 << i;
4581 }
4582 assign(mask[0], mkU32(0x55555555));
4583 assign(mask[1], mkU32(0x33333333));
4584 assign(mask[2], mkU32(0x0F0F0F0F));
4585 assign(mask[3], mkU32(0x00FF00FF));
4586 assign(mask[4], mkU32(0x0000FFFF));
4587 old = src;
4588 for (i = 0; i < 5; i++) {
4589 nyu = newTemp(ty);
4590 assign(nyu,
4591 binop(Iop_Add32,
4592 binop(Iop_And32,
4593 mkexpr(old),
4594 mkexpr(mask[i])),
4595 binop(Iop_And32,
4596 binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
4597 mkexpr(mask[i]))));
4598 old = nyu;
4599 }
4600 return nyu;
4601 }
4602 if (ty == Ity_I64) {
4603 IRTemp old = IRTemp_INVALID;
4604 IRTemp nyu = IRTemp_INVALID;
4605 IRTemp mask[6], shift[6];
4606 for (i = 0; i < 6; i++) {
4607 mask[i] = newTemp(ty);
4608 shift[i] = 1 << i;
4609 }
4610 assign(mask[0], mkU64(0x5555555555555555ULL));
4611 assign(mask[1], mkU64(0x3333333333333333ULL));
4612 assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL));
4613 assign(mask[3], mkU64(0x00FF00FF00FF00FFULL));
4614 assign(mask[4], mkU64(0x0000FFFF0000FFFFULL));
4615 assign(mask[5], mkU64(0x00000000FFFFFFFFULL));
4616 old = src;
4617 for (i = 0; i < 6; i++) {
4618 nyu = newTemp(ty);
4619 assign(nyu,
4620 binop(Iop_Add64,
4621 binop(Iop_And64,
4622 mkexpr(old),
4623 mkexpr(mask[i])),
4624 binop(Iop_And64,
4625 binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])),
4626 mkexpr(mask[i]))));
4627 old = nyu;
4628 }
4629 return nyu;
4630 }
4631 /*NOTREACHED*/
4632 vassert(0);
4633}
4634
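/* For reference (not used by the translator): the IR above mirrors
   the usual shift-and-mask ("SWAR") population count.  A standalone C
   equivalent of the 64-bit case: */
#if 0
static unsigned long long popcount64_model ( unsigned long long x )
{
   /* each step sums adjacent fields of width 1, 2, 4, ... bits */
   x = (x & 0x5555555555555555ULL) + ((x >>  1) & 0x5555555555555555ULL);
   x = (x & 0x3333333333333333ULL) + ((x >>  2) & 0x3333333333333333ULL);
   x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x >>  4) & 0x0F0F0F0F0F0F0F0FULL);
   x = (x & 0x00FF00FF00FF00FFULL) + ((x >>  8) & 0x00FF00FF00FF00FFULL);
   x = (x & 0x0000FFFF0000FFFFULL) + ((x >> 16) & 0x0000FFFF0000FFFFULL);
   x = (x & 0x00000000FFFFFFFFULL) + ((x >> 32) & 0x00000000FFFFFFFFULL);
   return x;
}
#endif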
4635
sewardj9a660ea2010-07-29 11:34:38 +00004636/* Generate an IR sequence to do a count-leading-zeroes operation on
4637 the supplied IRTemp, and return a new IRTemp holding the result.
4638 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
4639 the argument is zero, return the number of bits in the word (the
4640 natural semantics). */
4641static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
4642{
4643 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
4644
4645 IRTemp src64 = newTemp(Ity_I64);
4646 assign(src64, widenUto64( mkexpr(src) ));
4647
4648 IRTemp src64x = newTemp(Ity_I64);
4649 assign(src64x,
4650 binop(Iop_Shl64, mkexpr(src64),
4651 mkU8(64 - 8 * sizeofIRType(ty))));
4652
4653 // Clz64 has undefined semantics when its input is zero, so
4654 // special-case around that.
4655 IRTemp res64 = newTemp(Ity_I64);
4656 assign(res64,
4657 IRExpr_Mux0X(
4658 unop(Iop_1Uto8,
4659 binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0))),
4660 unop(Iop_Clz64, mkexpr(src64x)),
4661 mkU64(8 * sizeofIRType(ty))
4662 ));
4663
4664 IRTemp res = newTemp(ty);
4665 assign(res, narrowTo(ty, mkexpr(res64)));
4666 return res;
4667}
4668
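/* For reference (not used by the translator): narrow inputs are
   left-justified in 64 bits so that a single 64-bit count-leading-
   zeroes gives the right answer, with a zero input special-cased to
   the operand width.  A C sketch using GCC's __builtin_clzll purely
   for illustration: */
#if 0
static unsigned lzcnt_model ( unsigned long long src, unsigned nbits )
{
   /* nbits is 16, 32 or 64, matching 'ty' above */
   unsigned long long justified = src << (64 - nbits);
   if (justified == 0)
      return nbits;                   /* LZCNT of zero is the word size */
   return (unsigned)__builtin_clzll(justified);
}
#endif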
4669
sewardjbcbb9de2005-03-27 02:22:32 +00004670/*------------------------------------------------------------*/
4671/*--- ---*/
4672/*--- x87 FLOATING POINT INSTRUCTIONS ---*/
4673/*--- ---*/
4674/*------------------------------------------------------------*/
4675
4676/* --- Helper functions for dealing with the register stack. --- */
4677
4678/* --- Set the emulation-warning pseudo-register. --- */
4679
4680static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
4681{
sewardjdd40fdf2006-12-24 02:20:24 +00004682 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
sewardjbcbb9de2005-03-27 02:22:32 +00004683 stmt( IRStmt_Put( OFFB_EMWARN, e ) );
4684}
sewardj8d965312005-02-25 02:48:47 +00004685
4686/* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
4687
4688static IRExpr* mkQNaN64 ( void )
4689{
4690 /* QNaN is 0 2047 1 0(51times)
4691 == 0 11111111111b 1 0(51times)
4692 == 0x7FF8 0000 0000 0000
4693 */
4694 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
4695}
4696
4697/* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */
4698
4699static IRExpr* get_ftop ( void )
4700{
4701 return IRExpr_Get( OFFB_FTOP, Ity_I32 );
4702}
4703
4704static void put_ftop ( IRExpr* e )
4705{
sewardjdd40fdf2006-12-24 02:20:24 +00004706 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
sewardj8d965312005-02-25 02:48:47 +00004707 stmt( IRStmt_Put( OFFB_FTOP, e ) );
4708}
4709
sewardj25a85812005-05-08 23:03:48 +00004710/* --------- Get/put the C3210 bits. --------- */
4711
4712static IRExpr* /* :: Ity_I64 */ get_C3210 ( void )
4713{
4714 return IRExpr_Get( OFFB_FC3210, Ity_I64 );
4715}
4716
4717static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ )
4718{
sewardjdd40fdf2006-12-24 02:20:24 +00004719 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
sewardj25a85812005-05-08 23:03:48 +00004720 stmt( IRStmt_Put( OFFB_FC3210, e ) );
4721}
sewardjc49ce232005-02-25 13:03:03 +00004722
4723/* --------- Get/put the FPU rounding mode. --------- */
4724static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
4725{
4726 return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 ));
4727}
4728
sewardj5e205372005-05-09 02:57:08 +00004729static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
4730{
sewardjdd40fdf2006-12-24 02:20:24 +00004731 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
sewardj5e205372005-05-09 02:57:08 +00004732 stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) );
4733}
sewardjc49ce232005-02-25 13:03:03 +00004734
4735
4736/* --------- Synthesise a 2-bit FPU rounding mode. --------- */
4737/* Produces a value in 0 .. 3, which is encoded as per the type
4738 IRRoundingMode. Since the guest_FPROUND value is also encoded as
4739 per IRRoundingMode, we merely need to get it and mask it for
4740 safety.
4741*/
4742static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
4743{
4744 return binop( Iop_And32, get_fpround(), mkU32(3) );
4745}
sewardj8d965312005-02-25 02:48:47 +00004746
sewardj4796d662006-02-05 16:06:26 +00004747static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
4748{
4749 return mkU32(Irrm_NEAREST);
4750}
4751
sewardj8d965312005-02-25 02:48:47 +00004752
4753/* --------- Get/set FP register tag bytes. --------- */
4754
4755/* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
4756
4757static void put_ST_TAG ( Int i, IRExpr* value )
4758{
sewardjdd40fdf2006-12-24 02:20:24 +00004759 IRRegArray* descr;
4760 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
4761 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
floriand6f38b32012-05-31 15:46:18 +00004762 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
sewardj8d965312005-02-25 02:48:47 +00004763}
4764
4765/* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
4766 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
4767
4768static IRExpr* get_ST_TAG ( Int i )
4769{
sewardjdd40fdf2006-12-24 02:20:24 +00004770 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
sewardj8d965312005-02-25 02:48:47 +00004771 return IRExpr_GetI( descr, get_ftop(), i );
4772}
4773
4774
4775/* --------- Get/set FP registers. --------- */
4776
4777/* Given i, and some expression e, emit 'ST(i) = e' and set the
4778 register's tag to indicate the register is full. The previous
4779 state of the register is not checked. */
4780
4781static void put_ST_UNCHECKED ( Int i, IRExpr* value )
4782{
sewardjdd40fdf2006-12-24 02:20:24 +00004783 IRRegArray* descr;
4784 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
4785 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
floriand6f38b32012-05-31 15:46:18 +00004786 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
sewardj8d965312005-02-25 02:48:47 +00004787 /* Mark the register as in-use. */
4788 put_ST_TAG(i, mkU8(1));
4789}
4790
4791/* Given i, and some expression e, emit
4792 ST(i) = is_full(i) ? NaN : e
4793 and set the tag accordingly.
4794*/
4795
4796static void put_ST ( Int i, IRExpr* value )
4797{
4798 put_ST_UNCHECKED( i,
4799 IRExpr_Mux0X( get_ST_TAG(i),
4800 /* 0 means empty */
4801 value,
4802 /* non-0 means full */
4803 mkQNaN64()
4804 )
4805 );
4806}
4807
4808
4809/* Given i, generate an expression yielding 'ST(i)'. */
4810
4811static IRExpr* get_ST_UNCHECKED ( Int i )
4812{
sewardjdd40fdf2006-12-24 02:20:24 +00004813 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
sewardj8d965312005-02-25 02:48:47 +00004814 return IRExpr_GetI( descr, get_ftop(), i );
4815}
4816
4817
4818/* Given i, generate an expression yielding
4819 is_full(i) ? ST(i) : NaN
4820*/
4821
4822static IRExpr* get_ST ( Int i )
4823{
4824 return
4825 IRExpr_Mux0X( get_ST_TAG(i),
4826 /* 0 means empty */
4827 mkQNaN64(),
4828 /* non-0 means full */
4829 get_ST_UNCHECKED(i));
4830}
4831
4832
4833/* Adjust FTOP downwards by one register. */
4834
4835static void fp_push ( void )
4836{
4837 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
4838}
4839
4840/* Adjust FTOP upwards by one register, and mark the vacated register
4841 as empty. */
4842
4843static void fp_pop ( void )
4844{
4845 put_ST_TAG(0, mkU8(0));
4846 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
4847}
4848
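/* Illustrative note (not part of the translator): ST(i), its tag and
   FTOP together model the x87 register stack; the GetI/PutI accesses
   above index the 8-entry guest arrays circularly relative to FTOP.
   A rough C model of the stack discipline (all names invented here): */
#if 0
typedef struct {
   double        reg[8];   /* guest FP registers               */
   unsigned char tag[8];   /* per-register tags: 0 means empty */
   unsigned      ftop;     /* guest FTOP                       */
} X87StackModel;

static double* st_model ( X87StackModel* s, int i )
{
   return &s->reg[ (s->ftop + i) & 7 ];   /* ST(i), wrapping mod 8 */
}

static void fp_push_model ( X87StackModel* s, double v )
{
   s->ftop = (s->ftop - 1) & 7;           /* push: FTOP moves down */
   s->reg[s->ftop] = v;
   s->tag[s->ftop] = 1;                   /* new ST(0) is now full */
}

static void fp_pop_model ( X87StackModel* s )
{
   s->tag[s->ftop] = 0;                   /* vacated slot is empty */
   s->ftop = (s->ftop + 1) & 7;           /* pop: FTOP moves up    */
}
#endif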
sewardj25a85812005-05-08 23:03:48 +00004849/* Clear the C2 bit of the FPU status register, for
4850 sin/cos/tan/sincos. */
4851
4852static void clear_C2 ( void )
4853{
4854 put_C3210( binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)) );
4855}
sewardj48a89d82005-05-06 11:50:13 +00004856
sewardj7c2d2822006-03-07 00:22:02 +00004857/* Invent a plausible-looking FPU status word value:
4858 ((ftop & 7) << 11) | (c3210 & 0x4700)
4859 */
4860static IRExpr* get_FPU_sw ( void )
4861{
4862 return
4863 unop(Iop_32to16,
4864 binop(Iop_Or32,
4865 binop(Iop_Shl32,
4866 binop(Iop_And32, get_ftop(), mkU32(7)),
4867 mkU8(11)),
4868 binop(Iop_And32, unop(Iop_64to32, get_C3210()),
4869 mkU32(0x4700))
4870 ));
4871}
4872
sewardj48a89d82005-05-06 11:50:13 +00004873
4874/* ------------------------------------------------------- */
4875/* Given all that stack-mangling junk, we can now go ahead
4876 and describe FP instructions.
4877*/
4878
4879/* ST(0) = ST(0) `op` mem64/32(addr)
4880 Need to check ST(0)'s tag on read, but not on write.
4881*/
4882static
sewardjca673ab2005-05-11 10:03:08 +00004883void fp_do_op_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf,
sewardj48a89d82005-05-06 11:50:13 +00004884 IROp op, Bool dbl )
4885{
4886 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
4887 if (dbl) {
4888 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00004889 triop( op,
4890 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj48a89d82005-05-06 11:50:13 +00004891 get_ST(0),
4892 loadLE(Ity_F64,mkexpr(addr))
4893 ));
4894 } else {
4895 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00004896 triop( op,
4897 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj48a89d82005-05-06 11:50:13 +00004898 get_ST(0),
4899 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
4900 ));
4901 }
4902}
sewardj7bc00082005-03-27 05:08:32 +00004903
4904
4905/* ST(0) = mem64/32(addr) `op` ST(0)
4906 Need to check ST(0)'s tag on read, but not on write.
4907*/
4908static
4909void fp_do_oprev_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf,
4910 IROp op, Bool dbl )
4911{
4912 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
4913 if (dbl) {
4914 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00004915 triop( op,
4916 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj7bc00082005-03-27 05:08:32 +00004917 loadLE(Ity_F64,mkexpr(addr)),
4918 get_ST(0)
4919 ));
4920 } else {
4921 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00004922 triop( op,
4923 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj7bc00082005-03-27 05:08:32 +00004924 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
4925 get_ST(0)
4926 ));
4927 }
4928}
sewardj37d52572005-02-25 14:22:12 +00004929
4930
4931/* ST(dst) = ST(dst) `op` ST(src).
4932 Check dst and src tags when reading but not on write.
4933*/
4934static
4935void fp_do_op_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
4936 Bool pop_after )
4937{
sewardj1027dc22005-02-26 01:55:02 +00004938 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
sewardj37d52572005-02-25 14:22:12 +00004939 put_ST_UNCHECKED(
4940 st_dst,
sewardj4796d662006-02-05 16:06:26 +00004941 triop( op,
4942 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4943 get_ST(st_dst),
4944 get_ST(st_src) )
sewardj37d52572005-02-25 14:22:12 +00004945 );
4946 if (pop_after)
4947 fp_pop();
4948}
4949
sewardj137015d2005-03-27 04:01:15 +00004950/* ST(dst) = ST(src) `op` ST(dst).
4951 Check dst and src tags when reading but not on write.
4952*/
4953static
4954void fp_do_oprev_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
4955 Bool pop_after )
4956{
4957 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
4958 put_ST_UNCHECKED(
4959 st_dst,
sewardj4796d662006-02-05 16:06:26 +00004960 triop( op,
4961 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4962 get_ST(st_src),
4963 get_ST(st_dst) )
sewardj137015d2005-03-27 04:01:15 +00004964 );
4965 if (pop_after)
4966 fp_pop();
4967}
sewardjc49ce232005-02-25 13:03:03 +00004968
4969/* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */
4970static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
4971{
sewardj1027dc22005-02-26 01:55:02 +00004972 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
sewardjc49ce232005-02-25 13:03:03 +00004973 /* This is a bit of a hack (and isn't really right). It sets
4974 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
4975 documentation implies A and S are unchanged.
4976 */
4977 /* It's also fishy in that it is used both for COMIP and
4978 UCOMIP, and they aren't the same (although similar). */
4979 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
4980 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
4981 stmt( IRStmt_Put(
4982 OFFB_CC_DEP1,
4983 binop( Iop_And64,
4984 unop( Iop_32Uto64,
4985 binop(Iop_CmpF64, get_ST(0), get_ST(i))),
4986 mkU64(0x45)
4987 )));
4988 if (pop_after)
4989 fp_pop();
4990}
sewardj8d965312005-02-25 02:48:47 +00004991
4992
sewardjb707d102007-07-11 22:49:26 +00004993/* returns
4994 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 )
4995*/
4996static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 )
4997{
4998 IRTemp t32 = newTemp(Ity_I32);
4999 assign( t32, e32 );
5000 return
5001 IRExpr_Mux0X(
5002 unop(Iop_1Uto8,
5003 binop(Iop_CmpLT64U,
5004 unop(Iop_32Uto64,
5005 binop(Iop_Add32, mkexpr(t32), mkU32(32768))),
5006 mkU64(65536))),
5007 mkU16( 0x8000 ),
5008 unop(Iop_32to16, mkexpr(t32)));
5009}
5010
5011
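/* Reference model (not used by the translator): the unsigned-compare
   trick above implements a signed range check, giving a saturating
   narrow to 16 bits with the x87 "integer indefinite" value 0x8000
   for out-of-range inputs.  Equivalent standalone C: */
#if 0
static short qnarrow_32_to_16_model ( int e32 )
{
   /* (unsigned)(e32 + 32768) < 65536  <=>  -32768 <= e32 <= 32767 */
   if ((unsigned int)e32 + 32768u < 65536u)
      return (short)e32;       /* in range: plain truncation       */
   else
      return (short)0x8000;    /* out of range: integer indefinite */
}
#endif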
sewardj8d965312005-02-25 02:48:47 +00005012static
sewardjb4fd2e72005-03-23 13:34:11 +00005013ULong dis_FPU ( /*OUT*/Bool* decode_ok,
sewardj2e28ac42008-12-04 00:05:12 +00005014 VexAbiInfo* vbi, Prefix pfx, Long delta )
sewardj8d965312005-02-25 02:48:47 +00005015{
5016 Int len;
5017 UInt r_src, r_dst;
5018 HChar dis_buf[50];
5019 IRTemp t1, t2;
5020
5021 /* On entry, delta points at the second byte of the insn (the modrm
5022 byte).*/
5023 UChar first_opcode = getUChar(delta-1);
5024 UChar modrm = getUChar(delta+0);
5025
sewardj37d52572005-02-25 14:22:12 +00005026 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
5027
5028 if (first_opcode == 0xD8) {
5029 if (modrm < 0xC0) {
5030
5031 /* bits 5,4,3 are an opcode extension, and the modRM also
5032 specifies an address. */
sewardj2e28ac42008-12-04 00:05:12 +00005033 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj7bc00082005-03-27 05:08:32 +00005034 delta += len;
sewardj37d52572005-02-25 14:22:12 +00005035
sewardj901ed122005-02-27 13:25:31 +00005036 switch (gregLO3ofRM(modrm)) {
sewardj37d52572005-02-25 14:22:12 +00005037
sewardj48a89d82005-05-06 11:50:13 +00005038 case 0: /* FADD single-real */
5039 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
5040 break;
5041
sewardje6939f02005-05-07 01:01:24 +00005042 case 1: /* FMUL single-real */
5043 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
5044 break;
5045
sewardjdb855502012-05-21 21:51:36 +00005046 case 2: /* FCOM single-real */
5047 DIP("fcoms %s\n", dis_buf);
5048 /* This forces C1 to zero, which isn't right. */
5049 /* The AMD documentation suggests that forcing C1 to
5050 zero is correct (Eliot Moss) */
5051 put_C3210(
5052 unop( Iop_32Uto64,
5053 binop( Iop_And32,
5054 binop(Iop_Shl32,
5055 binop(Iop_CmpF64,
5056 get_ST(0),
5057 unop(Iop_F32toF64,
5058 loadLE(Ity_F32,mkexpr(addr)))),
5059 mkU8(8)),
5060 mkU32(0x4500)
5061 )));
5062 break;
5063
5064 case 3: /* FCOMP single-real */
5065 /* The AMD documentation suggests that forcing C1 to
5066 zero is correct (Eliot Moss) */
5067 DIP("fcomps %s\n", dis_buf);
5068 /* This forces C1 to zero, which isn't right. */
5069 put_C3210(
5070 unop( Iop_32Uto64,
5071 binop( Iop_And32,
5072 binop(Iop_Shl32,
5073 binop(Iop_CmpF64,
5074 get_ST(0),
5075 unop(Iop_F32toF64,
5076 loadLE(Ity_F32,mkexpr(addr)))),
5077 mkU8(8)),
5078 mkU32(0x4500)
5079 )));
5080 fp_pop();
5081 break;
sewardje6939f02005-05-07 01:01:24 +00005082
5083 case 4: /* FSUB single-real */
5084 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
5085 break;
sewardj7bc00082005-03-27 05:08:32 +00005086
5087 case 5: /* FSUBR single-real */
5088 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
5089 break;
5090
sewardje6939f02005-05-07 01:01:24 +00005091 case 6: /* FDIV single-real */
5092 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
5093 break;
5094
5095 case 7: /* FDIVR single-real */
5096 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
5097 break;
sewardj37d52572005-02-25 14:22:12 +00005098
5099 default:
sewardj901ed122005-02-27 13:25:31 +00005100 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
sewardj37d52572005-02-25 14:22:12 +00005101 vex_printf("first_opcode == 0xD8\n");
5102 goto decode_fail;
5103 }
5104 } else {
5105 delta++;
5106 switch (modrm) {
5107
5108 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
5109 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
5110 break;
5111
sewardj137015d2005-03-27 04:01:15 +00005112 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
5113 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
5114 break;
5115
sewardj1859ecd2007-02-23 08:48:22 +00005116 /* Dunno if this is right */
5117 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
5118 r_dst = (UInt)modrm - 0xD0;
5119 DIP("fcom %%st(0),%%st(%u)\n", r_dst);
5120 /* This forces C1 to zero, which isn't right. */
5121 put_C3210(
5122 unop(Iop_32Uto64,
5123 binop( Iop_And32,
5124 binop(Iop_Shl32,
5125 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5126 mkU8(8)),
5127 mkU32(0x4500)
5128 )));
5129 break;
5130
sewardj90e2e4b2007-05-04 09:41:24 +00005131 /* Dunno if this is right */
5132 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
5133 r_dst = (UInt)modrm - 0xD8;
5134 DIP("fcomp %%st(0),%%st(%u)\n", r_dst);
5135 /* This forces C1 to zero, which isn't right. */
5136 put_C3210(
5137 unop(Iop_32Uto64,
5138 binop( Iop_And32,
5139 binop(Iop_Shl32,
5140 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5141 mkU8(8)),
5142 mkU32(0x4500)
5143 )));
5144 fp_pop();
5145 break;
5146
sewardj137015d2005-03-27 04:01:15 +00005147 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
5148 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
5149 break;
5150
sewardje6939f02005-05-07 01:01:24 +00005151 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
5152 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
5153 break;
sewardj137015d2005-03-27 04:01:15 +00005154
5155 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
5156 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
5157 break;
5158
sewardj48a89d82005-05-06 11:50:13 +00005159 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
5160 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
5161 break;
sewardj37d52572005-02-25 14:22:12 +00005162
5163 default:
5164 goto decode_fail;
5165 }
5166 }
5167 }
sewardj8d965312005-02-25 02:48:47 +00005168
5169 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
sewardj37d52572005-02-25 14:22:12 +00005170 else
sewardj8d965312005-02-25 02:48:47 +00005171 if (first_opcode == 0xD9) {
5172 if (modrm < 0xC0) {
5173
5174 /* bits 5,4,3 are an opcode extension, and the modRM also
5175 specifies an address. */
sewardj2e28ac42008-12-04 00:05:12 +00005176 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj8d965312005-02-25 02:48:47 +00005177 delta += len;
5178
sewardj901ed122005-02-27 13:25:31 +00005179 switch (gregLO3ofRM(modrm)) {
sewardj8d965312005-02-25 02:48:47 +00005180
sewardjc49ce232005-02-25 13:03:03 +00005181 case 0: /* FLD single-real */
5182 DIP("flds %s\n", dis_buf);
5183 fp_push();
5184 put_ST(0, unop(Iop_F32toF64,
5185 loadLE(Ity_F32, mkexpr(addr))));
5186 break;
5187
5188 case 2: /* FST single-real */
5189 DIP("fsts %s\n", dis_buf);
5190 storeLE(mkexpr(addr),
5191 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5192 break;
5193
5194 case 3: /* FSTP single-real */
5195 DIP("fstps %s\n", dis_buf);
5196 storeLE(mkexpr(addr),
5197 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5198 fp_pop();
5199 break;
5200
sewardj4017a3b2005-06-13 12:17:27 +00005201 case 4: { /* FLDENV m28 */
5202 /* Uses dirty helper:
5203 ULong amd64g_dirtyhelper_FLDENV ( VexGuestAMD64State*, HWord ) */
5204 IRTemp ew = newTemp(Ity_I32);
5205 IRTemp w64 = newTemp(Ity_I64);
5206 IRDirty* d = unsafeIRDirty_0_N (
5207 0/*regparms*/,
5208 "amd64g_dirtyhelper_FLDENV",
5209 &amd64g_dirtyhelper_FLDENV,
5210 mkIRExprVec_1( mkexpr(addr) )
5211 );
5212 d->needsBBP = True;
5213 d->tmp = w64;
5214 /* declare we're reading memory */
5215 d->mFx = Ifx_Read;
5216 d->mAddr = mkexpr(addr);
5217 d->mSize = 28;
5218
5219 /* declare we're writing guest state */
5220 d->nFxState = 4;
sewardjc9069f22012-06-01 16:09:50 +00005221 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj4017a3b2005-06-13 12:17:27 +00005222
5223 d->fxState[0].fx = Ifx_Write;
5224 d->fxState[0].offset = OFFB_FTOP;
5225 d->fxState[0].size = sizeof(UInt);
5226
5227 d->fxState[1].fx = Ifx_Write;
5228 d->fxState[1].offset = OFFB_FPTAGS;
5229 d->fxState[1].size = 8 * sizeof(UChar);
5230
5231 d->fxState[2].fx = Ifx_Write;
5232 d->fxState[2].offset = OFFB_FPROUND;
5233 d->fxState[2].size = sizeof(ULong);
5234
5235 d->fxState[3].fx = Ifx_Write;
5236 d->fxState[3].offset = OFFB_FC3210;
5237 d->fxState[3].size = sizeof(ULong);
5238
5239 stmt( IRStmt_Dirty(d) );
5240
5241 /* ew contains any emulation warning we may need to
5242 issue. If needed, side-exit to the next insn,
5243 reporting the warning, so that Valgrind's dispatcher
5244 sees the warning. */
5245 assign(ew, unop(Iop_64to32,mkexpr(w64)) );
5246 put_emwarn( mkexpr(ew) );
5247 stmt(
5248 IRStmt_Exit(
5249 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5250 Ijk_EmWarn,
sewardjc6f970f2012-04-02 21:54:49 +00005251 IRConst_U64( guest_RIP_bbstart+delta ),
5252 OFFB_RIP
sewardj4017a3b2005-06-13 12:17:27 +00005253 )
5254 );
5255
5256 DIP("fldenv %s\n", dis_buf);
5257 break;
5258 }
sewardj5e205372005-05-09 02:57:08 +00005259
5260 case 5: {/* FLDCW */
5261 /* The only thing we observe in the control word is the
5262 rounding mode. Therefore, pass the 16-bit value
5263 (x87 native-format control word) to a clean helper,
5264 getting back a 64-bit value, the lower half of which
5265 is the FPROUND value to store, and the upper half of
5266 which is the emulation-warning token which may be
5267 generated.
5268 */
5269 /* ULong amd64h_check_fldcw ( ULong ); */
5270 IRTemp t64 = newTemp(Ity_I64);
5271 IRTemp ew = newTemp(Ity_I32);
5272 DIP("fldcw %s\n", dis_buf);
5273 assign( t64, mkIRExprCCall(
5274 Ity_I64, 0/*regparms*/,
5275 "amd64g_check_fldcw",
5276 &amd64g_check_fldcw,
5277 mkIRExprVec_1(
5278 unop( Iop_16Uto64,
5279 loadLE(Ity_I16, mkexpr(addr)))
5280 )
5281 )
5282 );
5283
5284 put_fpround( unop(Iop_64to32, mkexpr(t64)) );
5285 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
5286 put_emwarn( mkexpr(ew) );
5287 /* Finally, if an emulation warning was reported,
5288 side-exit to the next insn, reporting the warning,
5289 so that Valgrind's dispatcher sees the warning. */
5290 stmt(
5291 IRStmt_Exit(
5292 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5293 Ijk_EmWarn,
sewardjc6f970f2012-04-02 21:54:49 +00005294 IRConst_U64( guest_RIP_bbstart+delta ),
5295 OFFB_RIP
sewardj5e205372005-05-09 02:57:08 +00005296 )
5297 );
5298 break;
5299 }
5300
sewardj4017a3b2005-06-13 12:17:27 +00005301 case 6: { /* FNSTENV m28 */
5302 /* Uses dirty helper:
5303 void amd64g_dirtyhelper_FSTENV ( VexGuestAMD64State*, HWord ) */
5304 IRDirty* d = unsafeIRDirty_0_N (
5305 0/*regparms*/,
5306 "amd64g_dirtyhelper_FSTENV",
5307 &amd64g_dirtyhelper_FSTENV,
5308 mkIRExprVec_1( mkexpr(addr) )
5309 );
5310 d->needsBBP = True;
5311 /* declare we're writing memory */
5312 d->mFx = Ifx_Write;
5313 d->mAddr = mkexpr(addr);
5314 d->mSize = 28;
5315
5316 /* declare we're reading guest state */
5317 d->nFxState = 4;
sewardjc9069f22012-06-01 16:09:50 +00005318 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj4017a3b2005-06-13 12:17:27 +00005319
5320 d->fxState[0].fx = Ifx_Read;
5321 d->fxState[0].offset = OFFB_FTOP;
5322 d->fxState[0].size = sizeof(UInt);
5323
5324 d->fxState[1].fx = Ifx_Read;
5325 d->fxState[1].offset = OFFB_FPTAGS;
5326 d->fxState[1].size = 8 * sizeof(UChar);
5327
5328 d->fxState[2].fx = Ifx_Read;
5329 d->fxState[2].offset = OFFB_FPROUND;
5330 d->fxState[2].size = sizeof(ULong);
5331
5332 d->fxState[3].fx = Ifx_Read;
5333 d->fxState[3].offset = OFFB_FC3210;
5334 d->fxState[3].size = sizeof(ULong);
5335
5336 stmt( IRStmt_Dirty(d) );
5337
5338 DIP("fnstenv %s\n", dis_buf);
5339 break;
5340 }
sewardj5e205372005-05-09 02:57:08 +00005341
5342 case 7: /* FNSTCW */
5343 /* Fake up a native x87 FPU control word. The only
5344 thing it depends on is FPROUND[1:0], so call a clean
5345 helper to cook it up. */
sewardj4017a3b2005-06-13 12:17:27 +00005346 /* ULong amd64g_create_fpucw ( ULong fpround ) */
sewardj5e205372005-05-09 02:57:08 +00005347 DIP("fnstcw %s\n", dis_buf);
5348 storeLE(
5349 mkexpr(addr),
5350 unop( Iop_64to16,
5351 mkIRExprCCall(
5352 Ity_I64, 0/*regp*/,
5353 "amd64g_create_fpucw", &amd64g_create_fpucw,
5354 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) )
5355 )
5356 )
5357 );
5358 break;
sewardj8d965312005-02-25 02:48:47 +00005359
5360 default:
sewardj901ed122005-02-27 13:25:31 +00005361 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
sewardj8d965312005-02-25 02:48:47 +00005362 vex_printf("first_opcode == 0xD9\n");
5363 goto decode_fail;
5364 }
5365
5366 } else {
5367 delta++;
5368 switch (modrm) {
5369
sewardjc49ce232005-02-25 13:03:03 +00005370 case 0xC0 ... 0xC7: /* FLD %st(?) */
5371 r_src = (UInt)modrm - 0xC0;
sewardj1027dc22005-02-26 01:55:02 +00005372 DIP("fld %%st(%u)\n", r_src);
sewardjc49ce232005-02-25 13:03:03 +00005373 t1 = newTemp(Ity_F64);
5374 assign(t1, get_ST(r_src));
5375 fp_push();
5376 put_ST(0, mkexpr(t1));
5377 break;
sewardj8d965312005-02-25 02:48:47 +00005378
5379 case 0xC8 ... 0xCF: /* FXCH %st(?) */
5380 r_src = (UInt)modrm - 0xC8;
sewardj1027dc22005-02-26 01:55:02 +00005381 DIP("fxch %%st(%u)\n", r_src);
sewardj8d965312005-02-25 02:48:47 +00005382 t1 = newTemp(Ity_F64);
5383 t2 = newTemp(Ity_F64);
5384 assign(t1, get_ST(0));
5385 assign(t2, get_ST(r_src));
5386 put_ST_UNCHECKED(0, mkexpr(t2));
5387 put_ST_UNCHECKED(r_src, mkexpr(t1));
5388 break;
5389
5390 case 0xE0: /* FCHS */
5391 DIP("fchs\n");
5392 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
5393 break;
5394
sewardj137015d2005-03-27 04:01:15 +00005395 case 0xE1: /* FABS */
5396 DIP("fabs\n");
5397 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
5398 break;
5399
sewardj4f9847d2005-07-25 11:58:34 +00005400 case 0xE5: { /* FXAM */
5401 /* This is an interesting one. It examines %st(0),
5402 regardless of whether the tag says it's empty or not.
5403 Here, just pass both the tag (in our format) and the
5404 value (as a double, actually a ULong) to a helper
5405 function. */
5406 IRExpr** args
5407 = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)),
5408 unop(Iop_ReinterpF64asI64,
5409 get_ST_UNCHECKED(0)) );
5410 put_C3210(mkIRExprCCall(
5411 Ity_I64,
5412 0/*regparm*/,
5413 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM,
5414 args
5415 ));
5416 DIP("fxam\n");
5417 break;
5418 }
sewardjc49ce232005-02-25 13:03:03 +00005419
5420 case 0xE8: /* FLD1 */
5421 DIP("fld1\n");
5422 fp_push();
5423 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
5424 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
5425 break;
5426
sewardj6847d8c2005-05-12 19:21:55 +00005427 case 0xE9: /* FLDL2T */
5428 DIP("fldl2t\n");
5429 fp_push();
5430 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
5431 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
5432 break;
5433
5434 case 0xEA: /* FLDL2E */
5435 DIP("fldl2e\n");
5436 fp_push();
5437 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
5438 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
5439 break;
5440
5441 case 0xEB: /* FLDPI */
5442 DIP("fldpi\n");
5443 fp_push();
5444 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
5445 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
5446 break;
5447
5448 case 0xEC: /* FLDLG2 */
5449 DIP("fldlg2\n");
5450 fp_push();
5451 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
5452 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
5453 break;
5454
5455 case 0xED: /* FLDLN2 */
5456 DIP("fldln2\n");
5457 fp_push();
5458 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
5459 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
5460 break;
sewardjc49ce232005-02-25 13:03:03 +00005461
5462 case 0xEE: /* FLDZ */
5463 DIP("fldz\n");
5464 fp_push();
5465 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
5466 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
5467 break;
5468
sewardj25a85812005-05-08 23:03:48 +00005469 case 0xF0: /* F2XM1 */
5470 DIP("f2xm1\n");
sewardj4796d662006-02-05 16:06:26 +00005471 put_ST_UNCHECKED(0,
5472 binop(Iop_2xm1F64,
5473 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5474 get_ST(0)));
sewardj25a85812005-05-08 23:03:48 +00005475 break;
5476
5477 case 0xF1: /* FYL2X */
5478 DIP("fyl2x\n");
sewardj4796d662006-02-05 16:06:26 +00005479 put_ST_UNCHECKED(1,
5480 triop(Iop_Yl2xF64,
5481 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5482 get_ST(1),
5483 get_ST(0)));
sewardj25a85812005-05-08 23:03:48 +00005484 fp_pop();
5485 break;
5486
sewardj5e205372005-05-09 02:57:08 +00005487 case 0xF2: /* FPTAN */
5488 DIP("fptan\n");
sewardj4796d662006-02-05 16:06:26 +00005489 put_ST_UNCHECKED(0,
5490 binop(Iop_TanF64,
5491 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5492 get_ST(0)));
sewardj5e205372005-05-09 02:57:08 +00005493 fp_push();
5494 put_ST(0, IRExpr_Const(IRConst_F64(1.0)));
5495 clear_C2(); /* HACK */
5496 break;
sewardj25a85812005-05-08 23:03:48 +00005497
5498 case 0xF3: /* FPATAN */
5499 DIP("fpatan\n");
sewardj4796d662006-02-05 16:06:26 +00005500 put_ST_UNCHECKED(1,
5501 triop(Iop_AtanF64,
5502 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5503 get_ST(1),
5504 get_ST(0)));
sewardj25a85812005-05-08 23:03:48 +00005505 fp_pop();
5506 break;
5507
sewardj879cee02006-03-07 01:15:50 +00005508 case 0xF4: { /* FXTRACT */
5509 IRTemp argF = newTemp(Ity_F64);
5510 IRTemp sigF = newTemp(Ity_F64);
5511 IRTemp expF = newTemp(Ity_F64);
5512 IRTemp argI = newTemp(Ity_I64);
5513 IRTemp sigI = newTemp(Ity_I64);
5514 IRTemp expI = newTemp(Ity_I64);
5515 DIP("fxtract\n");
5516 assign( argF, get_ST(0) );
5517 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
5518 assign( sigI,
5519 mkIRExprCCall(
5520 Ity_I64, 0/*regparms*/,
5521 "x86amd64g_calculate_FXTRACT",
5522 &x86amd64g_calculate_FXTRACT,
5523 mkIRExprVec_2( mkexpr(argI),
5524 mkIRExpr_HWord(0)/*sig*/ ))
5525 );
5526 assign( expI,
5527 mkIRExprCCall(
5528 Ity_I64, 0/*regparms*/,
5529 "x86amd64g_calculate_FXTRACT",
5530 &x86amd64g_calculate_FXTRACT,
5531 mkIRExprVec_2( mkexpr(argI),
5532 mkIRExpr_HWord(1)/*exp*/ ))
5533 );
5534 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
5535 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
5536 /* exponent */
5537 put_ST_UNCHECKED(0, mkexpr(expF) );
5538 fp_push();
5539 /* significand */
5540 put_ST(0, mkexpr(sigF) );
5541 break;
5542 }
5543
sewardj4970e4e2008-10-11 10:07:55 +00005544 case 0xF5: { /* FPREM1 -- IEEE compliant */
5545 IRTemp a1 = newTemp(Ity_F64);
5546 IRTemp a2 = newTemp(Ity_F64);
5547 DIP("fprem1\n");
5548 /* Do FPREM1 twice, once to get the remainder, and once
5549 to get the C3210 flag values. */
5550 assign( a1, get_ST(0) );
5551 assign( a2, get_ST(1) );
5552 put_ST_UNCHECKED(0,
5553 triop(Iop_PRem1F64,
5554 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5555 mkexpr(a1),
5556 mkexpr(a2)));
5557 put_C3210(
5558 unop(Iop_32Uto64,
5559 triop(Iop_PRem1C3210F64,
5560 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5561 mkexpr(a1),
5562 mkexpr(a2)) ));
5563 break;
5564 }
sewardj6847d8c2005-05-12 19:21:55 +00005565
5566 case 0xF7: /* FINCSTP */
5567 DIP("fincstp\n");
5568 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
5569 break;
5570
sewardjf4c803b2006-09-11 11:07:34 +00005571 case 0xF8: { /* FPREM -- not IEEE compliant */
5572 IRTemp a1 = newTemp(Ity_F64);
5573 IRTemp a2 = newTemp(Ity_F64);
5574 DIP("fprem\n");
5575 /* Do FPREM twice, once to get the remainder, and once
5576 to get the C3210 flag values. */
5577 assign( a1, get_ST(0) );
5578 assign( a2, get_ST(1) );
5579 put_ST_UNCHECKED(0,
5580 triop(Iop_PRemF64,
5581 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5582 mkexpr(a1),
5583 mkexpr(a2)));
5584 put_C3210(
5585 unop(Iop_32Uto64,
5586 triop(Iop_PRemC3210F64,
5587 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5588 mkexpr(a1),
5589 mkexpr(a2)) ));
5590 break;
5591 }
5592
sewardj5e205372005-05-09 02:57:08 +00005593 case 0xF9: /* FYL2XP1 */
5594 DIP("fyl2xp1\n");
sewardj4796d662006-02-05 16:06:26 +00005595 put_ST_UNCHECKED(1,
5596 triop(Iop_Yl2xp1F64,
5597 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5598 get_ST(1),
5599 get_ST(0)));
sewardj5e205372005-05-09 02:57:08 +00005600 fp_pop();
5601 break;
sewardje6939f02005-05-07 01:01:24 +00005602
5603 case 0xFA: /* FSQRT */
5604 DIP("fsqrt\n");
sewardj4796d662006-02-05 16:06:26 +00005605 put_ST_UNCHECKED(0,
5606 binop(Iop_SqrtF64,
5607 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5608 get_ST(0)));
sewardje6939f02005-05-07 01:01:24 +00005609 break;
5610
sewardj25a85812005-05-08 23:03:48 +00005611 case 0xFB: { /* FSINCOS */
5612 IRTemp a1 = newTemp(Ity_F64);
5613 assign( a1, get_ST(0) );
5614 DIP("fsincos\n");
sewardj4796d662006-02-05 16:06:26 +00005615 put_ST_UNCHECKED(0,
5616 binop(Iop_SinF64,
5617 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5618 mkexpr(a1)));
sewardj25a85812005-05-08 23:03:48 +00005619 fp_push();
sewardj4796d662006-02-05 16:06:26 +00005620 put_ST(0,
5621 binop(Iop_CosF64,
5622 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5623 mkexpr(a1)));
sewardj25a85812005-05-08 23:03:48 +00005624 clear_C2(); /* HACK */
5625 break;
5626 }
5627
5628 case 0xFC: /* FRNDINT */
5629 DIP("frndint\n");
5630 put_ST_UNCHECKED(0,
sewardjb183b852006-02-03 16:08:03 +00005631 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
sewardj25a85812005-05-08 23:03:48 +00005632 break;
5633
5634 case 0xFD: /* FSCALE */
5635 DIP("fscale\n");
sewardj4796d662006-02-05 16:06:26 +00005636 put_ST_UNCHECKED(0,
5637 triop(Iop_ScaleF64,
5638 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5639 get_ST(0),
5640 get_ST(1)));
sewardj25a85812005-05-08 23:03:48 +00005641 break;
5642
5643 case 0xFE: /* FSIN */
5644 DIP("fsin\n");
sewardj4796d662006-02-05 16:06:26 +00005645 put_ST_UNCHECKED(0,
5646 binop(Iop_SinF64,
5647 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5648 get_ST(0)));
sewardj25a85812005-05-08 23:03:48 +00005649 clear_C2(); /* HACK */
5650 break;
5651
5652 case 0xFF: /* FCOS */
5653 DIP("fcos\n");
sewardj4796d662006-02-05 16:06:26 +00005654 put_ST_UNCHECKED(0,
5655 binop(Iop_CosF64,
5656 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5657 get_ST(0)));
sewardj25a85812005-05-08 23:03:48 +00005658 clear_C2(); /* HACK */
5659 break;
sewardj8d965312005-02-25 02:48:47 +00005660
5661 default:
5662 goto decode_fail;
5663 }
5664 }
5665 }
5666
5667 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
5668 else
5669 if (first_opcode == 0xDA) {
5670
5671 if (modrm < 0xC0) {
5672
5673 /* bits 5,4,3 are an opcode extension, and the modRM also
5674 specifies an address. */
sewardj6847d8c2005-05-12 19:21:55 +00005675 IROp fop;
sewardj2e28ac42008-12-04 00:05:12 +00005676 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj8d965312005-02-25 02:48:47 +00005677 delta += len;
sewardj901ed122005-02-27 13:25:31 +00005678 switch (gregLO3ofRM(modrm)) {
sewardj8d965312005-02-25 02:48:47 +00005679
sewardj6847d8c2005-05-12 19:21:55 +00005680 case 0: /* FIADD m32int */ /* ST(0) += m32int */
5681 DIP("fiaddl %s\n", dis_buf);
5682 fop = Iop_AddF64;
5683 goto do_fop_m32;
5684
5685 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
5686 DIP("fimull %s\n", dis_buf);
5687 fop = Iop_MulF64;
5688 goto do_fop_m32;
5689
5690 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
5691 DIP("fisubl %s\n", dis_buf);
5692 fop = Iop_SubF64;
5693 goto do_fop_m32;
5694
5695 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
5696 DIP("fisubrl %s\n", dis_buf);
5697 fop = Iop_SubF64;
5698 goto do_foprev_m32;
5699
5700 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
5701               DIP("fidivl %s\n", dis_buf);
5702 fop = Iop_DivF64;
5703 goto do_fop_m32;
5704
5705 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
5706 DIP("fidivrl %s\n", dis_buf);
5707 fop = Iop_DivF64;
5708 goto do_foprev_m32;
5709
5710 do_fop_m32:
5711 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00005712 triop(fop,
5713 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj6847d8c2005-05-12 19:21:55 +00005714 get_ST(0),
sewardj6c299f32009-12-31 18:00:12 +00005715 unop(Iop_I32StoF64,
sewardj6847d8c2005-05-12 19:21:55 +00005716 loadLE(Ity_I32, mkexpr(addr)))));
5717 break;
5718
5719 do_foprev_m32:
5720 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00005721 triop(fop,
5722 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj6c299f32009-12-31 18:00:12 +00005723 unop(Iop_I32StoF64,
sewardj6847d8c2005-05-12 19:21:55 +00005724 loadLE(Ity_I32, mkexpr(addr))),
5725 get_ST(0)));
5726 break;
sewardj8d965312005-02-25 02:48:47 +00005727
5728 default:
sewardj901ed122005-02-27 13:25:31 +00005729 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
sewardj8d965312005-02-25 02:48:47 +00005730 vex_printf("first_opcode == 0xDA\n");
5731 goto decode_fail;
5732 }
5733
5734 } else {
5735
5736 delta++;
5737 switch (modrm) {
5738
sewardj48a89d82005-05-06 11:50:13 +00005739 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
5740 r_src = (UInt)modrm - 0xC0;
sewardjca673ab2005-05-11 10:03:08 +00005741 DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
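               /* IRExpr_Mux0X(cond8, expr0, exprX) yields expr0 when
                  cond8 is zero and exprX otherwise; cond8 is 1 exactly
                  when the B condition holds, so ST(0) is replaced by
                  ST(r_src) only in that case.  The remaining FCMOVcc
                  cases below follow the same pattern. */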
sewardj48a89d82005-05-06 11:50:13 +00005742 put_ST_UNCHECKED(0,
5743 IRExpr_Mux0X(
5744 unop(Iop_1Uto8,
5745 mk_amd64g_calculate_condition(AMD64CondB)),
5746 get_ST(0), get_ST(r_src)) );
5747 break;
sewardj8d965312005-02-25 02:48:47 +00005748
5749 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
5750 r_src = (UInt)modrm - 0xC8;
sewardj1027dc22005-02-26 01:55:02 +00005751 DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
sewardj8d965312005-02-25 02:48:47 +00005752 put_ST_UNCHECKED(0,
5753 IRExpr_Mux0X(
5754 unop(Iop_1Uto8,
5755 mk_amd64g_calculate_condition(AMD64CondZ)),
5756 get_ST(0), get_ST(r_src)) );
5757 break;
5758
sewardj37d52572005-02-25 14:22:12 +00005759 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
5760 r_src = (UInt)modrm - 0xD0;
sewardj1027dc22005-02-26 01:55:02 +00005761 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
sewardj37d52572005-02-25 14:22:12 +00005762 put_ST_UNCHECKED(0,
5763 IRExpr_Mux0X(
5764 unop(Iop_1Uto8,
5765 mk_amd64g_calculate_condition(AMD64CondBE)),
5766 get_ST(0), get_ST(r_src)) );
5767 break;
5768
sewardj25a85812005-05-08 23:03:48 +00005769 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
5770 r_src = (UInt)modrm - 0xD8;
5771 DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
5772 put_ST_UNCHECKED(0,
5773 IRExpr_Mux0X(
5774 unop(Iop_1Uto8,
5775 mk_amd64g_calculate_condition(AMD64CondP)),
5776 get_ST(0), get_ST(r_src)) );
5777 break;
5778
sewardje7f277a2008-04-28 21:05:33 +00005779 case 0xE9: /* FUCOMPP %st(0),%st(1) */
5780 DIP("fucompp %%st(0),%%st(1)\n");
5781 /* This forces C1 to zero, which isn't right. */
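               /* Iop_CmpF64 yields 0x45 (unordered), 0x40 (EQ), 0x01 (LT)
                  or 0x00 (GT) in its low byte; shifting left by 8 and
                  masking with 0x4500 lands that result in the x87 C3
                  (bit 14), C2 (bit 10) and C0 (bit 8) positions. */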
5782 put_C3210(
5783 unop(Iop_32Uto64,
5784 binop( Iop_And32,
5785 binop(Iop_Shl32,
5786 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
5787 mkU8(8)),
5788 mkU32(0x4500)
5789 )));
5790 fp_pop();
5791 fp_pop();
5792 break;
sewardj8d965312005-02-25 02:48:47 +00005793
5794 default:
5795 goto decode_fail;
5796 }
5797
5798 }
5799 }
5800
sewardjc49ce232005-02-25 13:03:03 +00005801 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
5802 else
5803 if (first_opcode == 0xDB) {
5804 if (modrm < 0xC0) {
5805
5806 /* bits 5,4,3 are an opcode extension, and the modRM also
5807 specifies an address. */
sewardj2e28ac42008-12-04 00:05:12 +00005808 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardjc49ce232005-02-25 13:03:03 +00005809 delta += len;
5810
sewardj901ed122005-02-27 13:25:31 +00005811 switch (gregLO3ofRM(modrm)) {
sewardjc49ce232005-02-25 13:03:03 +00005812
sewardj5cc00ff2005-03-27 04:48:32 +00005813 case 0: /* FILD m32int */
5814 DIP("fildl %s\n", dis_buf);
5815 fp_push();
sewardj6c299f32009-12-31 18:00:12 +00005816 put_ST(0, unop(Iop_I32StoF64,
sewardj5cc00ff2005-03-27 04:48:32 +00005817 loadLE(Ity_I32, mkexpr(addr))));
5818 break;
5819
sewardjfcf21f32006-08-04 14:51:19 +00005820 case 1: /* FISTTPL m32 (SSE3) */
5821 DIP("fisttpl %s\n", dis_buf);
5822 storeLE( mkexpr(addr),
sewardj6c299f32009-12-31 18:00:12 +00005823 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
sewardjfcf21f32006-08-04 14:51:19 +00005824 fp_pop();
5825 break;
5826
sewardj6847d8c2005-05-12 19:21:55 +00005827 case 2: /* FIST m32 */
5828 DIP("fistl %s\n", dis_buf);
5829 storeLE( mkexpr(addr),
sewardj6c299f32009-12-31 18:00:12 +00005830 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
sewardj6847d8c2005-05-12 19:21:55 +00005831 break;
sewardj37d52572005-02-25 14:22:12 +00005832
5833 case 3: /* FISTP m32 */
5834 DIP("fistpl %s\n", dis_buf);
5835 storeLE( mkexpr(addr),
sewardj6c299f32009-12-31 18:00:12 +00005836 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
sewardj37d52572005-02-25 14:22:12 +00005837 fp_pop();
5838 break;
5839
sewardj924215b2005-03-26 21:50:31 +00005840 case 5: { /* FLD extended-real */
5841 /* Uses dirty helper:
5842 ULong amd64g_loadF80le ( ULong )
5843 addr holds the address. First, do a dirty call to
5844 get hold of the data. */
5845 IRTemp val = newTemp(Ity_I64);
5846 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
5847
5848 IRDirty* d = unsafeIRDirty_1_N (
5849 val,
5850 0/*regparms*/,
sewardj8707fef2005-08-23 23:26:37 +00005851 "amd64g_dirtyhelper_loadF80le",
5852 &amd64g_dirtyhelper_loadF80le,
sewardj924215b2005-03-26 21:50:31 +00005853 args
5854 );
5855 /* declare that we're reading memory */
5856 d->mFx = Ifx_Read;
5857 d->mAddr = mkexpr(addr);
5858 d->mSize = 10;
5859
5860 /* execute the dirty call, dumping the result in val. */
5861 stmt( IRStmt_Dirty(d) );
5862 fp_push();
5863 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
5864
5865 DIP("fldt %s\n", dis_buf);
5866 break;
5867 }
5868
5869 case 7: { /* FSTP extended-real */
5870 /* Uses dirty helper:
5871 void amd64g_storeF80le ( ULong addr, ULong data )
5872 */
5873 IRExpr** args
5874 = mkIRExprVec_2( mkexpr(addr),
5875 unop(Iop_ReinterpF64asI64, get_ST(0)) );
5876
5877 IRDirty* d = unsafeIRDirty_0_N (
5878 0/*regparms*/,
sewardj8707fef2005-08-23 23:26:37 +00005879 "amd64g_dirtyhelper_storeF80le",
5880 &amd64g_dirtyhelper_storeF80le,
sewardj924215b2005-03-26 21:50:31 +00005881 args
5882 );
5883 /* declare we're writing memory */
5884 d->mFx = Ifx_Write;
5885 d->mAddr = mkexpr(addr);
5886 d->mSize = 10;
5887
5888 /* execute the dirty call. */
5889 stmt( IRStmt_Dirty(d) );
5890 fp_pop();
5891
5892               DIP("fstpt %s\n", dis_buf);
5893 break;
5894 }
sewardjc49ce232005-02-25 13:03:03 +00005895
5896 default:
sewardj901ed122005-02-27 13:25:31 +00005897 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
sewardjc49ce232005-02-25 13:03:03 +00005898 vex_printf("first_opcode == 0xDB\n");
5899 goto decode_fail;
5900 }
5901
5902 } else {
5903
5904 delta++;
5905 switch (modrm) {
5906
sewardj48a89d82005-05-06 11:50:13 +00005907 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
5908 r_src = (UInt)modrm - 0xC0;
sewardjca673ab2005-05-11 10:03:08 +00005909 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
sewardj48a89d82005-05-06 11:50:13 +00005910 put_ST_UNCHECKED(0,
5911 IRExpr_Mux0X(
5912 unop(Iop_1Uto8,
5913 mk_amd64g_calculate_condition(AMD64CondNB)),
5914 get_ST(0), get_ST(r_src)) );
5915 break;
sewardj924215b2005-03-26 21:50:31 +00005916
5917 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
5918 r_src = (UInt)modrm - 0xC8;
sewardj40e144d2005-03-28 00:46:27 +00005919 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
sewardj137015d2005-03-27 04:01:15 +00005920 put_ST_UNCHECKED(
5921 0,
5922 IRExpr_Mux0X(
5923 unop(Iop_1Uto8,
5924 mk_amd64g_calculate_condition(AMD64CondNZ)),
5925 get_ST(0),
5926 get_ST(r_src)
5927 )
5928 );
sewardj924215b2005-03-26 21:50:31 +00005929 break;
5930
sewardj137015d2005-03-27 04:01:15 +00005931 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
5932 r_src = (UInt)modrm - 0xD0;
sewardj40e144d2005-03-28 00:46:27 +00005933 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
sewardj137015d2005-03-27 04:01:15 +00005934 put_ST_UNCHECKED(
5935 0,
5936 IRExpr_Mux0X(
5937 unop(Iop_1Uto8,
5938 mk_amd64g_calculate_condition(AMD64CondNBE)),
5939 get_ST(0),
5940 get_ST(r_src)
5941 )
5942 );
5943 break;
5944
sewardj3368e102006-03-06 19:05:07 +00005945 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
5946 r_src = (UInt)modrm - 0xD8;
5947 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src);
5948 put_ST_UNCHECKED(
5949 0,
5950 IRExpr_Mux0X(
5951 unop(Iop_1Uto8,
5952 mk_amd64g_calculate_condition(AMD64CondNP)),
5953 get_ST(0),
5954 get_ST(r_src)
5955 )
5956 );
5957 break;
5958
sewardj4e1a1e92005-05-25 00:44:13 +00005959 case 0xE2:
5960 DIP("fnclex\n");
5961 break;
5962
sewardj0585a032005-11-05 02:55:06 +00005963 case 0xE3: {
5964 /* Uses dirty helper:
5965                  void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* ) */
5966 IRDirty* d = unsafeIRDirty_0_N (
5967 0/*regparms*/,
5968 "amd64g_dirtyhelper_FINIT",
5969 &amd64g_dirtyhelper_FINIT,
5970 mkIRExprVec_0()
5971 );
5972 d->needsBBP = True;
5973
5974 /* declare we're writing guest state */
5975 d->nFxState = 5;
sewardjc9069f22012-06-01 16:09:50 +00005976 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj0585a032005-11-05 02:55:06 +00005977
5978 d->fxState[0].fx = Ifx_Write;
5979 d->fxState[0].offset = OFFB_FTOP;
5980 d->fxState[0].size = sizeof(UInt);
5981
5982 d->fxState[1].fx = Ifx_Write;
5983 d->fxState[1].offset = OFFB_FPREGS;
5984 d->fxState[1].size = 8 * sizeof(ULong);
5985
5986 d->fxState[2].fx = Ifx_Write;
5987 d->fxState[2].offset = OFFB_FPTAGS;
5988 d->fxState[2].size = 8 * sizeof(UChar);
5989
5990 d->fxState[3].fx = Ifx_Write;
5991 d->fxState[3].offset = OFFB_FPROUND;
5992 d->fxState[3].size = sizeof(ULong);
5993
5994 d->fxState[4].fx = Ifx_Write;
5995 d->fxState[4].offset = OFFB_FC3210;
5996 d->fxState[4].size = sizeof(ULong);
5997
5998 stmt( IRStmt_Dirty(d) );
5999
6000 DIP("fninit\n");
6001 break;
6002 }
sewardjc49ce232005-02-25 13:03:03 +00006003
6004 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
6005 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
6006 break;
6007
sewardj48a89d82005-05-06 11:50:13 +00006008 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
6009 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
6010 break;
sewardjc49ce232005-02-25 13:03:03 +00006011
6012 default:
6013 goto decode_fail;
6014 }
6015 }
6016 }
6017
sewardj137015d2005-03-27 04:01:15 +00006018 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
6019 else
6020 if (first_opcode == 0xDC) {
6021 if (modrm < 0xC0) {
6022
sewardj434e0692005-03-27 17:36:08 +00006023 /* bits 5,4,3 are an opcode extension, and the modRM also
6024 specifies an address. */
sewardj2e28ac42008-12-04 00:05:12 +00006025 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj434e0692005-03-27 17:36:08 +00006026 delta += len;
6027
6028 switch (gregLO3ofRM(modrm)) {
6029
sewardje6939f02005-05-07 01:01:24 +00006030 case 0: /* FADD double-real */
6031 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
6032 break;
6033
6034 case 1: /* FMUL double-real */
6035 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
6036 break;
6037
sewardjd20c8852005-01-20 20:04:07 +00006038//.. case 2: /* FCOM double-real */
6039//.. DIP("fcoml %s\n", dis_buf);
6040//.. /* This forces C1 to zero, which isn't right. */
6041//.. put_C3210(
6042//.. binop( Iop_And32,
6043//.. binop(Iop_Shl32,
6044//.. binop(Iop_CmpF64,
6045//.. get_ST(0),
6046//.. loadLE(Ity_F64,mkexpr(addr))),
6047//.. mkU8(8)),
6048//.. mkU32(0x4500)
6049//.. ));
6050//.. break;
sewardj566d2c72005-08-10 11:43:42 +00006051
6052 case 3: /* FCOMP double-real */
6053 DIP("fcompl %s\n", dis_buf);
6054 /* This forces C1 to zero, which isn't right. */
6055 put_C3210(
6056 unop(Iop_32Uto64,
6057 binop( Iop_And32,
6058 binop(Iop_Shl32,
6059 binop(Iop_CmpF64,
6060 get_ST(0),
6061 loadLE(Ity_F64,mkexpr(addr))),
6062 mkU8(8)),
6063 mkU32(0x4500)
6064 )));
6065 fp_pop();
6066 break;
sewardje6939f02005-05-07 01:01:24 +00006067
6068 case 4: /* FSUB double-real */
6069 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
6070 break;
sewardj434e0692005-03-27 17:36:08 +00006071
6072 case 5: /* FSUBR double-real */
6073 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
6074 break;
6075
sewardje6939f02005-05-07 01:01:24 +00006076 case 6: /* FDIV double-real */
6077 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
6078 break;
6079
6080 case 7: /* FDIVR double-real */
6081 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
6082 break;
sewardj434e0692005-03-27 17:36:08 +00006083
6084 default:
6085 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
6086 vex_printf("first_opcode == 0xDC\n");
6087 goto decode_fail;
6088 }
sewardj137015d2005-03-27 04:01:15 +00006089
6090 } else {
6091
6092 delta++;
6093 switch (modrm) {
6094
6095 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
6096 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
6097 break;
6098
sewardj7bc00082005-03-27 05:08:32 +00006099 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
6100 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
6101 break;
6102
sewardj434e0692005-03-27 17:36:08 +00006103 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
6104 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
6105 break;
6106
sewardje6939f02005-05-07 01:01:24 +00006107 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
6108 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
6109 break;
6110
6111 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
6112 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
6113 break;
sewardj137015d2005-03-27 04:01:15 +00006114
6115 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
6116 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
6117 break;
6118
6119 default:
6120 goto decode_fail;
6121 }
6122
6123 }
6124 }
sewardj8d965312005-02-25 02:48:47 +00006125
6126 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
6127 else
6128 if (first_opcode == 0xDD) {
6129
6130 if (modrm < 0xC0) {
6131
6132 /* bits 5,4,3 are an opcode extension, and the modRM also
6133 specifies an address. */
sewardj2e28ac42008-12-04 00:05:12 +00006134 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj8d965312005-02-25 02:48:47 +00006135 delta += len;
6136
sewardj901ed122005-02-27 13:25:31 +00006137 switch (gregLO3ofRM(modrm)) {
sewardj8d965312005-02-25 02:48:47 +00006138
6139 case 0: /* FLD double-real */
6140 DIP("fldl %s\n", dis_buf);
6141 fp_push();
sewardjaf1ceca2005-06-30 23:31:27 +00006142 put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
sewardj8d965312005-02-25 02:48:47 +00006143 break;
6144
sewardjfcf21f32006-08-04 14:51:19 +00006145 case 1: /* FISTTPQ m64 (SSE3) */
6146               DIP("fisttpll %s\n", dis_buf);
6147 storeLE( mkexpr(addr),
sewardj6c299f32009-12-31 18:00:12 +00006148 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
sewardjfcf21f32006-08-04 14:51:19 +00006149 fp_pop();
6150 break;
6151
sewardjc49ce232005-02-25 13:03:03 +00006152 case 2: /* FST double-real */
6153 DIP("fstl %s\n", dis_buf);
6154 storeLE(mkexpr(addr), get_ST(0));
6155 break;
sewardj8d965312005-02-25 02:48:47 +00006156
6157 case 3: /* FSTP double-real */
6158 DIP("fstpl %s\n", dis_buf);
6159 storeLE(mkexpr(addr), get_ST(0));
6160 fp_pop();
6161 break;
6162
sewardj9ae42a72012-02-16 14:18:56 +00006163 case 4: { /* FRSTOR m94/m108 */
6164 IRTemp ew = newTemp(Ity_I32);
6165 IRTemp w64 = newTemp(Ity_I64);
6166 IRDirty* d;
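               /* The operand-size (66) prefix selects the 16-bit-mode
                  FRSTOR image, which is 94 bytes; otherwise the 32/64-bit
                  image of 108 bytes is used -- hence the two helper
                  variants and mSize values below. */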
6167 if ( have66(pfx) ) {
6168 /* Uses dirty helper:
6169 VexEmWarn amd64g_dirtyhelper_FRSTORS
6170 ( VexGuestAMD64State*, HWord ) */
6171 d = unsafeIRDirty_0_N (
6172 0/*regparms*/,
6173 "amd64g_dirtyhelper_FRSTORS",
6174 &amd64g_dirtyhelper_FRSTORS,
6175 mkIRExprVec_1( mkexpr(addr) )
6176 );
6177 d->mSize = 94;
6178 } else {
6179 /* Uses dirty helper:
6180 VexEmWarn amd64g_dirtyhelper_FRSTOR
6181 ( VexGuestAMD64State*, HWord ) */
6182 d = unsafeIRDirty_0_N (
6183 0/*regparms*/,
6184 "amd64g_dirtyhelper_FRSTOR",
6185 &amd64g_dirtyhelper_FRSTOR,
6186 mkIRExprVec_1( mkexpr(addr) )
6187 );
6188 d->mSize = 108;
6189 }
6190
6191 d->needsBBP = True;
6192 d->tmp = w64;
6193 /* declare we're reading memory */
6194 d->mFx = Ifx_Read;
6195 d->mAddr = mkexpr(addr);
6196 /* d->mSize set above */
6197
6198 /* declare we're writing guest state */
6199 d->nFxState = 5;
sewardjc9069f22012-06-01 16:09:50 +00006200 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj9ae42a72012-02-16 14:18:56 +00006201
6202 d->fxState[0].fx = Ifx_Write;
6203 d->fxState[0].offset = OFFB_FTOP;
6204 d->fxState[0].size = sizeof(UInt);
6205
6206 d->fxState[1].fx = Ifx_Write;
6207 d->fxState[1].offset = OFFB_FPREGS;
6208 d->fxState[1].size = 8 * sizeof(ULong);
6209
6210 d->fxState[2].fx = Ifx_Write;
6211 d->fxState[2].offset = OFFB_FPTAGS;
6212 d->fxState[2].size = 8 * sizeof(UChar);
6213
6214 d->fxState[3].fx = Ifx_Write;
6215 d->fxState[3].offset = OFFB_FPROUND;
6216 d->fxState[3].size = sizeof(ULong);
6217
6218 d->fxState[4].fx = Ifx_Write;
6219 d->fxState[4].offset = OFFB_FC3210;
6220 d->fxState[4].size = sizeof(ULong);
6221
6222 stmt( IRStmt_Dirty(d) );
6223
6224 /* ew contains any emulation warning we may need to
6225 issue. If needed, side-exit to the next insn,
6226 reporting the warning, so that Valgrind's dispatcher
6227 sees the warning. */
6228 assign(ew, unop(Iop_64to32,mkexpr(w64)) );
6229 put_emwarn( mkexpr(ew) );
6230 stmt(
6231 IRStmt_Exit(
6232 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
6233 Ijk_EmWarn,
sewardjc6f970f2012-04-02 21:54:49 +00006234 IRConst_U64( guest_RIP_bbstart+delta ),
6235 OFFB_RIP
sewardj9ae42a72012-02-16 14:18:56 +00006236 )
6237 );
6238
6239 if ( have66(pfx) ) {
6240 DIP("frstors %s\n", dis_buf);
6241 } else {
6242 DIP("frstor %s\n", dis_buf);
6243 }
6244 break;
6245 }
6246
6247 case 6: { /* FNSAVE m94/m108 */
6248 IRDirty *d;
6249 if ( have66(pfx) ) {
6250 /* Uses dirty helper:
6251                    void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*, HWord ) */
6252 d = unsafeIRDirty_0_N (
6253 0/*regparms*/,
6254 "amd64g_dirtyhelper_FNSAVES",
6255 &amd64g_dirtyhelper_FNSAVES,
6256 mkIRExprVec_1( mkexpr(addr) )
6257 );
6258 d->mSize = 94;
6259 } else {
6260 /* Uses dirty helper:
6261                    void amd64g_dirtyhelper_FNSAVE ( VexGuestAMD64State*, HWord ) */
6262 d = unsafeIRDirty_0_N (
6263 0/*regparms*/,
6264 "amd64g_dirtyhelper_FNSAVE",
6265 &amd64g_dirtyhelper_FNSAVE,
6266 mkIRExprVec_1( mkexpr(addr) )
6267 );
6268 d->mSize = 108;
6269 }
6270 d->needsBBP = True;
6271 /* declare we're writing memory */
6272 d->mFx = Ifx_Write;
6273 d->mAddr = mkexpr(addr);
6274 /* d->mSize set above */
6275
6276 /* declare we're reading guest state */
6277 d->nFxState = 5;
sewardjc9069f22012-06-01 16:09:50 +00006278 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj9ae42a72012-02-16 14:18:56 +00006279
6280 d->fxState[0].fx = Ifx_Read;
6281 d->fxState[0].offset = OFFB_FTOP;
6282 d->fxState[0].size = sizeof(UInt);
6283
6284 d->fxState[1].fx = Ifx_Read;
6285 d->fxState[1].offset = OFFB_FPREGS;
6286 d->fxState[1].size = 8 * sizeof(ULong);
6287
6288 d->fxState[2].fx = Ifx_Read;
6289 d->fxState[2].offset = OFFB_FPTAGS;
6290 d->fxState[2].size = 8 * sizeof(UChar);
6291
6292 d->fxState[3].fx = Ifx_Read;
6293 d->fxState[3].offset = OFFB_FPROUND;
6294 d->fxState[3].size = sizeof(ULong);
6295
6296 d->fxState[4].fx = Ifx_Read;
6297 d->fxState[4].offset = OFFB_FC3210;
6298 d->fxState[4].size = sizeof(ULong);
6299
6300 stmt( IRStmt_Dirty(d) );
6301
6302 if ( have66(pfx) ) {
6303 DIP("fnsaves %s\n", dis_buf);
6304 } else {
6305 DIP("fnsave %s\n", dis_buf);
6306 }
6307 break;
6308 }
sewardj8d965312005-02-25 02:48:47 +00006309
sewardj7c2d2822006-03-07 00:22:02 +00006310 case 7: { /* FNSTSW m16 */
6311 IRExpr* sw = get_FPU_sw();
sewardjdd40fdf2006-12-24 02:20:24 +00006312 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
sewardj7c2d2822006-03-07 00:22:02 +00006313 storeLE( mkexpr(addr), sw );
6314 DIP("fnstsw %s\n", dis_buf);
6315 break;
6316 }
6317
sewardj8d965312005-02-25 02:48:47 +00006318 default:
sewardj901ed122005-02-27 13:25:31 +00006319 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
sewardj8d965312005-02-25 02:48:47 +00006320 vex_printf("first_opcode == 0xDD\n");
6321 goto decode_fail;
6322 }
6323 } else {
6324 delta++;
6325 switch (modrm) {
6326
sewardj6847d8c2005-05-12 19:21:55 +00006327 case 0xC0 ... 0xC7: /* FFREE %st(?) */
6328 r_dst = (UInt)modrm - 0xC0;
6329 DIP("ffree %%st(%u)\n", r_dst);
6330 put_ST_TAG ( r_dst, mkU8(0) );
6331 break;
6332
sewardjbfabcc42005-08-08 09:58:05 +00006333 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
6334 r_dst = (UInt)modrm - 0xD0;
sewardjc7cd2142005-09-09 22:31:49 +00006335 DIP("fst %%st(0),%%st(%u)\n", r_dst);
sewardjbfabcc42005-08-08 09:58:05 +00006336 /* P4 manual says: "If the destination operand is a
6337 non-empty register, the invalid-operation exception
6338               is not generated."  Hence put_ST_UNCHECKED. */
6339 put_ST_UNCHECKED(r_dst, get_ST(0));
6340 break;
sewardj8d965312005-02-25 02:48:47 +00006341
6342 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
6343 r_dst = (UInt)modrm - 0xD8;
sewardj1027dc22005-02-26 01:55:02 +00006344 DIP("fstp %%st(0),%%st(%u)\n", r_dst);
sewardj8d965312005-02-25 02:48:47 +00006345 /* P4 manual says: "If the destination operand is a
6346 non-empty register, the invalid-operation exception
6347               is not generated."  Hence put_ST_UNCHECKED. */
6348 put_ST_UNCHECKED(r_dst, get_ST(0));
6349 fp_pop();
6350 break;
6351
sewardjfb6c1792005-10-05 17:58:32 +00006352 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
6353 r_dst = (UInt)modrm - 0xE0;
sewardj62d05432005-10-29 22:30:47 +00006354 DIP("fucom %%st(0),%%st(%u)\n", r_dst);
sewardjfb6c1792005-10-05 17:58:32 +00006355 /* This forces C1 to zero, which isn't right. */
6356 put_C3210(
6357 unop(Iop_32Uto64,
6358 binop( Iop_And32,
6359 binop(Iop_Shl32,
6360 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6361 mkU8(8)),
6362 mkU32(0x4500)
6363 )));
6364 break;
6365
sewardj9fb2f472005-11-05 01:12:18 +00006366 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
6367 r_dst = (UInt)modrm - 0xE8;
sewardj43f45732005-11-05 13:04:34 +00006368 DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
sewardj9fb2f472005-11-05 01:12:18 +00006369 /* This forces C1 to zero, which isn't right. */
6370 put_C3210(
6371 unop(Iop_32Uto64,
6372 binop( Iop_And32,
6373 binop(Iop_Shl32,
6374 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6375 mkU8(8)),
6376 mkU32(0x4500)
6377 )));
6378 fp_pop();
6379 break;
sewardj8d965312005-02-25 02:48:47 +00006380
6381 default:
6382 goto decode_fail;
6383 }
6384 }
6385 }
6386
sewardj137015d2005-03-27 04:01:15 +00006387 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
6388 else
6389 if (first_opcode == 0xDE) {
6390
6391 if (modrm < 0xC0) {
6392
sewardj6847d8c2005-05-12 19:21:55 +00006393 /* bits 5,4,3 are an opcode extension, and the modRM also
6394 specifies an address. */
6395 IROp fop;
sewardj2e28ac42008-12-04 00:05:12 +00006396 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj6847d8c2005-05-12 19:21:55 +00006397 delta += len;
6398
6399 switch (gregLO3ofRM(modrm)) {
6400
6401 case 0: /* FIADD m16int */ /* ST(0) += m16int */
6402 DIP("fiaddw %s\n", dis_buf);
6403 fop = Iop_AddF64;
6404 goto do_fop_m16;
6405
6406 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
6407 DIP("fimulw %s\n", dis_buf);
6408 fop = Iop_MulF64;
6409 goto do_fop_m16;
6410
6411 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
6412 DIP("fisubw %s\n", dis_buf);
6413 fop = Iop_SubF64;
6414 goto do_fop_m16;
6415
6416 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
6417 DIP("fisubrw %s\n", dis_buf);
6418 fop = Iop_SubF64;
6419 goto do_foprev_m16;
6420
6421 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
6422                  DIP("fidivw %s\n", dis_buf);
6423 fop = Iop_DivF64;
6424 goto do_fop_m16;
6425
6426 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
6427 DIP("fidivrw %s\n", dis_buf);
6428 fop = Iop_DivF64;
6429 goto do_foprev_m16;
6430
6431 do_fop_m16:
6432 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00006433 triop(fop,
6434 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj6847d8c2005-05-12 19:21:55 +00006435 get_ST(0),
sewardj6c299f32009-12-31 18:00:12 +00006436 unop(Iop_I32StoF64,
sewardj6847d8c2005-05-12 19:21:55 +00006437 unop(Iop_16Sto32,
6438 loadLE(Ity_I16, mkexpr(addr))))));
6439 break;
6440
6441 do_foprev_m16:
6442 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00006443 triop(fop,
6444 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj6c299f32009-12-31 18:00:12 +00006445 unop(Iop_I32StoF64,
sewardj6847d8c2005-05-12 19:21:55 +00006446 unop(Iop_16Sto32,
6447 loadLE(Ity_I16, mkexpr(addr)))),
6448 get_ST(0)));
6449 break;
6450
6451 default:
6452 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
6453 vex_printf("first_opcode == 0xDE\n");
6454 goto decode_fail;
6455 }
sewardj137015d2005-03-27 04:01:15 +00006456
6457 } else {
6458
6459 delta++;
6460 switch (modrm) {
6461
6462 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
6463 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
6464 break;
6465
6466 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
6467 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
6468 break;
6469
sewardj1859ecd2007-02-23 08:48:22 +00006470 case 0xD9: /* FCOMPP %st(0),%st(1) */
6471 DIP("fcompp %%st(0),%%st(1)\n");
6472 /* This forces C1 to zero, which isn't right. */
6473 put_C3210(
6474 unop(Iop_32Uto64,
6475 binop( Iop_And32,
6476 binop(Iop_Shl32,
6477 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
6478 mkU8(8)),
6479 mkU32(0x4500)
6480 )));
6481 fp_pop();
6482 fp_pop();
6483 break;
sewardj137015d2005-03-27 04:01:15 +00006484
6485 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
6486 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
6487 break;
6488
6489 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
6490 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
6491 break;
6492
6493 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
6494 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
6495 break;
6496
6497 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
6498 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
6499 break;
6500
6501 default:
6502 goto decode_fail;
6503 }
6504
6505 }
6506 }
sewardjc49ce232005-02-25 13:03:03 +00006507
6508 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
6509 else
6510 if (first_opcode == 0xDF) {
6511
6512 if (modrm < 0xC0) {
6513
6514 /* bits 5,4,3 are an opcode extension, and the modRM also
6515 specifies an address. */
sewardj2e28ac42008-12-04 00:05:12 +00006516 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardjc49ce232005-02-25 13:03:03 +00006517 delta += len;
6518
sewardj901ed122005-02-27 13:25:31 +00006519 switch (gregLO3ofRM(modrm)) {
sewardjc49ce232005-02-25 13:03:03 +00006520
sewardj434e0692005-03-27 17:36:08 +00006521 case 0: /* FILD m16int */
6522 DIP("fildw %s\n", dis_buf);
6523 fp_push();
sewardj6c299f32009-12-31 18:00:12 +00006524 put_ST(0, unop(Iop_I32StoF64,
sewardj434e0692005-03-27 17:36:08 +00006525 unop(Iop_16Sto32,
6526 loadLE(Ity_I16, mkexpr(addr)))));
6527 break;
6528
sewardjfcf21f32006-08-04 14:51:19 +00006529 case 1: /* FISTTPS m16 (SSE3) */
6530 DIP("fisttps %s\n", dis_buf);
6531 storeLE( mkexpr(addr),
sewardjb707d102007-07-11 22:49:26 +00006532 x87ishly_qnarrow_32_to_16(
sewardj6c299f32009-12-31 18:00:12 +00006533 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ));
sewardjfcf21f32006-08-04 14:51:19 +00006534 fp_pop();
6535 break;
6536
sewardj9f5c8fd2010-05-10 20:08:12 +00006537 case 2: /* FIST m16 */
6538 DIP("fists %s\n", dis_buf);
6539 storeLE( mkexpr(addr),
6540 x87ishly_qnarrow_32_to_16(
6541 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
6542 break;
sewardj6847d8c2005-05-12 19:21:55 +00006543
sewardjb707d102007-07-11 22:49:26 +00006544 case 3: /* FISTP m16 */
6545 DIP("fistps %s\n", dis_buf);
6546 storeLE( mkexpr(addr),
6547 x87ishly_qnarrow_32_to_16(
sewardj6c299f32009-12-31 18:00:12 +00006548 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
sewardjb707d102007-07-11 22:49:26 +00006549 fp_pop();
6550 break;
sewardj37d52572005-02-25 14:22:12 +00006551
6552 case 5: /* FILD m64 */
6553 DIP("fildll %s\n", dis_buf);
6554 fp_push();
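               /* Unlike the narrower integer loads, a 64-bit integer may
                  not be exactly representable in an F64, so the current
                  rounding mode is honoured for this conversion. */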
sewardj6c299f32009-12-31 18:00:12 +00006555 put_ST(0, binop(Iop_I64StoF64,
sewardj37d52572005-02-25 14:22:12 +00006556 get_roundingmode(),
6557 loadLE(Ity_I64, mkexpr(addr))));
6558 break;
6559
sewardj6847d8c2005-05-12 19:21:55 +00006560 case 7: /* FISTP m64 */
6561 DIP("fistpll %s\n", dis_buf);
6562 storeLE( mkexpr(addr),
sewardj6c299f32009-12-31 18:00:12 +00006563 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
sewardj6847d8c2005-05-12 19:21:55 +00006564 fp_pop();
6565 break;
sewardjc49ce232005-02-25 13:03:03 +00006566
6567 default:
sewardj901ed122005-02-27 13:25:31 +00006568 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
sewardjc49ce232005-02-25 13:03:03 +00006569 vex_printf("first_opcode == 0xDF\n");
6570 goto decode_fail;
6571 }
6572
6573 } else {
6574
6575 delta++;
6576 switch (modrm) {
6577
6578 case 0xC0: /* FFREEP %st(0) */
6579 DIP("ffreep %%st(%d)\n", 0);
6580 put_ST_TAG ( 0, mkU8(0) );
6581 fp_pop();
6582 break;
6583
sewardj4f9847d2005-07-25 11:58:34 +00006584 case 0xE0: /* FNSTSW %ax */
6585 DIP("fnstsw %%ax\n");
6586 /* Invent a plausible-looking FPU status word value and
6587 dump it in %AX:
6588 ((ftop & 7) << 11) | (c3210 & 0x4700)
6589 */
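            /* For example (roughly): with ftop == 5 and only C0 set in
               C3210 (0x100), AX becomes (5 << 11) | 0x100 = 0x2900. */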
6590 putIRegRAX(
6591 2,
6592 unop(Iop_32to16,
6593 binop(Iop_Or32,
6594 binop(Iop_Shl32,
6595 binop(Iop_And32, get_ftop(), mkU32(7)),
6596 mkU8(11)),
6597 binop(Iop_And32,
6598 unop(Iop_64to32, get_C3210()),
6599 mkU32(0x4700))
6600 )));
6601 break;
sewardj924215b2005-03-26 21:50:31 +00006602
6603 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
6604 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
6605 break;
6606
sewardj48a89d82005-05-06 11:50:13 +00006607 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
6608 /* not really right since COMIP != UCOMIP */
6609 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
6610 break;
sewardjc49ce232005-02-25 13:03:03 +00006611
6612 default:
6613 goto decode_fail;
6614 }
6615 }
6616
6617 }
sewardj8d965312005-02-25 02:48:47 +00006618
6619 else
sewardj137015d2005-03-27 04:01:15 +00006620 goto decode_fail;
sewardj8d965312005-02-25 02:48:47 +00006621
6622 *decode_ok = True;
6623 return delta;
6624
6625 decode_fail:
6626 *decode_ok = False;
6627 return delta;
6628}
6629
6630
sewardj8711f662005-05-09 17:52:56 +00006631/*------------------------------------------------------------*/
6632/*--- ---*/
6633/*--- MMX INSTRUCTIONS ---*/
6634/*--- ---*/
6635/*------------------------------------------------------------*/
6636
6637/* Effect of MMX insns on x87 FPU state (table 11-2 of
6638 IA32 arch manual, volume 3):
6639
6640 Read from, or write to MMX register (viz, any insn except EMMS):
6641 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
6642 * FP stack pointer set to zero
6643
6644 EMMS:
6645 * All tags set to Invalid (empty) -- FPTAGS[i] := zero
6646 * FP stack pointer set to zero
6647*/
6648
6649static void do_MMX_preamble ( void )
6650{
sewardjdd40fdf2006-12-24 02:20:24 +00006651 Int i;
6652 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
6653 IRExpr* zero = mkU32(0);
6654 IRExpr* tag1 = mkU8(1);
sewardj8711f662005-05-09 17:52:56 +00006655 put_ftop(zero);
6656 for (i = 0; i < 8; i++)
floriand6f38b32012-05-31 15:46:18 +00006657 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
sewardj8711f662005-05-09 17:52:56 +00006658}
6659
6660static void do_EMMS_preamble ( void )
6661{
sewardjdd40fdf2006-12-24 02:20:24 +00006662 Int i;
6663 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
6664 IRExpr* zero = mkU32(0);
6665 IRExpr* tag0 = mkU8(0);
sewardj8711f662005-05-09 17:52:56 +00006666 put_ftop(zero);
6667 for (i = 0; i < 8; i++)
floriand6f38b32012-05-31 15:46:18 +00006668 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
sewardj8711f662005-05-09 17:52:56 +00006669}
6670
6671
6672static IRExpr* getMMXReg ( UInt archreg )
6673{
6674 vassert(archreg < 8);
6675 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
6676}
6677
6678
6679static void putMMXReg ( UInt archreg, IRExpr* e )
6680{
6681 vassert(archreg < 8);
sewardjdd40fdf2006-12-24 02:20:24 +00006682 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
sewardj8711f662005-05-09 17:52:56 +00006683 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
6684}
6685
6686
6687/* Helper for non-shift MMX insns. Note this is incomplete in the
6688 sense that it does not first call do_MMX_preamble() -- that is the
6689 responsibility of its caller. */
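/* A rough sketch of the flow below: for, say, PADDB (opc 0xFC) with a
   register E operand, op becomes Iop_Add8x8, argG is the destination MMX
   register and argE the source, and binop(op, argL, argR) is written back
   to the G register.  When eLeft is set (the pack and interleave cases),
   E and G are swapped so that E supplies the left argument of the IROp. */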
6690
6691static
sewardj2e28ac42008-12-04 00:05:12 +00006692ULong dis_MMXop_regmem_to_reg ( VexAbiInfo* vbi,
6693 Prefix pfx,
6694 Long delta,
6695 UChar opc,
6696 HChar* name,
6697 Bool show_granularity )
sewardj8711f662005-05-09 17:52:56 +00006698{
6699 HChar dis_buf[50];
6700 UChar modrm = getUChar(delta);
6701 Bool isReg = epartIsReg(modrm);
6702 IRExpr* argL = NULL;
6703 IRExpr* argR = NULL;
6704 IRExpr* argG = NULL;
6705 IRExpr* argE = NULL;
6706 IRTemp res = newTemp(Ity_I64);
6707
6708 Bool invG = False;
6709 IROp op = Iop_INVALID;
6710 void* hAddr = NULL;
sewardjca673ab2005-05-11 10:03:08 +00006711 HChar* hName = NULL;
sewardj8711f662005-05-09 17:52:56 +00006712 Bool eLeft = False;
6713
6714# define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
6715
6716 switch (opc) {
6717 /* Original MMX ones */
6718 case 0xFC: op = Iop_Add8x8; break;
6719 case 0xFD: op = Iop_Add16x4; break;
6720 case 0xFE: op = Iop_Add32x2; break;
6721
6722 case 0xEC: op = Iop_QAdd8Sx8; break;
6723 case 0xED: op = Iop_QAdd16Sx4; break;
6724
6725 case 0xDC: op = Iop_QAdd8Ux8; break;
6726 case 0xDD: op = Iop_QAdd16Ux4; break;
6727
6728 case 0xF8: op = Iop_Sub8x8; break;
6729 case 0xF9: op = Iop_Sub16x4; break;
6730 case 0xFA: op = Iop_Sub32x2; break;
6731
6732 case 0xE8: op = Iop_QSub8Sx8; break;
6733 case 0xE9: op = Iop_QSub16Sx4; break;
6734
6735 case 0xD8: op = Iop_QSub8Ux8; break;
6736 case 0xD9: op = Iop_QSub16Ux4; break;
6737
6738 case 0xE5: op = Iop_MulHi16Sx4; break;
6739 case 0xD5: op = Iop_Mul16x4; break;
6740 case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;
6741
6742 case 0x74: op = Iop_CmpEQ8x8; break;
6743 case 0x75: op = Iop_CmpEQ16x4; break;
6744 case 0x76: op = Iop_CmpEQ32x2; break;
6745
6746 case 0x64: op = Iop_CmpGT8Sx8; break;
6747 case 0x65: op = Iop_CmpGT16Sx4; break;
6748 case 0x66: op = Iop_CmpGT32Sx2; break;
6749
sewardj5f438dd2011-06-16 11:36:23 +00006750 case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
6751 case 0x63: op = Iop_QNarrowBin16Sto8Sx8; eLeft = True; break;
6752 case 0x67: op = Iop_QNarrowBin16Sto8Ux8; eLeft = True; break;
sewardj8711f662005-05-09 17:52:56 +00006753
6754 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break;
6755 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
6756 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;
6757
6758 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break;
6759 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
6760 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;
6761
6762 case 0xDB: op = Iop_And64; break;
6763 case 0xDF: op = Iop_And64; invG = True; break;
6764 case 0xEB: op = Iop_Or64; break;
6765 case 0xEF: /* Possibly do better here if argL and argR are the
6766 same reg */
6767 op = Iop_Xor64; break;
6768
6769 /* Introduced in SSE1 */
6770 case 0xE0: op = Iop_Avg8Ux8; break;
6771 case 0xE3: op = Iop_Avg16Ux4; break;
6772 case 0xEE: op = Iop_Max16Sx4; break;
6773 case 0xDE: op = Iop_Max8Ux8; break;
6774 case 0xEA: op = Iop_Min16Sx4; break;
6775 case 0xDA: op = Iop_Min8Ux8; break;
6776 case 0xE4: op = Iop_MulHi16Ux4; break;
sewardja7ba8c42005-05-10 20:08:34 +00006777 case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;
sewardj8711f662005-05-09 17:52:56 +00006778
6779 /* Introduced in SSE2 */
6780 case 0xD4: op = Iop_Add64; break;
6781 case 0xFB: op = Iop_Sub64; break;
6782
6783 default:
6784 vex_printf("\n0x%x\n", (Int)opc);
6785 vpanic("dis_MMXop_regmem_to_reg");
6786 }
6787
6788# undef XXX
6789
6790 argG = getMMXReg(gregLO3ofRM(modrm));
6791 if (invG)
6792 argG = unop(Iop_Not64, argG);
6793
6794 if (isReg) {
6795 delta++;
6796 argE = getMMXReg(eregLO3ofRM(modrm));
6797 } else {
6798 Int len;
sewardj2e28ac42008-12-04 00:05:12 +00006799 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj8711f662005-05-09 17:52:56 +00006800 delta += len;
6801 argE = loadLE(Ity_I64, mkexpr(addr));
6802 }
6803
6804 if (eLeft) {
6805 argL = argE;
6806 argR = argG;
6807 } else {
6808 argL = argG;
6809 argR = argE;
6810 }
6811
6812 if (op != Iop_INVALID) {
6813 vassert(hName == NULL);
6814 vassert(hAddr == NULL);
6815 assign(res, binop(op, argL, argR));
6816 } else {
6817 vassert(hName != NULL);
6818 vassert(hAddr != NULL);
6819 assign( res,
6820 mkIRExprCCall(
6821 Ity_I64,
6822 0/*regparms*/, hName, hAddr,
6823 mkIRExprVec_2( argL, argR )
6824 )
6825 );
6826 }
6827
6828 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
6829
6830 DIP("%s%s %s, %s\n",
6831 name, show_granularity ? nameMMXGran(opc & 3) : "",
6832 ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ),
6833 nameMMXReg(gregLO3ofRM(modrm)) );
6834
6835 return delta;
6836}
6837
6838
6839/* Vector by scalar shift of G by the amount specified at the bottom
6840 of E. This is a straight copy of dis_SSE_shiftG_byE. */
6841
sewardj2e28ac42008-12-04 00:05:12 +00006842static ULong dis_MMX_shiftG_byE ( VexAbiInfo* vbi,
6843 Prefix pfx, Long delta,
sewardj8711f662005-05-09 17:52:56 +00006844 HChar* opname, IROp op )
6845{
6846 HChar dis_buf[50];
6847 Int alen, size;
6848 IRTemp addr;
6849 Bool shl, shr, sar;
6850 UChar rm = getUChar(delta);
6851 IRTemp g0 = newTemp(Ity_I64);
6852 IRTemp g1 = newTemp(Ity_I64);
6853 IRTemp amt = newTemp(Ity_I64);
6854 IRTemp amt8 = newTemp(Ity_I8);
6855
6856 if (epartIsReg(rm)) {
6857 assign( amt, getMMXReg(eregLO3ofRM(rm)) );
6858 DIP("%s %s,%s\n", opname,
6859 nameMMXReg(eregLO3ofRM(rm)),
6860 nameMMXReg(gregLO3ofRM(rm)) );
6861 delta++;
6862 } else {
sewardj2e28ac42008-12-04 00:05:12 +00006863 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj8711f662005-05-09 17:52:56 +00006864 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
6865 DIP("%s %s,%s\n", opname,
6866 dis_buf,
6867 nameMMXReg(gregLO3ofRM(rm)) );
6868 delta += alen;
6869 }
6870 assign( g0, getMMXReg(gregLO3ofRM(rm)) );
6871 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
6872
6873 shl = shr = sar = False;
6874 size = 0;
6875 switch (op) {
6876       case Iop_ShlN16x4: shl = True; size = 16; break;
6877 case Iop_ShlN32x2: shl = True; size = 32; break;
6878 case Iop_Shl64: shl = True; size = 64; break;
6879 case Iop_ShrN16x4: shr = True; size = 16; break;
6880 case Iop_ShrN32x2: shr = True; size = 32; break;
6881 case Iop_Shr64: shr = True; size = 64; break;
6882 case Iop_SarN16x4: sar = True; size = 16; break;
6883 case Iop_SarN32x2: sar = True; size = 32; break;
6884 default: vassert(0);
6885 }
6886
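   /* Out-of-range shift amounts are handled explicitly here rather than
      being fed to the vector shift IROps: a logical shift by >= the lane
      size yields zero, and an arithmetic shift by >= the lane size acts
      like a shift by (lane size - 1), matching what the hardware does
      for register-specified MMX shift counts. */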
6887 if (shl || shr) {
6888 assign(
6889 g1,
6890 IRExpr_Mux0X(
6891 unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))),
6892 mkU64(0),
6893 binop(op, mkexpr(g0), mkexpr(amt8))
6894 )
6895 );
6896 } else
6897 if (sar) {
6898 assign(
6899 g1,
6900 IRExpr_Mux0X(
6901 unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))),
6902 binop(op, mkexpr(g0), mkU8(size-1)),
6903 binop(op, mkexpr(g0), mkexpr(amt8))
6904 )
6905 );
6906 } else {
6907 vassert(0);
6908 }
6909
6910 putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
6911 return delta;
6912}
6913
6914
sewardj3d8107c2005-05-09 22:23:38 +00006915/* Vector by scalar shift of E by an immediate byte. This is a
6916 straight copy of dis_SSE_shiftE_imm. */
6917
6918static
sewardj270def42005-07-03 01:03:01 +00006919ULong dis_MMX_shiftE_imm ( Long delta, HChar* opname, IROp op )
sewardj3d8107c2005-05-09 22:23:38 +00006920{
6921 Bool shl, shr, sar;
6922 UChar rm = getUChar(delta);
6923 IRTemp e0 = newTemp(Ity_I64);
6924 IRTemp e1 = newTemp(Ity_I64);
6925 UChar amt, size;
6926 vassert(epartIsReg(rm));
6927 vassert(gregLO3ofRM(rm) == 2
6928 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
sewardjca673ab2005-05-11 10:03:08 +00006929 amt = getUChar(delta+1);
sewardj3d8107c2005-05-09 22:23:38 +00006930 delta += 2;
6931 DIP("%s $%d,%s\n", opname,
6932 (Int)amt,
6933 nameMMXReg(eregLO3ofRM(rm)) );
6934
6935 assign( e0, getMMXReg(eregLO3ofRM(rm)) );
6936
6937 shl = shr = sar = False;
6938 size = 0;
6939 switch (op) {
6940 case Iop_ShlN16x4: shl = True; size = 16; break;
6941 case Iop_ShlN32x2: shl = True; size = 32; break;
6942 case Iop_Shl64: shl = True; size = 64; break;
6943 case Iop_SarN16x4: sar = True; size = 16; break;
6944 case Iop_SarN32x2: sar = True; size = 32; break;
6945 case Iop_ShrN16x4: shr = True; size = 16; break;
6946 case Iop_ShrN32x2: shr = True; size = 32; break;
6947 case Iop_Shr64: shr = True; size = 64; break;
6948 default: vassert(0);
6949 }
6950
6951 if (shl || shr) {
6952 assign( e1, amt >= size
6953 ? mkU64(0)
6954 : binop(op, mkexpr(e0), mkU8(amt))
6955 );
6956 } else
6957 if (sar) {
6958 assign( e1, amt >= size
6959 ? binop(op, mkexpr(e0), mkU8(size-1))
6960 : binop(op, mkexpr(e0), mkU8(amt))
6961 );
6962 } else {
6963 vassert(0);
6964 }
6965
6966 putMMXReg( eregLO3ofRM(rm), mkexpr(e1) );
6967 return delta;
6968}
sewardj8711f662005-05-09 17:52:56 +00006969
6970
6971/* Completely handle all MMX instructions except emms. */
6972
6973static
sewardj2e28ac42008-12-04 00:05:12 +00006974ULong dis_MMX ( Bool* decode_ok,
6975 VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta )
sewardj8711f662005-05-09 17:52:56 +00006976{
6977 Int len;
6978 UChar modrm;
6979 HChar dis_buf[50];
6980 UChar opc = getUChar(delta);
6981 delta++;
6982
6983 /* dis_MMX handles all insns except emms. */
6984 do_MMX_preamble();
6985
6986 switch (opc) {
6987
sewardj3d8107c2005-05-09 22:23:38 +00006988 case 0x6E:
6989 if (sz == 4) {
6990 /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
6991 modrm = getUChar(delta);
6992 if (epartIsReg(modrm)) {
6993 delta++;
6994 putMMXReg(
6995 gregLO3ofRM(modrm),
6996 binop( Iop_32HLto64,
6997 mkU32(0),
6998 getIReg32(eregOfRexRM(pfx,modrm)) ) );
6999 DIP("movd %s, %s\n",
7000 nameIReg32(eregOfRexRM(pfx,modrm)),
7001 nameMMXReg(gregLO3ofRM(modrm)));
7002 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007003 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj3d8107c2005-05-09 22:23:38 +00007004 delta += len;
7005 putMMXReg(
7006 gregLO3ofRM(modrm),
7007 binop( Iop_32HLto64,
7008 mkU32(0),
7009 loadLE(Ity_I32, mkexpr(addr)) ) );
7010 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7011 }
7012 }
7013 else
7014 if (sz == 8) {
7015 /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
7016 modrm = getUChar(delta);
7017 if (epartIsReg(modrm)) {
7018 delta++;
7019 putMMXReg( gregLO3ofRM(modrm),
7020 getIReg64(eregOfRexRM(pfx,modrm)) );
7021 DIP("movd %s, %s\n",
7022 nameIReg64(eregOfRexRM(pfx,modrm)),
7023 nameMMXReg(gregLO3ofRM(modrm)));
7024 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007025 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj3d8107c2005-05-09 22:23:38 +00007026 delta += len;
7027 putMMXReg( gregLO3ofRM(modrm),
7028 loadLE(Ity_I64, mkexpr(addr)) );
7029 DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7030 }
7031 }
7032 else {
7033 goto mmx_decode_failure;
7034 }
7035 break;
7036
7037 case 0x7E:
7038 if (sz == 4) {
7039 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */
7040 modrm = getUChar(delta);
7041 if (epartIsReg(modrm)) {
7042 delta++;
7043 putIReg32( eregOfRexRM(pfx,modrm),
7044 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
7045 DIP("movd %s, %s\n",
7046 nameMMXReg(gregLO3ofRM(modrm)),
7047 nameIReg32(eregOfRexRM(pfx,modrm)));
7048 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007049 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj3d8107c2005-05-09 22:23:38 +00007050 delta += len;
7051 storeLE( mkexpr(addr),
7052 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
7053 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7054 }
7055 }
7056 else
7057 if (sz == 8) {
7058 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */
7059 modrm = getUChar(delta);
7060 if (epartIsReg(modrm)) {
7061 delta++;
7062 putIReg64( eregOfRexRM(pfx,modrm),
7063 getMMXReg(gregLO3ofRM(modrm)) );
7064 DIP("movd %s, %s\n",
7065 nameMMXReg(gregLO3ofRM(modrm)),
7066 nameIReg64(eregOfRexRM(pfx,modrm)));
7067 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007068 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj3d8107c2005-05-09 22:23:38 +00007069 delta += len;
7070 storeLE( mkexpr(addr),
7071 getMMXReg(gregLO3ofRM(modrm)) );
7072 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7073 }
7074 } else {
7075 goto mmx_decode_failure;
7076 }
7077 break;
sewardj8711f662005-05-09 17:52:56 +00007078
7079 case 0x6F:
7080 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
sewardj7bdd1bc2008-12-13 16:49:46 +00007081 if (sz != 4
7082 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
sewardj8711f662005-05-09 17:52:56 +00007083 goto mmx_decode_failure;
7084 modrm = getUChar(delta);
7085 if (epartIsReg(modrm)) {
7086 delta++;
7087 putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) );
7088 DIP("movq %s, %s\n",
7089 nameMMXReg(eregLO3ofRM(modrm)),
7090 nameMMXReg(gregLO3ofRM(modrm)));
7091 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007092 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj8711f662005-05-09 17:52:56 +00007093 delta += len;
7094 putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
7095 DIP("movq %s, %s\n",
7096 dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7097 }
7098 break;
7099
7100 case 0x7F:
7101 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
sewardj7bdd1bc2008-12-13 16:49:46 +00007102 if (sz != 4
7103 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
sewardj8711f662005-05-09 17:52:56 +00007104 goto mmx_decode_failure;
7105 modrm = getUChar(delta);
7106 if (epartIsReg(modrm)) {
7107 /* Fall through. The assembler doesn't appear to generate
7108 these. */
7109 goto mmx_decode_failure;
7110 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007111 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj8711f662005-05-09 17:52:56 +00007112 delta += len;
7113 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
7114 DIP("mov(nt)q %s, %s\n",
7115 nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7116 }
7117 break;
7118
7119 case 0xFC:
7120 case 0xFD:
7121 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
7122 if (sz != 4)
7123 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007124 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True );
sewardj8711f662005-05-09 17:52:56 +00007125 break;
7126
7127 case 0xEC:
7128 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
sewardj7bdd1bc2008-12-13 16:49:46 +00007129 if (sz != 4
7130 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
sewardj8711f662005-05-09 17:52:56 +00007131 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007132 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True );
sewardj8711f662005-05-09 17:52:56 +00007133 break;
7134
7135 case 0xDC:
7136 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7137 if (sz != 4)
7138 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007139 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True );
sewardj8711f662005-05-09 17:52:56 +00007140 break;
7141
7142 case 0xF8:
7143 case 0xF9:
7144 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
7145 if (sz != 4)
7146 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007147 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True );
sewardj8711f662005-05-09 17:52:56 +00007148 break;
7149
7150 case 0xE8:
7151 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
7152 if (sz != 4)
7153 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007154 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True );
sewardj8711f662005-05-09 17:52:56 +00007155 break;
7156
7157 case 0xD8:
7158 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7159 if (sz != 4)
7160 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007161 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True );
sewardj8711f662005-05-09 17:52:56 +00007162 break;
7163
7164 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
7165 if (sz != 4)
7166 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007167 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False );
sewardj8711f662005-05-09 17:52:56 +00007168 break;
7169
7170 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
7171 if (sz != 4)
7172 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007173 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False );
sewardj8711f662005-05-09 17:52:56 +00007174 break;
7175
7176 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
7177 vassert(sz == 4);
sewardj2e28ac42008-12-04 00:05:12 +00007178 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False );
sewardj8711f662005-05-09 17:52:56 +00007179 break;
7180
7181 case 0x74:
7182 case 0x75:
7183 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
7184 if (sz != 4)
7185 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007186 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True );
sewardj8711f662005-05-09 17:52:56 +00007187 break;
7188
7189 case 0x64:
7190 case 0x65:
7191 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
7192 if (sz != 4)
7193 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007194 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True );
sewardj8711f662005-05-09 17:52:56 +00007195 break;
7196
7197 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
7198 if (sz != 4)
7199 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007200 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False );
sewardj8711f662005-05-09 17:52:56 +00007201 break;
7202
7203 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
7204 if (sz != 4)
7205 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007206 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False );
sewardj8711f662005-05-09 17:52:56 +00007207 break;
7208
7209 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
7210 if (sz != 4)
7211 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007212 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packuswb", False );
sewardj8711f662005-05-09 17:52:56 +00007213 break;
7214
7215 case 0x68:
7216 case 0x69:
7217 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
sewardj23063322009-01-24 10:34:19 +00007218 if (sz != 4
7219 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
sewardj8711f662005-05-09 17:52:56 +00007220 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007221 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True );
sewardj8711f662005-05-09 17:52:56 +00007222 break;
7223
7224 case 0x60:
7225 case 0x61:
7226 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
sewardj7bdd1bc2008-12-13 16:49:46 +00007227 if (sz != 4
7228 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
sewardj8711f662005-05-09 17:52:56 +00007229 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007230 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True );
sewardj8711f662005-05-09 17:52:56 +00007231 break;
7232
7233 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
7234 if (sz != 4)
7235 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007236 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False );
sewardj8711f662005-05-09 17:52:56 +00007237 break;
7238
7239 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
7240 if (sz != 4)
7241 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007242 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False );
sewardj8711f662005-05-09 17:52:56 +00007243 break;
7244
7245 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
7246 if (sz != 4)
7247 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007248 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False );
sewardj8711f662005-05-09 17:52:56 +00007249 break;
7250
7251 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
7252 if (sz != 4)
7253 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007254 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False );
sewardj8711f662005-05-09 17:52:56 +00007255 break;
7256
sewardj2e28ac42008-12-04 00:05:12 +00007257# define SHIFT_BY_REG(_name,_op) \
7258 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \
sewardj8711f662005-05-09 17:52:56 +00007259 break;
7260
7261 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
7262 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
7263 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
7264 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
7265
7266 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
7267 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
7268 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
7269 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
7270
7271 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
7272 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
7273 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
7274
7275# undef SHIFT_BY_REG
sewardj3d8107c2005-05-09 22:23:38 +00007276
7277 case 0x71:
7278 case 0x72:
7279 case 0x73: {
7280 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
sewardjca673ab2005-05-11 10:03:08 +00007281 UChar byte2, subopc;
sewardj3d8107c2005-05-09 22:23:38 +00007282 if (sz != 4)
7283 goto mmx_decode_failure;
sewardjca673ab2005-05-11 10:03:08 +00007284 byte2 = getUChar(delta); /* amode / sub-opcode */
7285 subopc = toUChar( (byte2 >> 3) & 7 );
sewardj3d8107c2005-05-09 22:23:38 +00007286
7287# define SHIFT_BY_IMM(_name,_op) \
7288 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
7289 } while (0)
7290
7291 if (subopc == 2 /*SRL*/ && opc == 0x71)
7292 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
7293 else if (subopc == 2 /*SRL*/ && opc == 0x72)
7294 SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
7295 else if (subopc == 2 /*SRL*/ && opc == 0x73)
7296 SHIFT_BY_IMM("psrlq", Iop_Shr64);
7297
7298 else if (subopc == 4 /*SAR*/ && opc == 0x71)
7299 SHIFT_BY_IMM("psraw", Iop_SarN16x4);
7300 else if (subopc == 4 /*SAR*/ && opc == 0x72)
7301 SHIFT_BY_IMM("psrad", Iop_SarN32x2);
7302
7303 else if (subopc == 6 /*SHL*/ && opc == 0x71)
7304 SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
7305 else if (subopc == 6 /*SHL*/ && opc == 0x72)
7306 SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
7307 else if (subopc == 6 /*SHL*/ && opc == 0x73)
7308 SHIFT_BY_IMM("psllq", Iop_Shl64);
7309
7310 else goto mmx_decode_failure;
7311
7312# undef SHIFT_BY_IMM
7313 break;
7314 }
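      /* (Reading aid only.)  For opcodes 0x71/0x72/0x73 the reg field of
         the modrm byte is a sub-opcode rather than a register number:
         /2 selects the logical right shift, /4 the arithmetic right shift
         and /6 the left shift, which is what the subopc tests above are
         checking. */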
sewardj8711f662005-05-09 17:52:56 +00007315
sewardj02f79f12007-09-01 18:59:53 +00007316 case 0xF7: {
7317 IRTemp addr = newTemp(Ity_I64);
7318 IRTemp regD = newTemp(Ity_I64);
7319 IRTemp regM = newTemp(Ity_I64);
7320 IRTemp mask = newTemp(Ity_I64);
7321 IRTemp olddata = newTemp(Ity_I64);
7322 IRTemp newdata = newTemp(Ity_I64);
7323
7324 modrm = getUChar(delta);
7325 if (sz != 4 || (!epartIsReg(modrm)))
7326 goto mmx_decode_failure;
7327 delta++;
7328
sewardj2e28ac42008-12-04 00:05:12 +00007329 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
sewardj02f79f12007-09-01 18:59:53 +00007330 assign( regM, getMMXReg( eregLO3ofRM(modrm) ));
7331 assign( regD, getMMXReg( gregLO3ofRM(modrm) ));
7332 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
7333 assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
7334 assign( newdata,
7335 binop(Iop_Or64,
7336 binop(Iop_And64,
7337 mkexpr(regD),
7338 mkexpr(mask) ),
7339 binop(Iop_And64,
7340 mkexpr(olddata),
7341 unop(Iop_Not64, mkexpr(mask)))) );
7342 storeLE( mkexpr(addr), mkexpr(newdata) );
7343 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ),
7344 nameMMXReg( gregLO3ofRM(modrm) ) );
7345 break;
7346 }
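      /* (Reading aid only.)  The newdata expression above is a per-byte
         select: SarN8x8(regM, 7) turns each mask byte into 0x00 or 0xFF
         according to its top bit, so bytes of regD whose mask byte is 0xFF
         reach memory while the remaining bytes keep their old contents --
         exactly MASKMOVQ's "store only the bytes whose mask MSB is set"
         behaviour. */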
7347
sewardj8711f662005-05-09 17:52:56 +00007348 /* --- MMX decode failure --- */
7349 default:
7350 mmx_decode_failure:
7351 *decode_ok = False;
7352 return delta; /* ignored */
7353
7354 }
7355
7356 *decode_ok = True;
7357 return delta;
7358}
7359
7360
sewardj33ef9c22005-11-04 20:05:57 +00007361/*------------------------------------------------------------*/
7362/*--- More misc arithmetic and other obscure insns. ---*/
7363/*------------------------------------------------------------*/
7364
7365/* Generate base << amt with vacated places filled with stuff
7366 from xtra. amt guaranteed in 0 .. 63. */
7367static
7368IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt )
7369{
7370 /* if amt == 0
7371 then base
7372 else (base << amt) | (xtra >>u (64-amt))
7373 */
7374 return
7375 IRExpr_Mux0X(
7376 mkexpr(amt),
7377 mkexpr(base),
7378 binop(Iop_Or64,
7379 binop(Iop_Shl64, mkexpr(base), mkexpr(amt)),
7380 binop(Iop_Shr64, mkexpr(xtra),
7381 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
7382 )
7383 );
7384}
7385
7386/* Generate base >>u amt with vacated places filled with stuff
7387 from xtra. amt guaranteed in 0 .. 63. */
7388static
7389IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt )
7390{
7391 /* if amt == 0
7392 then base
7393 else (base >>u amt) | (xtra << (64-amt))
7394 */
7395 return
7396 IRExpr_Mux0X(
7397 mkexpr(amt),
7398 mkexpr(base),
7399 binop(Iop_Or64,
7400 binop(Iop_Shr64, mkexpr(base), mkexpr(amt)),
7401 binop(Iop_Shl64, mkexpr(xtra),
7402 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
7403 )
7404 );
7405}
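/* The two helpers above are easier to see in plain C.  The following is an
   illustrative sketch only (these functions are not referenced anywhere);
   the amt == 0 special case matters because a shift by 64 is undefined
   both in C and in the IR. */
static ULong ref_shiftL64_with_extras ( ULong base, ULong xtra, UInt amt )
{
   vassert(amt <= 63);
   return amt == 0 ? base : (base << amt) | (xtra >> (64 - amt));
}
static ULong ref_shiftR64_with_extras ( ULong xtra, ULong base, UInt amt )
{
   vassert(amt <= 63);
   return amt == 0 ? base : (base >> amt) | (xtra << (64 - amt));
}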
7406
7407/* Double length left and right shifts. Apparently only required in
7408 v-size (no b- variant). */
7409static
sewardj2e28ac42008-12-04 00:05:12 +00007410ULong dis_SHLRD_Gv_Ev ( VexAbiInfo* vbi,
7411 Prefix pfx,
sewardj33ef9c22005-11-04 20:05:57 +00007412 Long delta, UChar modrm,
7413 Int sz,
7414 IRExpr* shift_amt,
7415 Bool amt_is_literal,
sewardjf5268432005-11-05 02:58:55 +00007416 HChar* shift_amt_txt,
sewardj33ef9c22005-11-04 20:05:57 +00007417 Bool left_shift )
7418{
7419 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
7420 for printing it. And delta on entry points at the modrm byte. */
7421 Int len;
7422 HChar dis_buf[50];
7423
7424 IRType ty = szToITy(sz);
7425 IRTemp gsrc = newTemp(ty);
7426 IRTemp esrc = newTemp(ty);
7427 IRTemp addr = IRTemp_INVALID;
7428 IRTemp tmpSH = newTemp(Ity_I8);
7429 IRTemp tmpSS = newTemp(Ity_I8);
7430 IRTemp tmp64 = IRTemp_INVALID;
7431 IRTemp res64 = IRTemp_INVALID;
7432 IRTemp rss64 = IRTemp_INVALID;
7433 IRTemp resTy = IRTemp_INVALID;
7434 IRTemp rssTy = IRTemp_INVALID;
7435 Int mask = sz==8 ? 63 : 31;
7436
7437 vassert(sz == 2 || sz == 4 || sz == 8);
7438
7439 /* The E-part is the destination; this is shifted. The G-part
7440 supplies bits to be shifted into the E-part, but is not
7441 changed.
7442
7443 If shifting left, form a double-length word with E at the top
7444 and G at the bottom, and shift this left. The result is then in
7445 the high part.
7446
7447 If shifting right, form a double-length word with G at the top
7448 and E at the bottom, and shift this right. The result is then
7449 at the bottom. */
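   /* Worked example (illustration only): for sz == 4, a left shift, an
      8-bit amount, E = 0xAABBCCDD and G = 0x11223344, the double-length
      word is 0xAABBCCDD11223344; shifting it left by 8 and keeping the
      high 32 bits gives 0xBBCCDD11, which is what "shldl $8, %ebx, %eax"
      produces when E is %eax and G is %ebx.  The sub-shifted value (the
      same computation with amount-1) is only used to build the rflags
      thunk below. */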
7450
7451 /* Fetch the operands. */
7452
7453 assign( gsrc, getIRegG(sz, pfx, modrm) );
7454
7455 if (epartIsReg(modrm)) {
7456 delta++;
7457 assign( esrc, getIRegE(sz, pfx, modrm) );
7458 DIP("sh%cd%c %s, %s, %s\n",
7459 ( left_shift ? 'l' : 'r' ), nameISize(sz),
7460 shift_amt_txt,
7461 nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm));
7462 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007463 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
sewardj75ce3652005-11-04 20:49:36 +00007464 /* # bytes following amode */
7465 amt_is_literal ? 1 : 0 );
sewardj33ef9c22005-11-04 20:05:57 +00007466 delta += len;
7467 assign( esrc, loadLE(ty, mkexpr(addr)) );
7468 DIP("sh%cd%c %s, %s, %s\n",
7469 ( left_shift ? 'l' : 'r' ), nameISize(sz),
7470 shift_amt_txt,
7471 nameIRegG(sz, pfx, modrm), dis_buf);
7472 }
7473
7474 /* Calculate the masked shift amount (tmpSH), the masked subshift
7475 amount (tmpSS), the shifted value (res64) and the subshifted
7476 value (rss64). */
7477
7478 assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) );
7479 assign( tmpSS, binop(Iop_And8,
7480 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
7481 mkU8(mask)));
7482
7483 tmp64 = newTemp(Ity_I64);
7484 res64 = newTemp(Ity_I64);
7485 rss64 = newTemp(Ity_I64);
7486
7487 if (sz == 2 || sz == 4) {
7488
7489 /* G is xtra; E is data */
7490 /* what a freaking nightmare: */
7491 if (sz == 4 && left_shift) {
7492 assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) );
7493 assign( res64,
7494 binop(Iop_Shr64,
7495 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
7496 mkU8(32)) );
7497 assign( rss64,
7498 binop(Iop_Shr64,
7499 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)),
7500 mkU8(32)) );
7501 }
7502 else
7503 if (sz == 4 && !left_shift) {
7504 assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) );
7505 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
7506 assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) );
7507 }
7508 else
7509 if (sz == 2 && left_shift) {
7510 assign( tmp64,
7511 binop(Iop_32HLto64,
7512 binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)),
7513 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc))
7514 ));
7515 /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
7516 assign( res64,
7517 binop(Iop_Shr64,
7518 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
7519 mkU8(48)) );
7520 /* subshift formed by shifting [esrc'0000'0000'0000] */
7521 assign( rss64,
7522 binop(Iop_Shr64,
7523 binop(Iop_Shl64,
7524 binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)),
7525 mkU8(48)),
7526 mkexpr(tmpSS)),
7527 mkU8(48)) );
7528 }
7529 else
7530 if (sz == 2 && !left_shift) {
7531 assign( tmp64,
7532 binop(Iop_32HLto64,
7533 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)),
7534 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc))
7535 ));
7536 /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
7537 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
7538 /* subshift formed by shifting [0000'0000'0000'esrc] */
7539 assign( rss64, binop(Iop_Shr64,
7540 unop(Iop_16Uto64, mkexpr(esrc)),
7541 mkexpr(tmpSS)) );
7542 }
7543
7544 } else {
7545
7546 vassert(sz == 8);
7547 if (left_shift) {
7548 assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH ));
7549 assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS ));
7550 } else {
7551 assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH ));
7552 assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS ));
7553 }
7554
7555 }
7556
7557 resTy = newTemp(ty);
7558 rssTy = newTemp(ty);
7559 assign( resTy, narrowTo(ty, mkexpr(res64)) );
7560 assign( rssTy, narrowTo(ty, mkexpr(rss64)) );
7561
7562 /* Put result back and write the flags thunk. */
7563 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64,
7564 resTy, rssTy, ty, tmpSH );
7565
7566 if (epartIsReg(modrm)) {
7567 putIRegE(sz, pfx, modrm, mkexpr(resTy));
7568 } else {
7569 storeLE( mkexpr(addr), mkexpr(resTy) );
7570 }
7571
7572 if (amt_is_literal) delta++;
7573 return delta;
7574}
sewardj9ed16802005-08-24 10:46:19 +00007575
7576
7577/* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
7578 required. */
7579
7580typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;
7581
7582static HChar* nameBtOp ( BtOp op )
7583{
7584 switch (op) {
7585 case BtOpNone: return "";
7586 case BtOpSet: return "s";
7587 case BtOpReset: return "r";
7588 case BtOpComp: return "c";
7589 default: vpanic("nameBtOp(amd64)");
7590 }
7591}
7592
7593
7594static
sewardj2e28ac42008-12-04 00:05:12 +00007595ULong dis_bt_G_E ( VexAbiInfo* vbi,
7596 Prefix pfx, Int sz, Long delta, BtOp op )
sewardj9ed16802005-08-24 10:46:19 +00007597{
7598 HChar dis_buf[50];
7599 UChar modrm;
7600 Int len;
7601 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
sewardje9d8a262009-07-01 08:06:34 +00007602 t_addr1, t_rsp, t_mask, t_new;
sewardj9ed16802005-08-24 10:46:19 +00007603
7604 vassert(sz == 2 || sz == 4 || sz == 8);
7605
7606 t_fetched = t_bitno0 = t_bitno1 = t_bitno2
sewardje9d8a262009-07-01 08:06:34 +00007607 = t_addr0 = t_addr1 = t_rsp
7608 = t_mask = t_new = IRTemp_INVALID;
sewardj9ed16802005-08-24 10:46:19 +00007609
7610 t_fetched = newTemp(Ity_I8);
sewardje9d8a262009-07-01 08:06:34 +00007611 t_new = newTemp(Ity_I8);
sewardj9ed16802005-08-24 10:46:19 +00007612 t_bitno0 = newTemp(Ity_I64);
7613 t_bitno1 = newTemp(Ity_I64);
7614 t_bitno2 = newTemp(Ity_I8);
7615 t_addr1 = newTemp(Ity_I64);
7616 modrm = getUChar(delta);
7617
7618 assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) );
7619
7620 if (epartIsReg(modrm)) {
7621 delta++;
sewardj02834302010-07-29 18:10:51 +00007622 /* Get it onto the client's stack. Oh, this is a horrible
7623 kludge. See https://bugs.kde.org/show_bug.cgi?id=245925.
7624 Because of the ELF ABI stack redzone, there may be live data
7625 up to 128 bytes below %RSP. So we can't just push it on the
7626 stack, else we may wind up trashing live data, and causing
7627 impossible-to-find simulation errors. (Yes, this did
7628 happen.) So we need to drop RSP by at least 128 before
7629 pushing it. That unfortunately means hitting Memcheck's
7630 fast-case painting code. Ideally we should drop more than
7631 128, to reduce the chances of breaking buggy programs that
7632 have live data below -128(%RSP). Memcheck fast-cases moves
7633 of 288 bytes due to the need to handle ppc64-linux quickly,
7634 so let's use 288. Of course the real fix is to get rid of
7635 this kludge entirely. */
sewardj9ed16802005-08-24 10:46:19 +00007636 t_rsp = newTemp(Ity_I64);
7637 t_addr0 = newTemp(Ity_I64);
7638
sewardj02834302010-07-29 18:10:51 +00007639 vassert(vbi->guest_stack_redzone_size == 128);
7640 assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) );
sewardj9ed16802005-08-24 10:46:19 +00007641 putIReg64(R_RSP, mkexpr(t_rsp));
7642
7643 storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) );
7644
7645 /* Make t_addr0 point at it. */
7646 assign( t_addr0, mkexpr(t_rsp) );
7647
7648 /* Mask out upper bits of the shift amount, since we're doing a
7649 reg. */
7650 assign( t_bitno1, binop(Iop_And64,
7651 mkexpr(t_bitno0),
7652 mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) );
7653
7654 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007655 t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj9ed16802005-08-24 10:46:19 +00007656 delta += len;
7657 assign( t_bitno1, mkexpr(t_bitno0) );
7658 }
7659
7660 /* At this point: t_addr0 is the address being operated on. If it
7661 was a reg, we will have pushed it onto the client's stack.
7662 t_bitno1 is the bit number, suitably masked in the case of a
7663 reg. */
7664
7665 /* Now the main sequence. */
7666 assign( t_addr1,
7667 binop(Iop_Add64,
7668 mkexpr(t_addr0),
7669 binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) );
7670
7671 /* t_addr1 now holds effective address */
7672
7673 assign( t_bitno2,
7674 unop(Iop_64to8,
7675 binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) );
7676
7677 /* t_bitno2 contains offset of bit within byte */
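   /* Worked example (reading aid only): for a bit number of 19 the two
      statements above give t_addr1 = t_addr0 + 2 (19 >>s 3) and
      t_bitno2 = 3 (19 & 7), i.e. bit 3 of the third byte.  The arithmetic
      shift matters for the memory form, where the bit number is a signed
      displacement and may be negative. */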
7678
7679 if (op != BtOpNone) {
7680 t_mask = newTemp(Ity_I8);
7681 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
7682 }
7683
7684 /* t_mask is now a suitable byte mask */
7685
7686 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );
7687
7688 if (op != BtOpNone) {
7689 switch (op) {
sewardje9d8a262009-07-01 08:06:34 +00007690 case BtOpSet:
7691 assign( t_new,
7692 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
sewardj9ed16802005-08-24 10:46:19 +00007693 break;
sewardje9d8a262009-07-01 08:06:34 +00007694 case BtOpComp:
7695 assign( t_new,
7696 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
sewardj9ed16802005-08-24 10:46:19 +00007697 break;
sewardje9d8a262009-07-01 08:06:34 +00007698 case BtOpReset:
7699 assign( t_new,
7700 binop(Iop_And8, mkexpr(t_fetched),
7701 unop(Iop_Not8, mkexpr(t_mask))) );
sewardj9ed16802005-08-24 10:46:19 +00007702 break;
7703 default:
7704 vpanic("dis_bt_G_E(amd64)");
7705 }
sewardje9d8a262009-07-01 08:06:34 +00007706 if ((pfx & PFX_LOCK) && !epartIsReg(modrm)) {
7707 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
7708 mkexpr(t_new)/*new*/,
7709 guest_RIP_curr_instr );
7710 } else {
7711 storeLE( mkexpr(t_addr1), mkexpr(t_new) );
7712 }
sewardj9ed16802005-08-24 10:46:19 +00007713 }
sewardje9d8a262009-07-01 08:06:34 +00007714
sewardj9ed16802005-08-24 10:46:19 +00007715 /* Side effect done; now get selected bit into Carry flag */
7716 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
7717 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
7718 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
7719 stmt( IRStmt_Put(
7720 OFFB_CC_DEP1,
7721 binop(Iop_And64,
7722 binop(Iop_Shr64,
7723 unop(Iop_8Uto64, mkexpr(t_fetched)),
7724 mkexpr(t_bitno2)),
7725 mkU64(1)))
7726 );
7727 /* Set NDEP even though it isn't used. This makes redundant-PUT
7728 elimination of previous stores to this field work better. */
7729 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
7730
7731 /* Move reg operand from stack back to reg */
7732 if (epartIsReg(modrm)) {
sewardje9d8a262009-07-01 08:06:34 +00007733 /* t_rsp still points at it. */
sewardj25d23862006-05-12 17:47:21 +00007734 /* only write the reg if actually modifying it; doing otherwise
7735 zeroes the top half erroneously when doing btl due to
7736 standard zero-extend rule */
sewardje9d8a262009-07-01 08:06:34 +00007737 if (op != BtOpNone)
sewardj25d23862006-05-12 17:47:21 +00007738 putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) );
sewardj02834302010-07-29 18:10:51 +00007739 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) );
sewardj9ed16802005-08-24 10:46:19 +00007740 }
7741
7742 DIP("bt%s%c %s, %s\n",
7743 nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm),
7744 ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) );
7745
7746 return delta;
7747}
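/* For reference, an illustrative plain-C model of what the above builds
   for the memory forms (sketch only; not used by the decoder).  "bitno"
   is the possibly-negative bit displacement, "op" one of the BtOp values
   defined above, and the returned bit is what ends up in the carry
   flag.  Relies on ">>" of a negative value being an arithmetic shift,
   as it is on the supported compilers. */
static Bool ref_bt_mem ( UChar* base, Long bitno, BtOp op )
{
   UChar* p    = base + (bitno >> 3);      /* byte containing the bit */
   UChar  mask = toUChar(1 << (bitno & 7));
   Bool   old  = toBool((*p & mask) != 0);
   switch (op) {
      case BtOpSet:   *p = toUChar(*p | mask);  break;
      case BtOpReset: *p = toUChar(*p & ~mask); break;
      case BtOpComp:  *p = toUChar(*p ^ mask);  break;
      default:        break;                    /* BtOpNone: no store */
   }
   return old;
}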
sewardjf53b7352005-04-06 20:01:56 +00007748
7749
7750
7751/* Handle BSF/BSR. Only v-size seems necessary. */
7752static
sewardj2e28ac42008-12-04 00:05:12 +00007753ULong dis_bs_E_G ( VexAbiInfo* vbi,
7754 Prefix pfx, Int sz, Long delta, Bool fwds )
sewardjf53b7352005-04-06 20:01:56 +00007755{
7756 Bool isReg;
7757 UChar modrm;
7758 HChar dis_buf[50];
7759
7760 IRType ty = szToITy(sz);
7761 IRTemp src = newTemp(ty);
7762 IRTemp dst = newTemp(ty);
7763 IRTemp src64 = newTemp(Ity_I64);
7764 IRTemp dst64 = newTemp(Ity_I64);
7765 IRTemp src8 = newTemp(Ity_I8);
7766
7767 vassert(sz == 8 || sz == 4 || sz == 2);
7768
7769 modrm = getUChar(delta);
7770 isReg = epartIsReg(modrm);
7771 if (isReg) {
7772 delta++;
7773 assign( src, getIRegE(sz, pfx, modrm) );
7774 } else {
7775 Int len;
sewardj2e28ac42008-12-04 00:05:12 +00007776 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardjf53b7352005-04-06 20:01:56 +00007777 delta += len;
7778 assign( src, loadLE(ty, mkexpr(addr)) );
7779 }
7780
7781 DIP("bs%c%c %s, %s\n",
7782 fwds ? 'f' : 'r', nameISize(sz),
7783 ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ),
7784 nameIRegG(sz, pfx, modrm));
7785
7786 /* First, widen src to 64 bits if it is not already. */
7787 assign( src64, widenUto64(mkexpr(src)) );
7788
7789 /* Generate an 8-bit expression which is zero iff the
7790 original is zero, and nonzero otherwise */
7791 assign( src8,
7792 unop(Iop_1Uto8,
7793 binop(Iop_CmpNE64,
7794 mkexpr(src64), mkU64(0))) );
7795
7796 /* Flags: Z is 1 iff source value is zero. All others
7797 are undefined -- we force them to zero. */
7798 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
7799 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
7800 stmt( IRStmt_Put(
7801 OFFB_CC_DEP1,
7802 IRExpr_Mux0X( mkexpr(src8),
7803 /* src==0 */
7804 mkU64(AMD64G_CC_MASK_Z),
7805 /* src!=0 */
7806 mkU64(0)
7807 )
7808 ));
7809 /* Set NDEP even though it isn't used. This makes redundant-PUT
7810 elimination of previous stores to this field work better. */
7811 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
7812
7813 /* Result: iff source value is zero, we can't use
7814 Iop_Clz64/Iop_Ctz64 as they have no defined result in that case.
7815 But anyway, amd64 semantics say the result is undefined in
7816 such situations. Hence handle the zero case specially. */
7817
7818 /* Bleh. What we compute:
7819
7820 bsf64: if src == 0 then {dst is unchanged}
7821 else Ctz64(src)
7822
7823 bsr64: if src == 0 then {dst is unchanged}
7824 else 63 - Clz64(src)
7825
7826 bsf32: if src == 0 then {dst is unchanged}
7827 else Ctz64(32Uto64(src))
7828
7829 bsr32: if src == 0 then {dst is unchanged}
7830 else 63 - Clz64(32Uto64(src))
7831
7832 bsf16: if src == 0 then {dst is unchanged}
7833 else Ctz64(32Uto64(16Uto32(src)))
7834
7835 bsr16: if src == 0 then {dst is unchanged}
7836 else 63 - Clz64(32Uto64(16Uto32(src)))
7837 */
7838
7839 /* The main computation, guarding against zero. */
7840 assign( dst64,
7841 IRExpr_Mux0X(
7842 mkexpr(src8),
7843 /* src == 0 -- leave dst unchanged */
7844 widenUto64( getIRegG( sz, pfx, modrm ) ),
7845 /* src != 0 */
7846 fwds ? unop(Iop_Ctz64, mkexpr(src64))
7847 : binop(Iop_Sub64,
7848 mkU64(63),
7849 unop(Iop_Clz64, mkexpr(src64)))
7850 )
7851 );
7852
7853 if (sz == 2)
sewardje58967e2005-04-27 11:50:56 +00007854 assign( dst, unop(Iop_64to16, mkexpr(dst64)) );
sewardjf53b7352005-04-06 20:01:56 +00007855 else
7856 if (sz == 4)
7857 assign( dst, unop(Iop_64to32, mkexpr(dst64)) );
7858 else
7859 assign( dst, mkexpr(dst64) );
7860
7861 /* dump result back */
7862 putIRegG( sz, pfx, modrm, mkexpr(dst) );
7863
7864 return delta;
7865}
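/* Illustrative sketch only (not referenced by the decoder): the non-zero
   source results computed above, written as plain C loops.  For a zero
   source the guest destination register is left unchanged, so both
   helpers assume src != 0. */
static UInt ref_bsf64 ( ULong src )
{
   UInt i = 0;
   vassert(src != 0);
   while ((src & 1) == 0) { src >>= 1; i++; }
   return i;                              /* == Ctz64(src) */
}
static UInt ref_bsr64 ( ULong src )
{
   UInt i = 63;
   vassert(src != 0);
   while ((src >> i) == 0) i--;
   return i;                              /* == 63 - Clz64(src) */
}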
sewardja6b93d12005-02-17 09:28:28 +00007866
7867
7868/* swap rAX with the reg specified by reg and REX.B */
7869static
sewardj5b470602005-02-27 13:10:48 +00007870void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 )
sewardja6b93d12005-02-17 09:28:28 +00007871{
7872 IRType ty = szToITy(sz);
7873 IRTemp t1 = newTemp(ty);
7874 IRTemp t2 = newTemp(ty);
tom0fb4cbd2011-08-10 12:58:03 +00007875 vassert(sz == 2 || sz == 4 || sz == 8);
sewardj5b470602005-02-27 13:10:48 +00007876 vassert(regLo3 < 8);
sewardj2d4fcd52005-05-18 11:47:47 +00007877 if (sz == 8) {
7878 assign( t1, getIReg64(R_RAX) );
7879 assign( t2, getIRegRexB(8, pfx, regLo3) );
7880 putIReg64( R_RAX, mkexpr(t2) );
7881 putIRegRexB(8, pfx, regLo3, mkexpr(t1) );
tom0fb4cbd2011-08-10 12:58:03 +00007882 } else if (sz == 4) {
sewardj2d4fcd52005-05-18 11:47:47 +00007883 assign( t1, getIReg32(R_RAX) );
7884 assign( t2, getIRegRexB(4, pfx, regLo3) );
7885 putIReg32( R_RAX, mkexpr(t2) );
7886 putIRegRexB(4, pfx, regLo3, mkexpr(t1) );
tom0fb4cbd2011-08-10 12:58:03 +00007887 } else {
7888 assign( t1, getIReg16(R_RAX) );
7889 assign( t2, getIRegRexB(2, pfx, regLo3) );
7890 putIReg16( R_RAX, mkexpr(t2) );
7891 putIRegRexB(2, pfx, regLo3, mkexpr(t1) );
sewardj2d4fcd52005-05-18 11:47:47 +00007892 }
sewardja6b93d12005-02-17 09:28:28 +00007893 DIP("xchg%c %s, %s\n",
sewardj5b470602005-02-27 13:10:48 +00007894 nameISize(sz), nameIRegRAX(sz),
sewardj2d4fcd52005-05-18 11:47:47 +00007895 nameIRegRexB(sz,pfx, regLo3));
sewardja6b93d12005-02-17 09:28:28 +00007896}
7897
7898
sewardj905edbd2007-04-07 12:25:37 +00007899static
7900void codegen_SAHF ( void )
7901{
7902 /* Set the flags to:
7903 (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O)
7904 -- retain the old O flag
7905 | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
7906 |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C))
7907 */
7908 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
7909 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
7910 IRTemp oldflags = newTemp(Ity_I64);
7911 assign( oldflags, mk_amd64g_calculate_rflags_all() );
7912 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
7913 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
7914 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
7915 stmt( IRStmt_Put( OFFB_CC_DEP1,
7916 binop(Iop_Or64,
7917 binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)),
7918 binop(Iop_And64,
7919 binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)),
7920 mkU64(mask_SZACP))
7921 )
7922 ));
7923}
7924
7925
7926static
7927void codegen_LAHF ( void )
7928{
7929 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
7930 IRExpr* rax_with_hole;
7931 IRExpr* new_byte;
7932 IRExpr* new_rax;
7933 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
7934 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
7935
7936 IRTemp flags = newTemp(Ity_I64);
7937 assign( flags, mk_amd64g_calculate_rflags_all() );
7938
7939 rax_with_hole
7940 = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL));
7941 new_byte
7942 = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)),
7943 mkU64(1<<1));
7944 new_rax
7945 = binop(Iop_Or64, rax_with_hole,
7946 binop(Iop_Shl64, new_byte, mkU8(8)));
7947 putIReg64(R_RAX, new_rax);
7948}
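/* For reference (reading aid only): the AH byte produced by LAHF and
   consumed by SAHF is laid out as SF:ZF:0:AF:0:PF:1:CF, i.e. bit 7 = SF,
   bit 6 = ZF, bit 4 = AF, bit 2 = PF, bit 0 = CF, bit 1 hardwired to 1
   (hence the mkU64(1<<1) term above) and bits 3 and 5 zero.  OF is not
   transferred in either direction, which is why codegen_SAHF keeps the
   old OF value via AMD64G_CC_MASK_O. */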
7949
sewardja6b93d12005-02-17 09:28:28 +00007950
7951static
sewardjd0aa0a52006-08-17 01:20:01 +00007952ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok,
sewardj2e28ac42008-12-04 00:05:12 +00007953 VexAbiInfo* vbi,
sewardjd0aa0a52006-08-17 01:20:01 +00007954 Prefix pfx,
7955 Int size,
7956 Long delta0 )
sewardja6b93d12005-02-17 09:28:28 +00007957{
7958 HChar dis_buf[50];
7959 Int len;
7960
7961 IRType ty = szToITy(size);
7962 IRTemp acc = newTemp(ty);
7963 IRTemp src = newTemp(ty);
7964 IRTemp dest = newTemp(ty);
7965 IRTemp dest2 = newTemp(ty);
7966 IRTemp acc2 = newTemp(ty);
7967 IRTemp cond8 = newTemp(Ity_I8);
7968 IRTemp addr = IRTemp_INVALID;
7969 UChar rm = getUChar(delta0);
7970
sewardje9d8a262009-07-01 08:06:34 +00007971 /* There are 3 cases to consider:
7972
7973 reg-reg: ignore any lock prefix, generate sequence based
7974 on Mux0X
7975
7976 reg-mem, not locked: ignore any lock prefix, generate sequence
7977 based on Mux0X
7978
7979 reg-mem, locked: use IRCAS
7980 */
7981
sewardja6b93d12005-02-17 09:28:28 +00007982 if (epartIsReg(rm)) {
sewardje9d8a262009-07-01 08:06:34 +00007983 /* case 1 */
sewardj5b470602005-02-27 13:10:48 +00007984 assign( dest, getIRegE(size, pfx, rm) );
sewardja6b93d12005-02-17 09:28:28 +00007985 delta0++;
sewardje9d8a262009-07-01 08:06:34 +00007986 assign( src, getIRegG(size, pfx, rm) );
7987 assign( acc, getIRegRAX(size) );
7988 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
7989 assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
7990 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
7991 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
7992 putIRegRAX(size, mkexpr(acc2));
7993 putIRegE(size, pfx, rm, mkexpr(dest2));
sewardja6b93d12005-02-17 09:28:28 +00007994 DIP("cmpxchg%c %s,%s\n", nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00007995 nameIRegG(size,pfx,rm),
7996 nameIRegE(size,pfx,rm) );
sewardje9d8a262009-07-01 08:06:34 +00007997 }
7998 else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) {
7999 /* case 2 */
sewardj2e28ac42008-12-04 00:05:12 +00008000 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardja6b93d12005-02-17 09:28:28 +00008001 assign( dest, loadLE(ty, mkexpr(addr)) );
8002 delta0 += len;
sewardje9d8a262009-07-01 08:06:34 +00008003 assign( src, getIRegG(size, pfx, rm) );
8004 assign( acc, getIRegRAX(size) );
8005 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
8006 assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
8007 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
8008 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
8009 putIRegRAX(size, mkexpr(acc2));
8010 storeLE( mkexpr(addr), mkexpr(dest2) );
sewardja6b93d12005-02-17 09:28:28 +00008011 DIP("cmpxchg%c %s,%s\n", nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00008012 nameIRegG(size,pfx,rm), dis_buf);
sewardja6b93d12005-02-17 09:28:28 +00008013 }
sewardje9d8a262009-07-01 08:06:34 +00008014 else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) {
8015 /* case 3 */
8016 /* src is new value. acc is expected value. dest is old value.
8017 Compute success from the output of the IRCAS, and steer the
8018 new value for RAX accordingly: in case of success, RAX is
8019 unchanged. */
8020 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8021 delta0 += len;
8022 assign( src, getIRegG(size, pfx, rm) );
8023 assign( acc, getIRegRAX(size) );
8024 stmt( IRStmt_CAS(
8025 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
8026 NULL, mkexpr(acc), NULL, mkexpr(src) )
8027 ));
8028 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
8029 assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
8030 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
8031 putIRegRAX(size, mkexpr(acc2));
sewardj40d1d212009-07-12 13:01:17 +00008032 DIP("cmpxchg%c %s,%s\n", nameISize(size),
8033 nameIRegG(size,pfx,rm), dis_buf);
sewardja6b93d12005-02-17 09:28:28 +00008034 }
sewardje9d8a262009-07-01 08:06:34 +00008035 else vassert(0);
sewardja6b93d12005-02-17 09:28:28 +00008036
sewardjd0aa0a52006-08-17 01:20:01 +00008037 *ok = True;
sewardja6b93d12005-02-17 09:28:28 +00008038 return delta0;
8039}
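/* Illustrative sketch only: the architectural effect all three cases above
   implement, written as plain C on a 64-bit operand ("mem" is really a
   register in case 1, and the update is atomic in case 3). */
static void ref_cmpxchg64 ( ULong* rax, ULong* mem, ULong src, Bool* zf )
{
   ULong old = *mem;
   *zf = toBool(*rax == old);
   if (*zf)
      *mem = src;       /* success: destination takes the new value */
   else
      *rax = old;       /* failure: RAX is loaded with the old value */
}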
8040
sewardj3ca55a12005-01-27 16:06:23 +00008041
8042/* Handle conditional move instructions of the form
8043 cmovcc E(reg-or-mem), G(reg)
8044
8045 E(src) is reg-or-mem
8046 G(dst) is reg.
8047
8048 If E is reg, --> GET %E, tmps
8049 GET %G, tmpd
8050 CMOVcc tmps, tmpd
8051 PUT tmpd, %G
8052
8053 If E is mem --> (getAddr E) -> tmpa
8054 LD (tmpa), tmps
8055 GET %G, tmpd
8056 CMOVcc tmps, tmpd
8057 PUT tmpd, %G
8058*/
8059static
sewardj2e28ac42008-12-04 00:05:12 +00008060ULong dis_cmov_E_G ( VexAbiInfo* vbi,
8061 Prefix pfx,
sewardj3ca55a12005-01-27 16:06:23 +00008062 Int sz,
8063 AMD64Condcode cond,
sewardj270def42005-07-03 01:03:01 +00008064 Long delta0 )
sewardj3ca55a12005-01-27 16:06:23 +00008065{
sewardj8c332e22005-01-28 01:36:56 +00008066 UChar rm = getUChar(delta0);
sewardj3ca55a12005-01-27 16:06:23 +00008067 HChar dis_buf[50];
8068 Int len;
8069
8070 IRType ty = szToITy(sz);
8071 IRTemp tmps = newTemp(ty);
8072 IRTemp tmpd = newTemp(ty);
8073
8074 if (epartIsReg(rm)) {
sewardj5b470602005-02-27 13:10:48 +00008075 assign( tmps, getIRegE(sz, pfx, rm) );
8076 assign( tmpd, getIRegG(sz, pfx, rm) );
sewardj3ca55a12005-01-27 16:06:23 +00008077
sewardj5b470602005-02-27 13:10:48 +00008078 putIRegG( sz, pfx, rm,
sewardj3ca55a12005-01-27 16:06:23 +00008079 IRExpr_Mux0X( unop(Iop_1Uto8,
8080 mk_amd64g_calculate_condition(cond)),
8081 mkexpr(tmpd),
8082 mkexpr(tmps) )
8083 );
sewardje941eea2005-01-30 19:52:28 +00008084 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
sewardj5b470602005-02-27 13:10:48 +00008085 nameIRegE(sz,pfx,rm),
8086 nameIRegG(sz,pfx,rm));
sewardj3ca55a12005-01-27 16:06:23 +00008087 return 1+delta0;
8088 }
8089
8090 /* E refers to memory */
8091 {
sewardj2e28ac42008-12-04 00:05:12 +00008092 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardj3ca55a12005-01-27 16:06:23 +00008093 assign( tmps, loadLE(ty, mkexpr(addr)) );
sewardj5b470602005-02-27 13:10:48 +00008094 assign( tmpd, getIRegG(sz, pfx, rm) );
sewardj3ca55a12005-01-27 16:06:23 +00008095
sewardj5b470602005-02-27 13:10:48 +00008096 putIRegG( sz, pfx, rm,
sewardj3ca55a12005-01-27 16:06:23 +00008097 IRExpr_Mux0X( unop(Iop_1Uto8,
8098 mk_amd64g_calculate_condition(cond)),
8099 mkexpr(tmpd),
8100 mkexpr(tmps) )
8101 );
8102
sewardj7eaa7cf2005-01-31 18:55:22 +00008103 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
8104 dis_buf,
sewardj5b470602005-02-27 13:10:48 +00008105 nameIRegG(sz,pfx,rm));
sewardj3ca55a12005-01-27 16:06:23 +00008106 return len+delta0;
8107 }
8108}
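/* Note on the idiom used above (and throughout this file):
   IRExpr_Mux0X(c, x0, xX) evaluates to x0 when the 8-bit value c is zero
   and to xX otherwise, so Mux0X(cond, tmpd, tmps) reads as "if the
   condition fails keep the old destination, else take the source" --
   exactly the CMOVcc semantics. */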
8109
8110
sewardjb4fd2e72005-03-23 13:34:11 +00008111static
8112ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok,
sewardj2e28ac42008-12-04 00:05:12 +00008113 VexAbiInfo* vbi,
sewardj270def42005-07-03 01:03:01 +00008114 Prefix pfx, Int sz, Long delta0 )
sewardjb4fd2e72005-03-23 13:34:11 +00008115{
8116 Int len;
8117 UChar rm = getUChar(delta0);
8118 HChar dis_buf[50];
8119
8120 IRType ty = szToITy(sz);
8121 IRTemp tmpd = newTemp(ty);
8122 IRTemp tmpt0 = newTemp(ty);
8123 IRTemp tmpt1 = newTemp(ty);
sewardje9d8a262009-07-01 08:06:34 +00008124
8125 /* There are 3 cases to consider:
8126
sewardjc2433a82010-05-10 20:51:22 +00008127 reg-reg: ignore any lock prefix,
8128 generate 'naive' (non-atomic) sequence
sewardje9d8a262009-07-01 08:06:34 +00008129
8130 reg-mem, not locked: ignore any lock prefix, generate 'naive'
8131 (non-atomic) sequence
8132
8133 reg-mem, locked: use IRCAS
8134 */
sewardjb4fd2e72005-03-23 13:34:11 +00008135
8136 if (epartIsReg(rm)) {
sewardje9d8a262009-07-01 08:06:34 +00008137 /* case 1 */
sewardjc2433a82010-05-10 20:51:22 +00008138 assign( tmpd, getIRegE(sz, pfx, rm) );
8139 assign( tmpt0, getIRegG(sz, pfx, rm) );
8140 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8141 mkexpr(tmpd), mkexpr(tmpt0)) );
8142 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8143 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8144 putIRegE(sz, pfx, rm, mkexpr(tmpt1));
8145 DIP("xadd%c %s, %s\n",
8146 nameISize(sz), nameIRegG(sz,pfx,rm),
8147 nameIRegE(sz,pfx,rm));
8148 *decode_ok = True;
8149 return 1+delta0;
sewardje9d8a262009-07-01 08:06:34 +00008150 }
8151 else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) {
8152 /* case 2 */
sewardj2e28ac42008-12-04 00:05:12 +00008153 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardjb4fd2e72005-03-23 13:34:11 +00008154 assign( tmpd, loadLE(ty, mkexpr(addr)) );
8155 assign( tmpt0, getIRegG(sz, pfx, rm) );
sewardje9d8a262009-07-01 08:06:34 +00008156 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8157 mkexpr(tmpd), mkexpr(tmpt0)) );
sewardjb4fd2e72005-03-23 13:34:11 +00008158 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8159 storeLE( mkexpr(addr), mkexpr(tmpt1) );
8160 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8161 DIP("xadd%c %s, %s\n",
8162 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
sewardje9d8a262009-07-01 08:06:34 +00008163 *decode_ok = True;
sewardjb4fd2e72005-03-23 13:34:11 +00008164 return len+delta0;
8165 }
sewardje9d8a262009-07-01 08:06:34 +00008166 else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) {
8167 /* case 3 */
8168 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8169 assign( tmpd, loadLE(ty, mkexpr(addr)) );
8170 assign( tmpt0, getIRegG(sz, pfx, rm) );
8171 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8172 mkexpr(tmpd), mkexpr(tmpt0)) );
8173 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
8174 mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr );
8175 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8176 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8177 DIP("xadd%c %s, %s\n",
8178 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
8179 *decode_ok = True;
8180 return len+delta0;
8181 }
8182 /*UNREACHED*/
8183 vassert(0);
sewardjb4fd2e72005-03-23 13:34:11 +00008184}
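/* Illustrative sketch only: XADD's architectural effect in plain C (the
   locked case above obtains the same result atomically via casLE). */
static void ref_xadd64 ( ULong* dst, ULong* src )
{
   ULong sum = *dst + *src;
   *src = *dst;          /* G receives the old destination value */
   *dst = sum;           /* E receives the sum; rflags reflect the add */
}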
8185
sewardjd20c8852005-01-20 20:04:07 +00008186//.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
8187//..
8188//.. static
sewardj270def42005-07-03 01:03:01 +00008189//.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 )
sewardjd20c8852005-01-20 20:04:07 +00008190//.. {
8191//.. Int len;
8192//.. IRTemp addr;
sewardj8c332e22005-01-28 01:36:56 +00008193//.. UChar rm = getUChar(delta0);
sewardjd20c8852005-01-20 20:04:07 +00008194//.. HChar dis_buf[50];
8195//..
8196//.. if (epartIsReg(rm)) {
8197//.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
8198//.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
8199//.. return 1+delta0;
8200//.. } else {
8201//.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8202//.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
8203//.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
8204//.. return len+delta0;
8205//.. }
8206//.. }
8207//..
8208//.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
8209//.. dst is ireg and sz==4, zero out top half of it. */
8210//..
8211//.. static
8212//.. UInt dis_mov_Sw_Ew ( UChar sorb,
8213//.. Int sz,
8214//.. UInt delta0 )
8215//.. {
8216//.. Int len;
8217//.. IRTemp addr;
sewardj8c332e22005-01-28 01:36:56 +00008218//.. UChar rm = getUChar(delta0);
sewardjd20c8852005-01-20 20:04:07 +00008219//.. HChar dis_buf[50];
8220//..
8221//.. vassert(sz == 2 || sz == 4);
8222//..
8223//.. if (epartIsReg(rm)) {
8224//.. if (sz == 4)
8225//.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
8226//.. else
8227//.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
8228//..
8229//.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
8230//.. return 1+delta0;
8231//.. } else {
8232//.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8233//.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
8234//.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
8235//.. return len+delta0;
8236//.. }
8237//.. }
8238//..
8239//..
8240//.. static
8241//.. void dis_push_segreg ( UInt sreg, Int sz )
8242//.. {
8243//.. IRTemp t1 = newTemp(Ity_I16);
8244//.. IRTemp ta = newTemp(Ity_I32);
8245//.. vassert(sz == 2 || sz == 4);
8246//..
8247//.. assign( t1, getSReg(sreg) );
8248//.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
8249//.. putIReg(4, R_ESP, mkexpr(ta));
8250//.. storeLE( mkexpr(ta), mkexpr(t1) );
8251//..
8252//.. DIP("pushw %s\n", nameSReg(sreg));
8253//.. }
8254//..
8255//.. static
8256//.. void dis_pop_segreg ( UInt sreg, Int sz )
8257//.. {
8258//.. IRTemp t1 = newTemp(Ity_I16);
8259//.. IRTemp ta = newTemp(Ity_I32);
8260//.. vassert(sz == 2 || sz == 4);
8261//..
8262//.. assign( ta, getIReg(4, R_ESP) );
8263//.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
8264//..
8265//.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
8266//.. putSReg( sreg, mkexpr(t1) );
8267//.. DIP("pop %s\n", nameSReg(sreg));
8268//.. }
sewardj2f959cc2005-01-26 01:19:35 +00008269
8270static
sewardjc6f970f2012-04-02 21:54:49 +00008271void dis_ret ( /*MOD*/DisResult* dres, VexAbiInfo* vbi, ULong d64 )
sewardj2f959cc2005-01-26 01:19:35 +00008272{
8273 IRTemp t1 = newTemp(Ity_I64);
8274 IRTemp t2 = newTemp(Ity_I64);
sewardj5a9ffab2005-05-12 17:55:01 +00008275 IRTemp t3 = newTemp(Ity_I64);
sewardj2f959cc2005-01-26 01:19:35 +00008276 assign(t1, getIReg64(R_RSP));
8277 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
sewardj5a9ffab2005-05-12 17:55:01 +00008278 assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64)));
8279 putIReg64(R_RSP, mkexpr(t3));
sewardj478646f2008-05-01 20:13:04 +00008280 make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret");
sewardjc6f970f2012-04-02 21:54:49 +00008281 jmp_treg(dres, Ijk_Ret, t2);
8282 vassert(dres->whatNext == Dis_StopHere);
sewardj2f959cc2005-01-26 01:19:35 +00008283}
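/* Example (reading aid only): for "ret $16" the sequence above loads the
   return address from [RSP], sets RSP = RSP + 8 + 16, emits the redzone
   AbiHint, and then jumps to the loaded address. */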
8284
sewardj5b470602005-02-27 13:10:48 +00008285
sewardj1001dc42005-02-21 08:25:55 +00008286/*------------------------------------------------------------*/
8287/*--- SSE/SSE2/SSE3 helpers ---*/
8288/*------------------------------------------------------------*/
8289
8290/* Worker function; do not call directly.
8291 Handles full width G = G `op` E and G = (not G) `op` E.
8292*/
8293
sewardj8d965312005-02-25 02:48:47 +00008294static ULong dis_SSE_E_to_G_all_wrk (
sewardj2e28ac42008-12-04 00:05:12 +00008295 VexAbiInfo* vbi,
sewardj270def42005-07-03 01:03:01 +00008296 Prefix pfx, Long delta,
sewardj8d965312005-02-25 02:48:47 +00008297 HChar* opname, IROp op,
8298 Bool invertG
8299 )
sewardj9da16972005-02-21 13:58:26 +00008300{
8301 HChar dis_buf[50];
8302 Int alen;
8303 IRTemp addr;
8304 UChar rm = getUChar(delta);
8305 IRExpr* gpart
8306 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm)))
8307 : getXMMReg(gregOfRexRM(pfx,rm));
8308 if (epartIsReg(rm)) {
8309 putXMMReg( gregOfRexRM(pfx,rm),
8310 binop(op, gpart,
8311 getXMMReg(eregOfRexRM(pfx,rm))) );
8312 DIP("%s %s,%s\n", opname,
8313 nameXMMReg(eregOfRexRM(pfx,rm)),
8314 nameXMMReg(gregOfRexRM(pfx,rm)) );
8315 return delta+1;
8316 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008317 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj9da16972005-02-21 13:58:26 +00008318 putXMMReg( gregOfRexRM(pfx,rm),
8319 binop(op, gpart,
8320 loadLE(Ity_V128, mkexpr(addr))) );
8321 DIP("%s %s,%s\n", opname,
8322 dis_buf,
8323 nameXMMReg(gregOfRexRM(pfx,rm)) );
8324 return delta+alen;
8325 }
8326}
8327
8328
8329/* All lanes SSE binary operation, G = G `op` E. */
8330
8331static
sewardj2e28ac42008-12-04 00:05:12 +00008332ULong dis_SSE_E_to_G_all ( VexAbiInfo* vbi,
8333 Prefix pfx, Long delta,
sewardj8d965312005-02-25 02:48:47 +00008334 HChar* opname, IROp op )
sewardj9da16972005-02-21 13:58:26 +00008335{
sewardj2e28ac42008-12-04 00:05:12 +00008336 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False );
sewardj9da16972005-02-21 13:58:26 +00008337}
8338
sewardj8d965312005-02-25 02:48:47 +00008339/* All lanes SSE binary operation, G = (not G) `op` E. */
8340
8341static
sewardj2e28ac42008-12-04 00:05:12 +00008342ULong dis_SSE_E_to_G_all_invG ( VexAbiInfo* vbi,
8343 Prefix pfx, Long delta,
sewardj8d965312005-02-25 02:48:47 +00008344 HChar* opname, IROp op )
8345{
sewardj2e28ac42008-12-04 00:05:12 +00008346 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True );
sewardj8d965312005-02-25 02:48:47 +00008347}
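/* (Reading aid only.)  The plain variant is used for straightforward
   G = G `op` E operations, e.g. ANDPS, while the _invG variant handles
   the and-not style operations, e.g. ANDNPS, whose semantics are
   G = (not G) & E; the call sites appear later in this file. */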
8348
8349
8350/* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
8351
sewardj2e28ac42008-12-04 00:05:12 +00008352static ULong dis_SSE_E_to_G_lo32 ( VexAbiInfo* vbi,
8353 Prefix pfx, Long delta,
sewardj8d965312005-02-25 02:48:47 +00008354 HChar* opname, IROp op )
8355{
8356 HChar dis_buf[50];
8357 Int alen;
8358 IRTemp addr;
8359 UChar rm = getUChar(delta);
sewardj9c9ee3d2005-02-26 01:17:42 +00008360 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
sewardj8d965312005-02-25 02:48:47 +00008361 if (epartIsReg(rm)) {
sewardj9c9ee3d2005-02-26 01:17:42 +00008362 putXMMReg( gregOfRexRM(pfx,rm),
sewardj8d965312005-02-25 02:48:47 +00008363 binop(op, gpart,
8364 getXMMReg(eregOfRexRM(pfx,rm))) );
8365 DIP("%s %s,%s\n", opname,
8366 nameXMMReg(eregOfRexRM(pfx,rm)),
8367 nameXMMReg(gregOfRexRM(pfx,rm)) );
8368 return delta+1;
8369 } else {
8370 /* We can only do a 32-bit memory read, so the upper 3/4 of the
8371 E operand needs to be made simply of zeroes. */
8372 IRTemp epart = newTemp(Ity_V128);
sewardj2e28ac42008-12-04 00:05:12 +00008373 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj8d965312005-02-25 02:48:47 +00008374 assign( epart, unop( Iop_32UtoV128,
8375 loadLE(Ity_I32, mkexpr(addr))) );
8376 putXMMReg( gregOfRexRM(pfx,rm),
8377 binop(op, gpart, mkexpr(epart)) );
8378 DIP("%s %s,%s\n", opname,
8379 dis_buf,
8380 nameXMMReg(gregOfRexRM(pfx,rm)) );
8381 return delta+alen;
8382 }
8383}
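/* (Reading aid only.)  Zero-filling the upper 3 lanes of the E operand is
   harmless here: this helper is only used with the "lowest lane only"
   F0x4 operations, whose result copies lanes 1..3 from G, so the upper
   part of E is never observed.  The same argument applies to the 64-bit
   variant below. */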
sewardj1001dc42005-02-21 08:25:55 +00008384
8385
8386/* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
8387
sewardj2e28ac42008-12-04 00:05:12 +00008388static ULong dis_SSE_E_to_G_lo64 ( VexAbiInfo* vbi,
8389 Prefix pfx, Long delta,
sewardj8d965312005-02-25 02:48:47 +00008390 HChar* opname, IROp op )
sewardj1001dc42005-02-21 08:25:55 +00008391{
8392 HChar dis_buf[50];
8393 Int alen;
8394 IRTemp addr;
8395 UChar rm = getUChar(delta);
8396 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
8397 if (epartIsReg(rm)) {
8398 putXMMReg( gregOfRexRM(pfx,rm),
8399 binop(op, gpart,
8400 getXMMReg(eregOfRexRM(pfx,rm))) );
8401 DIP("%s %s,%s\n", opname,
8402 nameXMMReg(eregOfRexRM(pfx,rm)),
8403 nameXMMReg(gregOfRexRM(pfx,rm)) );
8404 return delta+1;
8405 } else {
8406 /* We can only do a 64-bit memory read, so the upper half of the
8407 E operand needs to be made simply of zeroes. */
8408 IRTemp epart = newTemp(Ity_V128);
sewardj2e28ac42008-12-04 00:05:12 +00008409 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj1001dc42005-02-21 08:25:55 +00008410 assign( epart, unop( Iop_64UtoV128,
8411 loadLE(Ity_I64, mkexpr(addr))) );
8412 putXMMReg( gregOfRexRM(pfx,rm),
8413 binop(op, gpart, mkexpr(epart)) );
8414 DIP("%s %s,%s\n", opname,
8415 dis_buf,
8416 nameXMMReg(gregOfRexRM(pfx,rm)) );
8417 return delta+alen;
8418 }
8419}
8420
8421
sewardja7ba8c42005-05-10 20:08:34 +00008422/* All lanes unary SSE operation, G = op(E). */
8423
8424static ULong dis_SSE_E_to_G_unary_all (
sewardj2e28ac42008-12-04 00:05:12 +00008425 VexAbiInfo* vbi,
sewardj270def42005-07-03 01:03:01 +00008426 Prefix pfx, Long delta,
sewardja7ba8c42005-05-10 20:08:34 +00008427 HChar* opname, IROp op
8428 )
8429{
8430 HChar dis_buf[50];
8431 Int alen;
8432 IRTemp addr;
8433 UChar rm = getUChar(delta);
8434 if (epartIsReg(rm)) {
8435 putXMMReg( gregOfRexRM(pfx,rm),
8436 unop(op, getXMMReg(eregOfRexRM(pfx,rm))) );
8437 DIP("%s %s,%s\n", opname,
8438 nameXMMReg(eregOfRexRM(pfx,rm)),
8439 nameXMMReg(gregOfRexRM(pfx,rm)) );
8440 return delta+1;
8441 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008442 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardja7ba8c42005-05-10 20:08:34 +00008443 putXMMReg( gregOfRexRM(pfx,rm),
8444 unop(op, loadLE(Ity_V128, mkexpr(addr))) );
8445 DIP("%s %s,%s\n", opname,
8446 dis_buf,
8447 nameXMMReg(gregOfRexRM(pfx,rm)) );
8448 return delta+alen;
8449 }
8450}
8451
8452
8453/* Lowest 32-bit lane only unary SSE operation, G = op(E). */
8454
8455static ULong dis_SSE_E_to_G_unary_lo32 (
sewardj2e28ac42008-12-04 00:05:12 +00008456 VexAbiInfo* vbi,
sewardj270def42005-07-03 01:03:01 +00008457 Prefix pfx, Long delta,
sewardja7ba8c42005-05-10 20:08:34 +00008458 HChar* opname, IROp op
8459 )
8460{
8461 /* First we need to get the old G value and patch the low 32 bits
8462 of the E operand into it. Then apply op and write back to G. */
8463 HChar dis_buf[50];
8464 Int alen;
8465 IRTemp addr;
8466 UChar rm = getUChar(delta);
8467 IRTemp oldG0 = newTemp(Ity_V128);
8468 IRTemp oldG1 = newTemp(Ity_V128);
8469
8470 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
8471
8472 if (epartIsReg(rm)) {
8473 assign( oldG1,
8474 binop( Iop_SetV128lo32,
8475 mkexpr(oldG0),
8476 getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) );
8477 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8478 DIP("%s %s,%s\n", opname,
8479 nameXMMReg(eregOfRexRM(pfx,rm)),
8480 nameXMMReg(gregOfRexRM(pfx,rm)) );
8481 return delta+1;
8482 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008483 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardja7ba8c42005-05-10 20:08:34 +00008484 assign( oldG1,
8485 binop( Iop_SetV128lo32,
8486 mkexpr(oldG0),
8487 loadLE(Ity_I32, mkexpr(addr)) ));
8488 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8489 DIP("%s %s,%s\n", opname,
8490 dis_buf,
8491 nameXMMReg(gregOfRexRM(pfx,rm)) );
8492 return delta+alen;
8493 }
8494}
sewardj1001dc42005-02-21 08:25:55 +00008495
8496
8497/* Lowest 64-bit lane only unary SSE operation, G = op(E). */
8498
sewardj8d965312005-02-25 02:48:47 +00008499static ULong dis_SSE_E_to_G_unary_lo64 (
sewardj2e28ac42008-12-04 00:05:12 +00008500 VexAbiInfo* vbi,
sewardj270def42005-07-03 01:03:01 +00008501 Prefix pfx, Long delta,
sewardj8d965312005-02-25 02:48:47 +00008502 HChar* opname, IROp op
8503 )
sewardj1001dc42005-02-21 08:25:55 +00008504{
8505 /* First we need to get the old G value and patch the low 64 bits
8506 of the E operand into it. Then apply op and write back to G. */
8507 HChar dis_buf[50];
8508 Int alen;
8509 IRTemp addr;
8510 UChar rm = getUChar(delta);
8511 IRTemp oldG0 = newTemp(Ity_V128);
8512 IRTemp oldG1 = newTemp(Ity_V128);
8513
8514 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
8515
8516 if (epartIsReg(rm)) {
8517 assign( oldG1,
8518 binop( Iop_SetV128lo64,
8519 mkexpr(oldG0),
8520 getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) );
8521 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8522 DIP("%s %s,%s\n", opname,
8523 nameXMMReg(eregOfRexRM(pfx,rm)),
8524 nameXMMReg(gregOfRexRM(pfx,rm)) );
8525 return delta+1;
8526 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008527 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj1001dc42005-02-21 08:25:55 +00008528 assign( oldG1,
8529 binop( Iop_SetV128lo64,
8530 mkexpr(oldG0),
8531 loadLE(Ity_I64, mkexpr(addr)) ));
8532 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8533 DIP("%s %s,%s\n", opname,
8534 dis_buf,
8535 nameXMMReg(gregOfRexRM(pfx,rm)) );
8536 return delta+alen;
8537 }
8538}
8539
8540
sewardj09717342005-05-05 21:34:02 +00008541/* SSE integer binary operation:
8542 G = G `op` E (eLeft == False)
8543 G = E `op` G (eLeft == True)
8544*/
8545static ULong dis_SSEint_E_to_G(
sewardj2e28ac42008-12-04 00:05:12 +00008546 VexAbiInfo* vbi,
sewardj270def42005-07-03 01:03:01 +00008547 Prefix pfx, Long delta,
sewardj09717342005-05-05 21:34:02 +00008548 HChar* opname, IROp op,
8549 Bool eLeft
8550 )
8551{
8552 HChar dis_buf[50];
8553 Int alen;
8554 IRTemp addr;
8555 UChar rm = getUChar(delta);
8556 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
8557 IRExpr* epart = NULL;
8558 if (epartIsReg(rm)) {
8559 epart = getXMMReg(eregOfRexRM(pfx,rm));
8560 DIP("%s %s,%s\n", opname,
8561 nameXMMReg(eregOfRexRM(pfx,rm)),
8562 nameXMMReg(gregOfRexRM(pfx,rm)) );
8563 delta += 1;
8564 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008565 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj09717342005-05-05 21:34:02 +00008566 epart = loadLE(Ity_V128, mkexpr(addr));
8567 DIP("%s %s,%s\n", opname,
8568 dis_buf,
8569 nameXMMReg(gregOfRexRM(pfx,rm)) );
8570 delta += alen;
8571 }
8572 putXMMReg( gregOfRexRM(pfx,rm),
8573 eLeft ? binop(op, epart, gpart)
8574 : binop(op, gpart, epart) );
8575 return delta;
8576}
sewardj8d965312005-02-25 02:48:47 +00008577
8578
sewardjc4530ae2012-05-21 10:18:49 +00008579/* Helper for doing SSE FP comparisons. False return ==> unhandled.
8580 This is all a bit of a kludge in that it ignores the subtleties of
8581 ordered-vs-unordered and signalling-vs-nonsignalling in the Intel
8582 spec. */
8583static Bool findSSECmpOp ( /*OUT*/Bool* preSwapP,
8584 /*OUT*/IROp* opP,
8585 /*OUT*/Bool* postNotP,
8586 UInt imm8, Bool all_lanes, Int sz )
sewardj8d965312005-02-25 02:48:47 +00008587{
sewardjc4530ae2012-05-21 10:18:49 +00008588 if (imm8 >= 32) return False;
8589
8590 /* First, compute a (preSwap, op, postNot) triple from
8591 the supplied imm8. */
8592 Bool pre = False;
8593 IROp op = Iop_INVALID;
8594 Bool not = False;
8595
8596# define XXX(_pre, _op, _not) { pre = _pre; op = _op; not = _not; }
sewardjd698a052012-06-25 07:40:54 +00008597 // If you add a case here, add a corresponding test for both VCMPSD_128
8598 // and VCMPSS_128 in avx-1.c.
sewardjc4530ae2012-05-21 10:18:49 +00008599 switch (imm8) {
sewardj2f8c0b92012-07-14 14:20:00 +00008600 // "O" = ordered, "U" = unordered
8601 // "Q" = non-signalling (quiet), "S" = signalling
8602 //
8603 // swap operands?
8604 // |
8605 // | cmp op invert after?
8606 // | | |
8607 // v v v
sewardjd698a052012-06-25 07:40:54 +00008608 case 0x0: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_OQ
8609 case 0x1: XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OS
8610 case 0x2: XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OS
8611 case 0x3: XXX(False, Iop_CmpUN32Fx4, False); break; // UNORD_Q
8612 case 0x4: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_UQ
8613 case 0x5: XXX(False, Iop_CmpLT32Fx4, True); break; // NLT_US
8614 case 0x6: XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_US
8615 case 0x7: XXX(False, Iop_CmpUN32Fx4, True); break; // ORD_Q
8616 // 0x8 EQ_UQ
sewardj2f8c0b92012-07-14 14:20:00 +00008617 case 0x9: XXX(True, Iop_CmpLE32Fx4, True); break; // NGE_US
sewardjc4530ae2012-05-21 10:18:49 +00008618 /* "Enhanced Comparison Predicate[s] for VEX-Encoded [insns]" */
sewardjd698a052012-06-25 07:40:54 +00008619 case 0xA: XXX(True, Iop_CmpLT32Fx4, True); break; // NGT_US
8620 // 0xB FALSE_OQ
sewardj68b01f72012-06-27 10:27:13 +00008621 // 0xC: this isn't really right because it returns all-1s when
8622 // either operand is a NaN, and it should return all-0s.
sewardjd698a052012-06-25 07:40:54 +00008623 case 0xC: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_OQ
8624 case 0xD: XXX(True, Iop_CmpLE32Fx4, False); break; // GE_OS
8625 case 0xE: XXX(True, Iop_CmpLT32Fx4, False); break; // GT_OS
8626 // 0xF TRUE_UQ
8627 // 0x10 EQ_OS
8628 case 0x11: XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OQ
8629 case 0x12: XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OQ
8630 // 0x13 UNORD_S
8631 // 0x14 NEQ_US
8632 // 0x15 NLT_UQ
8633 case 0x16: XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_UQ
8634 // 0x17 ORD_S
8635 // 0x18 EQ_US
8636 // 0x19 NGE_UQ
8637 // 0x1A NGT_UQ
8638 // 0x1B FALSE_OS
8639 // 0x1C NEQ_OS
8640 // 0x1D GE_OQ
8641 case 0x1E: XXX(True, Iop_CmpLT32Fx4, False); break; // GT_OQ
8642 // 0x1F TRUE_US
sewardjc93904b2012-05-27 13:50:42 +00008643 /* Don't forget to add test cases to VCMPSS_128_<imm8> in
8644 avx-1.c if new cases turn up. */
sewardjc4530ae2012-05-21 10:18:49 +00008645 default: break;
8646 }
8647# undef XXX
8648 if (op == Iop_INVALID) return False;
8649
8650 /* Now convert the op into one with the same arithmetic but that is
8651 correct for the width and laneage requirements. */
8652
8653 /**/ if (sz == 4 && all_lanes) {
8654 switch (op) {
8655 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32Fx4; break;
8656 case Iop_CmpLT32Fx4: op = Iop_CmpLT32Fx4; break;
8657 case Iop_CmpLE32Fx4: op = Iop_CmpLE32Fx4; break;
8658 case Iop_CmpUN32Fx4: op = Iop_CmpUN32Fx4; break;
8659 default: vassert(0);
8660 }
8661 }
8662 else if (sz == 4 && !all_lanes) {
8663 switch (op) {
8664 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32F0x4; break;
8665 case Iop_CmpLT32Fx4: op = Iop_CmpLT32F0x4; break;
8666 case Iop_CmpLE32Fx4: op = Iop_CmpLE32F0x4; break;
8667 case Iop_CmpUN32Fx4: op = Iop_CmpUN32F0x4; break;
8668 default: vassert(0);
8669 }
8670 }
8671 else if (sz == 8 && all_lanes) {
8672 switch (op) {
8673 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64Fx2; break;
8674 case Iop_CmpLT32Fx4: op = Iop_CmpLT64Fx2; break;
8675 case Iop_CmpLE32Fx4: op = Iop_CmpLE64Fx2; break;
8676 case Iop_CmpUN32Fx4: op = Iop_CmpUN64Fx2; break;
8677 default: vassert(0);
8678 }
8679 }
8680 else if (sz == 8 && !all_lanes) {
8681 switch (op) {
8682 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64F0x2; break;
8683 case Iop_CmpLT32Fx4: op = Iop_CmpLT64F0x2; break;
8684 case Iop_CmpLE32Fx4: op = Iop_CmpLE64F0x2; break;
8685 case Iop_CmpUN32Fx4: op = Iop_CmpUN64F0x2; break;
8686 default: vassert(0);
8687 }
8688 }
8689 else {
8690 vpanic("findSSECmpOp(amd64,guest)");
sewardj8d965312005-02-25 02:48:47 +00008691 }
8692
sewardjc4530ae2012-05-21 10:18:49 +00008693 *preSwapP = pre; *opP = op; *postNotP = not;
8694 return True;
sewardj8d965312005-02-25 02:48:47 +00008695}
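/* Worked examples (reading aid only): imm8 == 0x5 (NLT_US) yields
   (preSwap=False, op=CmpLT, postNot=True), i.e. compute "less than" and
   then invert each lane; imm8 == 0xD (GE_OS) yields (preSwap=True,
   op=CmpLE, postNot=False), i.e. swap the operands and test "less than
   or equal", which on the original operand order is "greater than or
   equal". */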
8696
sewardj8d965312005-02-25 02:48:47 +00008697
sewardjc4530ae2012-05-21 10:18:49 +00008698/* Handles SSE 32F/64F comparisons. It can fail, in which case it
8699 returns the original delta to indicate failure. */
8700
8701static Long dis_SSE_cmp_E_to_G ( VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00008702 Prefix pfx, Long delta,
sewardj8d965312005-02-25 02:48:47 +00008703 HChar* opname, Bool all_lanes, Int sz )
8704{
sewardjc4530ae2012-05-21 10:18:49 +00008705 Long delta0 = delta;
sewardj8d965312005-02-25 02:48:47 +00008706 HChar dis_buf[50];
sewardjc4530ae2012-05-21 10:18:49 +00008707 Int alen;
8708 UInt imm8;
sewardj8d965312005-02-25 02:48:47 +00008709 IRTemp addr;
sewardjc4530ae2012-05-21 10:18:49 +00008710 Bool preSwap = False;
sewardj8d965312005-02-25 02:48:47 +00008711 IROp op = Iop_INVALID;
sewardjc4530ae2012-05-21 10:18:49 +00008712 Bool postNot = False;
sewardj8d965312005-02-25 02:48:47 +00008713 IRTemp plain = newTemp(Ity_V128);
8714 UChar rm = getUChar(delta);
8715 UShort mask = 0;
8716 vassert(sz == 4 || sz == 8);
8717 if (epartIsReg(rm)) {
8718 imm8 = getUChar(delta+1);
sewardjc4530ae2012-05-21 10:18:49 +00008719 if (imm8 >= 8) return delta0; /* FAIL */
8720 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
8721 if (!ok) return delta0; /* FAIL */
8722 vassert(!preSwap); /* never needed for imm8 < 8 */
sewardj8d965312005-02-25 02:48:47 +00008723 assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)),
8724 getXMMReg(eregOfRexRM(pfx,rm))) );
8725 delta += 2;
8726 DIP("%s $%d,%s,%s\n", opname,
8727 (Int)imm8,
8728 nameXMMReg(eregOfRexRM(pfx,rm)),
8729 nameXMMReg(gregOfRexRM(pfx,rm)) );
8730 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008731 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj8d965312005-02-25 02:48:47 +00008732 imm8 = getUChar(delta+alen);
sewardjc4530ae2012-05-21 10:18:49 +00008733 if (imm8 >= 8) return delta0; /* FAIL */
8734 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
8735 if (!ok) return delta0; /* FAIL */
8736 vassert(!preSwap); /* never needed for imm8 < 8 */
sewardjab9055b2006-01-01 13:17:38 +00008737 assign( plain,
8738 binop(
8739 op,
8740 getXMMReg(gregOfRexRM(pfx,rm)),
sewardjc4530ae2012-05-21 10:18:49 +00008741 all_lanes
8742 ? loadLE(Ity_V128, mkexpr(addr))
8743 : sz == 8
8744 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
8745 : /*sz==4*/
8746 unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
sewardjab9055b2006-01-01 13:17:38 +00008747 )
8748 );
sewardj8d965312005-02-25 02:48:47 +00008749 delta += alen+1;
8750 DIP("%s $%d,%s,%s\n", opname,
8751 (Int)imm8,
8752 dis_buf,
8753 nameXMMReg(gregOfRexRM(pfx,rm)) );
8754 }
8755
sewardjc4530ae2012-05-21 10:18:49 +00008756 if (postNot && all_lanes) {
sewardj9c9ee3d2005-02-26 01:17:42 +00008757 putXMMReg( gregOfRexRM(pfx,rm),
sewardj8d965312005-02-25 02:48:47 +00008758 unop(Iop_NotV128, mkexpr(plain)) );
8759 }
8760 else
sewardjc4530ae2012-05-21 10:18:49 +00008761 if (postNot && !all_lanes) {
sewardj1027dc22005-02-26 01:55:02 +00008762 mask = toUShort(sz==4 ? 0x000F : 0x00FF);
sewardj8d965312005-02-25 02:48:47 +00008763 putXMMReg( gregOfRexRM(pfx,rm),
8764 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
8765 }
8766 else {
8767 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) );
8768 }
8769
8770 return delta;
8771}
8772
8773
sewardjadffcef2005-05-11 00:03:06 +00008774/* Vector by scalar shift of G by the amount specified at the bottom
8775 of E. */
8776
sewardj2e28ac42008-12-04 00:05:12 +00008777static ULong dis_SSE_shiftG_byE ( VexAbiInfo* vbi,
8778 Prefix pfx, Long delta,
sewardjadffcef2005-05-11 00:03:06 +00008779 HChar* opname, IROp op )
8780{
8781 HChar dis_buf[50];
8782 Int alen, size;
8783 IRTemp addr;
8784 Bool shl, shr, sar;
8785 UChar rm = getUChar(delta);
8786 IRTemp g0 = newTemp(Ity_V128);
8787 IRTemp g1 = newTemp(Ity_V128);
sewardj4c0a7ac2012-06-21 09:08:19 +00008788 IRTemp amt = newTemp(Ity_I64);
sewardjadffcef2005-05-11 00:03:06 +00008789 IRTemp amt8 = newTemp(Ity_I8);
8790 if (epartIsReg(rm)) {
sewardj4c0a7ac2012-06-21 09:08:19 +00008791 assign( amt, getXMMRegLane64(eregOfRexRM(pfx,rm), 0) );
sewardjadffcef2005-05-11 00:03:06 +00008792 DIP("%s %s,%s\n", opname,
8793 nameXMMReg(eregOfRexRM(pfx,rm)),
8794 nameXMMReg(gregOfRexRM(pfx,rm)) );
8795 delta++;
8796 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008797 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj4c0a7ac2012-06-21 09:08:19 +00008798 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
sewardjadffcef2005-05-11 00:03:06 +00008799 DIP("%s %s,%s\n", opname,
8800 dis_buf,
8801 nameXMMReg(gregOfRexRM(pfx,rm)) );
8802 delta += alen;
8803 }
8804 assign( g0, getXMMReg(gregOfRexRM(pfx,rm)) );
sewardj4c0a7ac2012-06-21 09:08:19 +00008805 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
sewardjadffcef2005-05-11 00:03:06 +00008806
8807 shl = shr = sar = False;
8808 size = 0;
8809 switch (op) {
8810 case Iop_ShlN16x8: shl = True; size = 16; break;
8811 case Iop_ShlN32x4: shl = True; size = 32; break;
8812 case Iop_ShlN64x2: shl = True; size = 64; break;
8813 case Iop_SarN16x8: sar = True; size = 16; break;
8814 case Iop_SarN32x4: sar = True; size = 32; break;
8815 case Iop_ShrN16x8: shr = True; size = 16; break;
8816 case Iop_ShrN32x4: shr = True; size = 32; break;
8817 case Iop_ShrN64x2: shr = True; size = 64; break;
8818 default: vassert(0);
8819 }
8820
8821 if (shl || shr) {
8822 assign(
8823 g1,
8824 IRExpr_Mux0X(
8825 unop(Iop_1Uto8,
sewardj4c0a7ac2012-06-21 09:08:19 +00008826 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size))),
sewardjadffcef2005-05-11 00:03:06 +00008827 mkV128(0x0000),
8828 binop(op, mkexpr(g0), mkexpr(amt8))
8829 )
8830 );
8831 } else
8832 if (sar) {
8833 assign(
8834 g1,
8835 IRExpr_Mux0X(
8836 unop(Iop_1Uto8,
sewardj4c0a7ac2012-06-21 09:08:19 +00008837 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size))),
sewardjadffcef2005-05-11 00:03:06 +00008838 binop(op, mkexpr(g0), mkU8(size-1)),
8839 binop(op, mkexpr(g0), mkexpr(amt8))
8840 )
8841 );
8842 } else {
8843 vassert(0);
8844 }
8845
8846 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) );
8847 return delta;
8848}
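/* Editor's note: an illustrative scalar sketch (hypothetical names,
   not part of the translator) of the per-lane semantics implemented
   above, shown for a 16-bit lane.  Logical shifts by an amount >=
   the lane size yield zero, whereas the arithmetic right shift
   clamps the amount to laneSize-1, so each lane becomes a copy of
   its sign bit.  The immediate-form variant below follows the same
   rule. */
#if 0
static UShort ref_shl16_lane ( UShort lane, ULong amt )
{
   return amt >= 16 ? 0 : toUShort(lane << amt);
}
static UShort ref_sar16_lane ( UShort lane, ULong amt )
{
   Int wide = (Int)(Short)lane;      /* sign-extend to 32 bits */
   if (amt >= 16) amt = 15;          /* clamp, don't zero */
   return toUShort(wide >> amt);     /* assumes arithmetic >> on Int */
}
#endif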
sewardj09717342005-05-05 21:34:02 +00008849
8850
8851/* Vector shift of E by an immediate byte. */
8852
8853static
8854ULong dis_SSE_shiftE_imm ( Prefix pfx,
sewardj270def42005-07-03 01:03:01 +00008855 Long delta, HChar* opname, IROp op )
sewardj09717342005-05-05 21:34:02 +00008856{
8857 Bool shl, shr, sar;
8858 UChar rm = getUChar(delta);
8859 IRTemp e0 = newTemp(Ity_V128);
8860 IRTemp e1 = newTemp(Ity_V128);
8861 UChar amt, size;
8862 vassert(epartIsReg(rm));
8863 vassert(gregLO3ofRM(rm) == 2
8864 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
sewardjca673ab2005-05-11 10:03:08 +00008865 amt = getUChar(delta+1);
sewardj09717342005-05-05 21:34:02 +00008866 delta += 2;
8867 DIP("%s $%d,%s\n", opname,
8868 (Int)amt,
8869 nameXMMReg(eregOfRexRM(pfx,rm)) );
8870 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
8871
8872 shl = shr = sar = False;
8873 size = 0;
8874 switch (op) {
8875 case Iop_ShlN16x8: shl = True; size = 16; break;
8876 case Iop_ShlN32x4: shl = True; size = 32; break;
8877 case Iop_ShlN64x2: shl = True; size = 64; break;
8878 case Iop_SarN16x8: sar = True; size = 16; break;
8879 case Iop_SarN32x4: sar = True; size = 32; break;
8880 case Iop_ShrN16x8: shr = True; size = 16; break;
8881 case Iop_ShrN32x4: shr = True; size = 32; break;
8882 case Iop_ShrN64x2: shr = True; size = 64; break;
8883 default: vassert(0);
8884 }
8885
8886 if (shl || shr) {
8887 assign( e1, amt >= size
8888 ? mkV128(0x0000)
8889 : binop(op, mkexpr(e0), mkU8(amt))
8890 );
8891 } else
8892 if (sar) {
8893 assign( e1, amt >= size
8894 ? binop(op, mkexpr(e0), mkU8(size-1))
8895 : binop(op, mkexpr(e0), mkU8(amt))
8896 );
8897 } else {
8898 vassert(0);
8899 }
8900
8901 putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) );
8902 return delta;
8903}
sewardj1a01e652005-02-23 11:39:21 +00008904
8905
8906/* Get the current SSE rounding mode. */
8907
8908static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
8909{
8910 return
8911 unop( Iop_64to32,
8912 binop( Iop_And64,
8913 IRExpr_Get( OFFB_SSEROUND, Ity_I64 ),
8914 mkU64(3) ));
8915}
8916
sewardjbcbb9de2005-03-27 02:22:32 +00008917static void put_sse_roundingmode ( IRExpr* sseround )
8918{
sewardjdd40fdf2006-12-24 02:20:24 +00008919 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
sewardjbcbb9de2005-03-27 02:22:32 +00008920 stmt( IRStmt_Put( OFFB_SSEROUND,
8921 unop(Iop_32Uto64,sseround) ) );
8922}
8923
sewardj4b1cc832012-06-13 11:10:20 +00008924/* Break a V128-bit value up into four 32-bit ints. */
sewardja7ba8c42005-05-10 20:08:34 +00008925
sewardj4b1cc832012-06-13 11:10:20 +00008926static void breakupV128to32s ( IRTemp t128,
8927 /*OUTs*/
8928 IRTemp* t3, IRTemp* t2,
8929 IRTemp* t1, IRTemp* t0 )
sewardja7ba8c42005-05-10 20:08:34 +00008930{
8931 IRTemp hi64 = newTemp(Ity_I64);
8932 IRTemp lo64 = newTemp(Ity_I64);
8933 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
8934 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
8935
8936 vassert(t0 && *t0 == IRTemp_INVALID);
8937 vassert(t1 && *t1 == IRTemp_INVALID);
8938 vassert(t2 && *t2 == IRTemp_INVALID);
8939 vassert(t3 && *t3 == IRTemp_INVALID);
8940
8941 *t0 = newTemp(Ity_I32);
8942 *t1 = newTemp(Ity_I32);
8943 *t2 = newTemp(Ity_I32);
8944 *t3 = newTemp(Ity_I32);
8945 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
8946 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
8947 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
8948 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
8949}
8950
sewardj4b1cc832012-06-13 11:10:20 +00008951/* Construct a V128-bit value from four 32-bit ints. */
sewardja7ba8c42005-05-10 20:08:34 +00008952
sewardj4b1cc832012-06-13 11:10:20 +00008953static IRExpr* mkV128from32s ( IRTemp t3, IRTemp t2,
8954 IRTemp t1, IRTemp t0 )
sewardja7ba8c42005-05-10 20:08:34 +00008955{
8956 return
8957 binop( Iop_64HLtoV128,
8958 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
8959 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
8960 );
8961}
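/* Editor's note: a hypothetical usage sketch for the two helpers
   above -- reversing the four 32-bit lanes of a V128 value.  This
   breakup/reassemble idiom is exactly what e.g. the PSHUFD
   translation later in this file does. */
#if 0
static IRExpr* ref_reverse_32x4_lanes ( IRTemp vec /* :: Ity_V128 */ )
{
   IRTemp t3 = IRTemp_INVALID, t2 = IRTemp_INVALID,
          t1 = IRTemp_INVALID, t0 = IRTemp_INVALID;
   breakupV128to32s( vec, &t3, &t2, &t1, &t0 );
   return mkV128from32s( t0, t1, t2, t3 );
}
#endif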
8962
8963/* Break a 64-bit value up into four 16-bit ints. */
8964
8965static void breakup64to16s ( IRTemp t64,
8966 /*OUTs*/
8967 IRTemp* t3, IRTemp* t2,
8968 IRTemp* t1, IRTemp* t0 )
8969{
8970 IRTemp hi32 = newTemp(Ity_I32);
8971 IRTemp lo32 = newTemp(Ity_I32);
8972 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
8973 assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
8974
8975 vassert(t0 && *t0 == IRTemp_INVALID);
8976 vassert(t1 && *t1 == IRTemp_INVALID);
8977 vassert(t2 && *t2 == IRTemp_INVALID);
8978 vassert(t3 && *t3 == IRTemp_INVALID);
8979
8980 *t0 = newTemp(Ity_I16);
8981 *t1 = newTemp(Ity_I16);
8982 *t2 = newTemp(Ity_I16);
8983 *t3 = newTemp(Ity_I16);
8984 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) );
8985 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
8986 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) );
8987 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
8988}
8989
8990/* Construct a 64-bit value from four 16-bit ints. */
8991
8992static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
8993 IRTemp t1, IRTemp t0 )
8994{
8995 return
8996 binop( Iop_32HLto64,
8997 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
8998 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
8999 );
9000}
sewardjdf0e0022005-01-25 15:48:43 +00009001
sewardj4b1cc832012-06-13 11:10:20 +00009002/* Break a V256-bit value up into four 64-bit ints. */
9003
9004static void breakupV256to64s ( IRTemp t256,
9005 /*OUTs*/
9006 IRTemp* t3, IRTemp* t2,
9007 IRTemp* t1, IRTemp* t0 )
9008{
9009 vassert(t0 && *t0 == IRTemp_INVALID);
9010 vassert(t1 && *t1 == IRTemp_INVALID);
9011 vassert(t2 && *t2 == IRTemp_INVALID);
9012 vassert(t3 && *t3 == IRTemp_INVALID);
9013 *t0 = newTemp(Ity_I64);
9014 *t1 = newTemp(Ity_I64);
9015 *t2 = newTemp(Ity_I64);
9016 *t3 = newTemp(Ity_I64);
9017 assign( *t0, unop(Iop_V256to64_0, mkexpr(t256)) );
9018 assign( *t1, unop(Iop_V256to64_1, mkexpr(t256)) );
9019 assign( *t2, unop(Iop_V256to64_2, mkexpr(t256)) );
9020 assign( *t3, unop(Iop_V256to64_3, mkexpr(t256)) );
9021}
9022
sewardjfe0c5e72012-06-15 15:48:07 +00009023/* Break a V256-bit value up into two V128s. */
9024
9025static void breakupV256toV128s ( IRTemp t256,
9026 /*OUTs*/
9027 IRTemp* t1, IRTemp* t0 )
9028{
9029 vassert(t0 && *t0 == IRTemp_INVALID);
9030 vassert(t1 && *t1 == IRTemp_INVALID);
9031 *t0 = newTemp(Ity_V128);
9032 *t1 = newTemp(Ity_V128);
9033 assign(*t1, unop(Iop_V256toV128_1, mkexpr(t256)));
9034 assign(*t0, unop(Iop_V256toV128_0, mkexpr(t256)));
9035}
sewardjdf0e0022005-01-25 15:48:43 +00009036
sewardj4f228902012-06-21 09:17:58 +00009037/* Break a V256-bit value up into eight 32-bit ints. */
9038
9039static void breakupV256to32s ( IRTemp t256,
9040 /*OUTs*/
9041 IRTemp* t7, IRTemp* t6,
9042 IRTemp* t5, IRTemp* t4,
9043 IRTemp* t3, IRTemp* t2,
9044 IRTemp* t1, IRTemp* t0 )
9045{
9046 IRTemp t128_1 = IRTemp_INVALID;
9047 IRTemp t128_0 = IRTemp_INVALID;
9048 breakupV256toV128s( t256, &t128_1, &t128_0 );
9049 breakupV128to32s( t128_1, t7, t6, t5, t4 );
9050 breakupV128to32s( t128_0, t3, t2, t1, t0 );
9051}
9052
sewardjd8bca7e2012-06-20 11:46:19 +00009053/* Break a V128-bit value up into two 64-bit ints. */
9054
9055static void breakupV128to64s ( IRTemp t128,
9056 /*OUTs*/
9057 IRTemp* t1, IRTemp* t0 )
9058{
9059 vassert(t0 && *t0 == IRTemp_INVALID);
9060 vassert(t1 && *t1 == IRTemp_INVALID);
9061 *t0 = newTemp(Ity_I64);
9062 *t1 = newTemp(Ity_I64);
9063 assign( *t0, unop(Iop_V128to64, mkexpr(t128)) );
9064 assign( *t1, unop(Iop_V128HIto64, mkexpr(t128)) );
9065}
9066
sewardj89378162012-06-24 12:12:20 +00009067/* Construct a V256-bit value from eight 32-bit ints. */
9068
9069static IRExpr* mkV256from32s ( IRTemp t7, IRTemp t6,
9070 IRTemp t5, IRTemp t4,
9071 IRTemp t3, IRTemp t2,
9072 IRTemp t1, IRTemp t0 )
9073{
9074 return
9075 binop( Iop_V128HLtoV256,
9076 binop( Iop_64HLtoV128,
9077 binop(Iop_32HLto64, mkexpr(t7), mkexpr(t6)),
9078 binop(Iop_32HLto64, mkexpr(t5), mkexpr(t4)) ),
9079 binop( Iop_64HLtoV128,
9080 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
9081 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) )
9082 );
9083}
9084
9085/* Construct a V256-bit value from four 64-bit ints. */
9086
9087static IRExpr* mkV256from64s ( IRTemp t3, IRTemp t2,
9088 IRTemp t1, IRTemp t0 )
9089{
9090 return
9091 binop( Iop_V128HLtoV256,
9092 binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)),
9093 binop(Iop_64HLtoV128, mkexpr(t1), mkexpr(t0))
9094 );
9095}
9096
sewardjd166e282008-02-06 11:42:45 +00009097/* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
9098 values (aa,bb), computes, for each of the 4 16-bit lanes:
9099
9100 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
9101*/
9102static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
9103{
9104 IRTemp aa = newTemp(Ity_I64);
9105 IRTemp bb = newTemp(Ity_I64);
9106 IRTemp aahi32s = newTemp(Ity_I64);
9107 IRTemp aalo32s = newTemp(Ity_I64);
9108 IRTemp bbhi32s = newTemp(Ity_I64);
9109 IRTemp bblo32s = newTemp(Ity_I64);
9110 IRTemp rHi = newTemp(Ity_I64);
9111 IRTemp rLo = newTemp(Ity_I64);
9112 IRTemp one32x2 = newTemp(Ity_I64);
9113 assign(aa, aax);
9114 assign(bb, bbx);
9115 assign( aahi32s,
9116 binop(Iop_SarN32x2,
9117 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
9118 mkU8(16) ));
9119 assign( aalo32s,
9120 binop(Iop_SarN32x2,
9121 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
9122 mkU8(16) ));
9123 assign( bbhi32s,
9124 binop(Iop_SarN32x2,
9125 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
9126 mkU8(16) ));
9127 assign( bblo32s,
9128 binop(Iop_SarN32x2,
9129 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
9130 mkU8(16) ));
9131 assign(one32x2, mkU64( (1ULL << 32) + 1 ));
9132 assign(
9133 rHi,
9134 binop(
9135 Iop_ShrN32x2,
9136 binop(
9137 Iop_Add32x2,
9138 binop(
9139 Iop_ShrN32x2,
9140 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
9141 mkU8(14)
9142 ),
9143 mkexpr(one32x2)
9144 ),
9145 mkU8(1)
9146 )
9147 );
9148 assign(
9149 rLo,
9150 binop(
9151 Iop_ShrN32x2,
9152 binop(
9153 Iop_Add32x2,
9154 binop(
9155 Iop_ShrN32x2,
9156 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
9157 mkU8(14)
9158 ),
9159 mkexpr(one32x2)
9160 ),
9161 mkU8(1)
9162 )
9163 );
9164 return
9165 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
9166}
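/* Editor's note: a hypothetical scalar reference for one 16-bit
   lane of the PMULHRSW computation above, with a worked example:
   for aa = 0x4000 (0.5 in Q15) and bb = 0x2000 (0.25), the signed
   product is 0x08000000; >>u 14 gives 0x2000; +1 gives 0x2001;
   >>u 1 gives 0x1000, i.e. 0.125 in Q15. */
#if 0
static UShort ref_pmulhrsw_lane ( UShort aa, UShort bb )
{
   Int  prod    = (Int)(Short)aa * (Int)(Short)bb;   /* signed 32-bit product */
   UInt rounded = (((UInt)prod >> 14) + 1) >> 1;     /* round, then halve */
   return toUShort(rounded);
}
#endif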
9167
9168/* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
9169 values (aa,bb), computes, for each lane:
9170
9171 if aa_lane < 0 then - bb_lane
9172 else if aa_lane > 0 then bb_lane
9173 else 0
9174*/
9175static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
9176{
9177 IRTemp aa = newTemp(Ity_I64);
9178 IRTemp bb = newTemp(Ity_I64);
9179 IRTemp zero = newTemp(Ity_I64);
9180 IRTemp bbNeg = newTemp(Ity_I64);
9181 IRTemp negMask = newTemp(Ity_I64);
9182 IRTemp posMask = newTemp(Ity_I64);
9183 IROp opSub = Iop_INVALID;
9184 IROp opCmpGTS = Iop_INVALID;
9185
9186 switch (laneszB) {
9187 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break;
9188 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
9189 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
9190 default: vassert(0);
9191 }
9192
9193 assign( aa, aax );
9194 assign( bb, bbx );
9195 assign( zero, mkU64(0) );
9196 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) );
9197 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
9198 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) );
9199
9200 return
9201 binop(Iop_Or64,
9202 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)),
9203 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
9204
9205}
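/* Editor's note: a hypothetical scalar reference for one 16-bit
   lane of PSIGN, equivalent to the mask-based construction above:
   negate bb when aa is negative, pass bb through when aa is
   positive, and produce zero when aa is zero. */
#if 0
static Short ref_psignw_lane ( Short aa, Short bb )
{
   if (aa < 0) return (Short)(-(Int)bb);
   if (aa > 0) return bb;
   return 0;
}
#endif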
9206
sewardj97f72452012-05-23 05:56:53 +00009207
sewardjd166e282008-02-06 11:42:45 +00009208/* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
9209 value aa, computes, for each lane
9210
9211 if aa < 0 then -aa else aa
9212
9213 Note that the result is interpreted as unsigned, so that the
9214 absolute value of the most negative signed input can be
9215 represented.
9216*/
sewardj97f72452012-05-23 05:56:53 +00009217static IRTemp math_PABS_MMX ( IRTemp aa, Int laneszB )
sewardjd166e282008-02-06 11:42:45 +00009218{
sewardj97f72452012-05-23 05:56:53 +00009219 IRTemp res = newTemp(Ity_I64);
sewardjd166e282008-02-06 11:42:45 +00009220 IRTemp zero = newTemp(Ity_I64);
9221 IRTemp aaNeg = newTemp(Ity_I64);
9222 IRTemp negMask = newTemp(Ity_I64);
9223 IRTemp posMask = newTemp(Ity_I64);
9224 IROp opSub = Iop_INVALID;
9225 IROp opSarN = Iop_INVALID;
9226
9227 switch (laneszB) {
9228 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break;
9229 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
9230 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
9231 default: vassert(0);
9232 }
9233
sewardjd166e282008-02-06 11:42:45 +00009234 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
9235 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
9236 assign( zero, mkU64(0) );
9237 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) );
sewardj97f72452012-05-23 05:56:53 +00009238 assign( res,
9239 binop(Iop_Or64,
9240 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)),
9241 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ));
9242 return res;
sewardjd166e282008-02-06 11:42:45 +00009243}
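/* Editor's note: the unsigned interpretation mentioned above only
   matters for the most negative input; e.g. for 16-bit lanes,
   PABSW(0x8000) = 0x8000, which reads as +32768 unsigned (there is
   no +32768 in signed 16-bit).  A hypothetical scalar sketch: */
#if 0
static UShort ref_pabsw_lane ( Short aa )
{
   return aa < 0 ? toUShort(0 - (Int)aa) : toUShort(aa);
}
#endif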
9244
sewardj97f72452012-05-23 05:56:53 +00009245/* XMM version of math_PABS_MMX. */
9246static IRTemp math_PABS_XMM ( IRTemp aa, Int laneszB )
9247{
9248 IRTemp res = newTemp(Ity_V128);
9249 IRTemp aaHi = newTemp(Ity_I64);
9250 IRTemp aaLo = newTemp(Ity_I64);
9251 assign(aaHi, unop(Iop_V128HIto64, mkexpr(aa)));
9252 assign(aaLo, unop(Iop_V128to64, mkexpr(aa)));
9253 assign(res, binop(Iop_64HLtoV128,
9254 mkexpr(math_PABS_MMX(aaHi, laneszB)),
9255 mkexpr(math_PABS_MMX(aaLo, laneszB))));
9256 return res;
9257}
9258
9259/* Specialisations of math_PABS_XMM, since there's no easy way to do
9260 partial applications in C :-( */
9261static IRTemp math_PABS_XMM_pap4 ( IRTemp aa ) {
9262 return math_PABS_XMM(aa, 4);
9263}
9264
sewardj8516a1f2012-06-24 14:26:30 +00009265static IRTemp math_PABS_XMM_pap2 ( IRTemp aa ) {
9266 return math_PABS_XMM(aa, 2);
9267}
9268
9269static IRTemp math_PABS_XMM_pap1 ( IRTemp aa ) {
9270 return math_PABS_XMM(aa, 1);
9271}
sewardj97f72452012-05-23 05:56:53 +00009272
sewardjd166e282008-02-06 11:42:45 +00009273static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
9274 IRTemp lo64, Long byteShift )
9275{
9276 vassert(byteShift >= 1 && byteShift <= 7);
9277 return
9278 binop(Iop_Or64,
9279 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
9280 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
9281 );
9282}
9283
sewardj151cd3e2012-06-18 13:56:55 +00009284static IRTemp math_PALIGNR_XMM ( IRTemp sV, IRTemp dV, UInt imm8 )
9285{
9286 IRTemp res = newTemp(Ity_V128);
9287 IRTemp sHi = newTemp(Ity_I64);
9288 IRTemp sLo = newTemp(Ity_I64);
9289 IRTemp dHi = newTemp(Ity_I64);
9290 IRTemp dLo = newTemp(Ity_I64);
9291 IRTemp rHi = newTemp(Ity_I64);
9292 IRTemp rLo = newTemp(Ity_I64);
9293
9294 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
9295 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
9296 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
9297 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
9298
9299 if (imm8 == 0) {
9300 assign( rHi, mkexpr(sHi) );
9301 assign( rLo, mkexpr(sLo) );
9302 }
9303 else if (imm8 >= 1 && imm8 <= 7) {
9304 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, imm8) );
9305 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, imm8) );
9306 }
9307 else if (imm8 == 8) {
9308 assign( rHi, mkexpr(dLo) );
9309 assign( rLo, mkexpr(sHi) );
9310 }
9311 else if (imm8 >= 9 && imm8 <= 15) {
9312 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-8) );
9313 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, imm8-8) );
9314 }
9315 else if (imm8 == 16) {
9316 assign( rHi, mkexpr(dHi) );
9317 assign( rLo, mkexpr(dLo) );
9318 }
9319 else if (imm8 >= 17 && imm8 <= 23) {
9320 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-16))) );
9321 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-16) );
9322 }
9323 else if (imm8 == 24) {
9324 assign( rHi, mkU64(0) );
9325 assign( rLo, mkexpr(dHi) );
9326 }
9327 else if (imm8 >= 25 && imm8 <= 31) {
9328 assign( rHi, mkU64(0) );
9329 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-24))) );
9330 }
9331 else if (imm8 >= 32 && imm8 <= 255) {
9332 assign( rHi, mkU64(0) );
9333 assign( rLo, mkU64(0) );
9334 }
9335 else
9336 vassert(0);
9337
9338 assign( res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
9339 return res;
9340}
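/* Editor's note: a hypothetical byte-level reference for the
   function above.  PALIGNR forms the 32-byte value dV:sV (dV in
   the high half), shifts it right by imm8 bytes and keeps the low
   16 bytes, so imm8 >= 32 yields zero.  Byte 0 is the least
   significant byte. */
#if 0
static void ref_palignr_xmm ( /*OUT*/UChar res[16],
                              UChar sV[16], UChar dV[16], UInt imm8 )
{
   UChar cat[32];
   UInt  i;
   for (i = 0; i < 16; i++) { cat[i] = sV[i]; cat[16+i] = dV[i]; }
   for (i = 0; i < 16; i++) {
      UInt ix = imm8 + i;
      res[i] = ix < 32 ? cat[ix] : 0;
   }
}
#endif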
9341
9342
sewardj150c9cd2008-02-09 01:16:02 +00009343/* Generate a SIGSEGV followed by a restart of the current instruction
9344 if effective_addr is not aligned to mask+1 bytes (16 or 32 below).
9345 This is required behaviour for some SSE3 and all 128-bit SSSE3 instructions.
9346 This assumes that guest_RIP_curr_instr is set correctly! */
sewardjc4530ae2012-05-21 10:18:49 +00009347static
9348void gen_SEGV_if_not_XX_aligned ( IRTemp effective_addr, ULong mask )
sewardj150c9cd2008-02-09 01:16:02 +00009349{
9350 stmt(
9351 IRStmt_Exit(
9352 binop(Iop_CmpNE64,
sewardjc4530ae2012-05-21 10:18:49 +00009353 binop(Iop_And64,mkexpr(effective_addr),mkU64(mask)),
sewardj150c9cd2008-02-09 01:16:02 +00009354 mkU64(0)),
9355 Ijk_SigSEGV,
sewardjc6f970f2012-04-02 21:54:49 +00009356 IRConst_U64(guest_RIP_curr_instr),
9357 OFFB_RIP
sewardj150c9cd2008-02-09 01:16:02 +00009358 )
9359 );
9360}
9361
sewardjc4530ae2012-05-21 10:18:49 +00009362static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) {
9363 gen_SEGV_if_not_XX_aligned(effective_addr, 16-1);
9364}
9365
9366static void gen_SEGV_if_not_32_aligned ( IRTemp effective_addr ) {
9367 gen_SEGV_if_not_XX_aligned(effective_addr, 32-1);
9368}
sewardjd166e282008-02-06 11:42:45 +00009369
sewardjc4356f02007-11-09 21:15:04 +00009370/* Helper for deciding whether a given insn (starting at the opcode
9371 byte) may validly be used with a LOCK prefix. The following insns
9372 may be used with LOCK when their destination operand is in memory.
sewardje9d8a262009-07-01 08:06:34 +00009373 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
sewardjc4356f02007-11-09 21:15:04 +00009374
sewardje9d8a262009-07-01 08:06:34 +00009375 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
9376 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
9377 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
9378 SBB 81 /3, 81 /3, 82 /x, 83 /3, 18, 19
9379 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
9380 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
9381 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
sewardjc4356f02007-11-09 21:15:04 +00009382
9383 DEC FE /1, FF /1
9384 INC FE /0, FF /0
9385
9386 NEG F6 /3, F7 /3
9387 NOT F6 /2, F7 /2
9388
sewardje9d8a262009-07-01 08:06:34 +00009389 XCHG 86, 87
sewardjc4356f02007-11-09 21:15:04 +00009390
9391 BTC 0F BB, 0F BA /7
9392 BTR 0F B3, 0F BA /6
9393 BTS 0F AB, 0F BA /5
9394
9395 CMPXCHG 0F B0, 0F B1
9396 CMPXCHG8B 0F C7 /1
9397
9398 XADD 0F C0, 0F C1
sewardje9d8a262009-07-01 08:06:34 +00009399
9400 ------------------------------
9401
9402 80 /0 = addb $imm8, rm8
9403 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
9404 82 /0 = addb $imm8, rm8
9405 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
9406
9407 00 = addb r8, rm8
9408 01 = addl r32, rm32 and addw r16, rm16
9409
9410 Same for ADD OR ADC SBB AND SUB XOR
9411
9412 FE /1 = dec rm8
9413 FF /1 = dec rm32 and dec rm16
9414
9415 FE /0 = inc rm8
9416 FF /0 = inc rm32 and inc rm16
9417
9418 F6 /3 = neg rm8
9419 F7 /3 = neg rm32 and neg rm16
9420
9421 F6 /2 = not rm8
9422 F7 /2 = not rm32 and not rm16
9423
9424 0F BB = btcw r16, rm16 and btcl r32, rm32
9425 0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32
9426
9427 Same for BTS, BTR
sewardjc4356f02007-11-09 21:15:04 +00009428*/
9429static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
9430{
9431 switch (opc[0]) {
sewardje9d8a262009-07-01 08:06:34 +00009432 case 0x00: case 0x01: case 0x08: case 0x09:
9433 case 0x10: case 0x11: case 0x18: case 0x19:
9434 case 0x20: case 0x21: case 0x28: case 0x29:
9435 case 0x30: case 0x31:
9436 if (!epartIsReg(opc[1]))
9437 return True;
9438 break;
sewardjc4356f02007-11-09 21:15:04 +00009439
sewardje9d8a262009-07-01 08:06:34 +00009440 case 0x80: case 0x81: case 0x82: case 0x83:
9441 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
9442 && !epartIsReg(opc[1]))
sewardjc4356f02007-11-09 21:15:04 +00009443 return True;
9444 break;
9445
9446 case 0xFE: case 0xFF:
sewardje9d8a262009-07-01 08:06:34 +00009447 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
9448 && !epartIsReg(opc[1]))
sewardjc4356f02007-11-09 21:15:04 +00009449 return True;
9450 break;
9451
9452 case 0xF6: case 0xF7:
sewardje9d8a262009-07-01 08:06:34 +00009453 if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
9454 && !epartIsReg(opc[1]))
sewardjc4356f02007-11-09 21:15:04 +00009455 return True;
9456 break;
9457
9458 case 0x86: case 0x87:
sewardje9d8a262009-07-01 08:06:34 +00009459 if (!epartIsReg(opc[1]))
9460 return True;
9461 break;
sewardjc4356f02007-11-09 21:15:04 +00009462
9463 case 0x0F: {
9464 switch (opc[1]) {
9465 case 0xBB: case 0xB3: case 0xAB:
sewardje9d8a262009-07-01 08:06:34 +00009466 if (!epartIsReg(opc[2]))
9467 return True;
9468 break;
sewardjc4356f02007-11-09 21:15:04 +00009469 case 0xBA:
sewardje9d8a262009-07-01 08:06:34 +00009470 if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
9471 && !epartIsReg(opc[2]))
sewardjc4356f02007-11-09 21:15:04 +00009472 return True;
9473 break;
9474 case 0xB0: case 0xB1:
sewardje9d8a262009-07-01 08:06:34 +00009475 if (!epartIsReg(opc[2]))
9476 return True;
9477 break;
sewardjc4356f02007-11-09 21:15:04 +00009478 case 0xC7:
sewardje9d8a262009-07-01 08:06:34 +00009479 if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
sewardjc4356f02007-11-09 21:15:04 +00009480 return True;
9481 break;
9482 case 0xC0: case 0xC1:
sewardje9d8a262009-07-01 08:06:34 +00009483 if (!epartIsReg(opc[2]))
9484 return True;
9485 break;
sewardjc4356f02007-11-09 21:15:04 +00009486 default:
9487 break;
9488 } /* switch (opc[1]) */
9489 break;
9490 }
9491
9492 default:
9493 break;
9494 } /* switch (opc[0]) */
9495
9496 return False;
9497}
9498
9499
sewardjdf0e0022005-01-25 15:48:43 +00009500/*------------------------------------------------------------*/
sewardj80611e32012-01-20 13:07:24 +00009501/*--- ---*/
9502/*--- Top-level SSE/SSE2: dis_ESC_0F__SSE2 ---*/
9503/*--- ---*/
9504/*------------------------------------------------------------*/
9505
sewardjc4530ae2012-05-21 10:18:49 +00009506static Long dis_COMISD ( VexAbiInfo* vbi, Prefix pfx,
9507 Long delta, Bool isAvx, UChar opc )
9508{
9509 vassert(opc == 0x2F/*COMISD*/ || opc == 0x2E/*UCOMISD*/);
9510 Int alen = 0;
9511 HChar dis_buf[50];
9512 IRTemp argL = newTemp(Ity_F64);
9513 IRTemp argR = newTemp(Ity_F64);
9514 UChar modrm = getUChar(delta);
9515 IRTemp addr = IRTemp_INVALID;
9516 if (epartIsReg(modrm)) {
9517 assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm),
9518 0/*lowest lane*/ ) );
9519 delta += 1;
9520 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
9521 opc==0x2E ? "u" : "",
9522 nameXMMReg(eregOfRexRM(pfx,modrm)),
9523 nameXMMReg(gregOfRexRM(pfx,modrm)) );
9524 } else {
9525 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9526 assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
9527 delta += alen;
9528 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
9529 opc==0x2E ? "u" : "",
9530 dis_buf,
9531 nameXMMReg(gregOfRexRM(pfx,modrm)) );
9532 }
9533 assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm),
9534 0/*lowest lane*/ ) );
9535
9536 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
9537 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
9538 stmt( IRStmt_Put(
9539 OFFB_CC_DEP1,
9540 binop( Iop_And64,
9541 unop( Iop_32Uto64,
9542 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ),
9543 mkU64(0x45)
9544 )));
9545 return delta;
9546}
9547
9548
9549static Long dis_COMISS ( VexAbiInfo* vbi, Prefix pfx,
9550 Long delta, Bool isAvx, UChar opc )
9551{
9552 vassert(opc == 0x2F/*COMISS*/ || opc == 0x2E/*UCOMISS*/);
9553 Int alen = 0;
9554 HChar dis_buf[50];
9555 IRTemp argL = newTemp(Ity_F32);
9556 IRTemp argR = newTemp(Ity_F32);
9557 UChar modrm = getUChar(delta);
9558 IRTemp addr = IRTemp_INVALID;
9559 if (epartIsReg(modrm)) {
9560 assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm),
9561 0/*lowest lane*/ ) );
9562 delta += 1;
9563 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
9564 opc==0x2E ? "u" : "",
9565 nameXMMReg(eregOfRexRM(pfx,modrm)),
9566 nameXMMReg(gregOfRexRM(pfx,modrm)) );
9567 } else {
9568 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9569 assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
9570 delta += alen;
9571 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
9572 opc==0x2E ? "u" : "",
9573 dis_buf,
9574 nameXMMReg(gregOfRexRM(pfx,modrm)) );
9575 }
9576 assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm),
9577 0/*lowest lane*/ ) );
9578
9579 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
9580 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
9581 stmt( IRStmt_Put(
9582 OFFB_CC_DEP1,
9583 binop( Iop_And64,
9584 unop( Iop_32Uto64,
9585 binop(Iop_CmpF64,
9586 unop(Iop_F32toF64,mkexpr(argL)),
9587 unop(Iop_F32toF64,mkexpr(argR)))),
9588 mkU64(0x45)
9589 )));
9590 return delta;
9591}
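/* Editor's note: in the two functions above, the AND with 0x45
   works because the IRCmpF64Result encoding is aligned with the
   amd64 flag bits that (U)COMISD/(U)COMISS must produce -- ZF
   (0x40), PF (0x04) and CF (0x01).  Ircr_UN = 0x45 (unordered sets
   all three), Ircr_EQ = 0x40, Ircr_LT = 0x01 and Ircr_GT = 0x00,
   so masking with 0x45 leaves exactly the required flags for
   CC_OP_COPY. */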
9592
9593
9594static Long dis_PSHUFD_32x4 ( VexAbiInfo* vbi, Prefix pfx,
9595 Long delta, Bool writesYmm )
9596{
9597 Int order;
9598 Int alen = 0;
9599 HChar dis_buf[50];
9600 IRTemp sV = newTemp(Ity_V128);
9601 UChar modrm = getUChar(delta);
9602 HChar* strV = writesYmm ? "v" : "";
9603 IRTemp addr = IRTemp_INVALID;
9604 if (epartIsReg(modrm)) {
9605 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
9606 order = (Int)getUChar(delta+1);
9607 delta += 1+1;
9608 DIP("%spshufd $%d,%s,%s\n", strV, order,
9609 nameXMMReg(eregOfRexRM(pfx,modrm)),
9610 nameXMMReg(gregOfRexRM(pfx,modrm)));
9611 } else {
9612 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
9613 1/*byte after the amode*/ );
9614 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
9615 order = (Int)getUChar(delta+alen);
9616 delta += alen+1;
9617 DIP("%spshufd $%d,%s,%s\n", strV, order,
9618 dis_buf,
9619 nameXMMReg(gregOfRexRM(pfx,modrm)));
9620 }
9621
9622 IRTemp s3, s2, s1, s0;
9623 s3 = s2 = s1 = s0 = IRTemp_INVALID;
sewardj4b1cc832012-06-13 11:10:20 +00009624 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
sewardjc4530ae2012-05-21 10:18:49 +00009625
9626# define SEL(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
9627 IRTemp dV = newTemp(Ity_V128);
9628 assign(dV,
sewardj4b1cc832012-06-13 11:10:20 +00009629 mkV128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
9630 SEL((order>>2)&3), SEL((order>>0)&3) )
sewardjc4530ae2012-05-21 10:18:49 +00009631 );
9632# undef SEL
9633
9634 (writesYmm ? putYMMRegLoAndZU : putXMMReg)
9635 (gregOfRexRM(pfx,modrm), mkexpr(dV));
9636 return delta;
9637}
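/* Editor's note: a worked example of the order-byte decoding
   above.  For order == 0x1B (binary 00 01 10 11), result lane 0
   takes source lane 3, lane 1 takes lane 2, lane 2 takes lane 1
   and lane 3 takes lane 0; hence "pshufd $0x1b, %xmm1, %xmm0"
   reverses the four 32-bit lanes. */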
9638
9639
9640static IRTemp math_PSRLDQ ( IRTemp sV, Int imm )
9641{
9642 IRTemp dV = newTemp(Ity_V128);
9643 IRTemp hi64 = newTemp(Ity_I64);
9644 IRTemp lo64 = newTemp(Ity_I64);
9645 IRTemp hi64r = newTemp(Ity_I64);
9646 IRTemp lo64r = newTemp(Ity_I64);
9647
9648 vassert(imm >= 0 && imm <= 255);
9649 if (imm >= 16) {
9650 assign(dV, mkV128(0x0000));
9651 return dV;
9652 }
9653
9654 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
9655 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
9656
9657 if (imm == 0) {
9658 assign( lo64r, mkexpr(lo64) );
9659 assign( hi64r, mkexpr(hi64) );
9660 }
9661 else
9662 if (imm == 8) {
9663 assign( hi64r, mkU64(0) );
9664 assign( lo64r, mkexpr(hi64) );
9665 }
9666 else
9667 if (imm > 8) {
9668 assign( hi64r, mkU64(0) );
sewardj251b59e2012-05-25 13:51:07 +00009669 assign( lo64r, binop( Iop_Shr64, mkexpr(hi64), mkU8( 8*(imm-8) ) ));
sewardjc4530ae2012-05-21 10:18:49 +00009670 } else {
sewardj251b59e2012-05-25 13:51:07 +00009671 assign( hi64r, binop( Iop_Shr64, mkexpr(hi64), mkU8(8 * imm) ));
sewardjc4530ae2012-05-21 10:18:49 +00009672 assign( lo64r,
9673 binop( Iop_Or64,
9674 binop(Iop_Shr64, mkexpr(lo64),
9675 mkU8(8 * imm)),
9676 binop(Iop_Shl64, mkexpr(hi64),
9677 mkU8(8 * (8 - imm)) )
9678 )
9679 );
9680 }
9681
9682 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
9683 return dV;
9684}
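/* Editor's note: a worked example of the byte-shift construction
   above.  For imm == 3, the result's low quadword is
   (lo64 >>u 24) | (hi64 << 40) -- the three low bytes fall away and
   three bytes of hi64 shift in -- while the high quadword is just
   hi64 >>u 24, with zeroes entering at the top.  math_PSLLDQ below
   is the mirror image. */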
9685
9686
sewardj251b59e2012-05-25 13:51:07 +00009687static IRTemp math_PSLLDQ ( IRTemp sV, Int imm )
9688{
9689 IRTemp dV = newTemp(Ity_V128);
9690 IRTemp hi64 = newTemp(Ity_I64);
9691 IRTemp lo64 = newTemp(Ity_I64);
9692 IRTemp hi64r = newTemp(Ity_I64);
9693 IRTemp lo64r = newTemp(Ity_I64);
9694
9695 vassert(imm >= 0 && imm <= 255);
9696 if (imm >= 16) {
9697 assign(dV, mkV128(0x0000));
9698 return dV;
9699 }
9700
9701 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
9702 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
9703
9704 if (imm == 0) {
9705 assign( lo64r, mkexpr(lo64) );
9706 assign( hi64r, mkexpr(hi64) );
9707 }
9708 else
9709 if (imm == 8) {
9710 assign( lo64r, mkU64(0) );
9711 assign( hi64r, mkexpr(lo64) );
9712 }
9713 else
9714 if (imm > 8) {
9715 assign( lo64r, mkU64(0) );
9716 assign( hi64r, binop( Iop_Shl64, mkexpr(lo64), mkU8( 8*(imm-8) ) ));
9717 } else {
9718 assign( lo64r, binop( Iop_Shl64, mkexpr(lo64), mkU8(8 * imm) ));
9719 assign( hi64r,
9720 binop( Iop_Or64,
9721 binop(Iop_Shl64, mkexpr(hi64),
9722 mkU8(8 * imm)),
9723 binop(Iop_Shr64, mkexpr(lo64),
9724 mkU8(8 * (8 - imm)) )
9725 )
9726 );
9727 }
9728
9729 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
9730 return dV;
9731}
9732
9733
sewardjc4530ae2012-05-21 10:18:49 +00009734static Long dis_CVTxSD2SI ( VexAbiInfo* vbi, Prefix pfx,
sewardj80804d12012-05-22 10:48:13 +00009735 Long delta, Bool isAvx, UChar opc, Int sz )
sewardjc4530ae2012-05-21 10:18:49 +00009736{
9737 vassert(opc == 0x2D/*CVTSD2SI*/ || opc == 0x2C/*CVTTSD2SI*/);
sewardjc4530ae2012-05-21 10:18:49 +00009738 HChar dis_buf[50];
sewardj80804d12012-05-22 10:48:13 +00009739 Int alen = 0;
9740 UChar modrm = getUChar(delta);
9741 IRTemp addr = IRTemp_INVALID;
sewardjc4530ae2012-05-21 10:18:49 +00009742 IRTemp rmode = newTemp(Ity_I32);
9743 IRTemp f64lo = newTemp(Ity_F64);
9744 Bool r2zero = toBool(opc == 0x2C);
9745
sewardjc4530ae2012-05-21 10:18:49 +00009746 if (epartIsReg(modrm)) {
9747 delta += 1;
9748 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
9749 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
9750 nameXMMReg(eregOfRexRM(pfx,modrm)),
9751 nameIReg(sz, gregOfRexRM(pfx,modrm),
9752 False));
9753 } else {
9754 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9755 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
9756 delta += alen;
9757 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
9758 dis_buf,
9759 nameIReg(sz, gregOfRexRM(pfx,modrm),
9760 False));
9761 }
9762
9763 if (r2zero) {
9764 assign( rmode, mkU32((UInt)Irrm_ZERO) );
9765 } else {
9766 assign( rmode, get_sse_roundingmode() );
9767 }
9768
9769 if (sz == 4) {
9770 putIReg32( gregOfRexRM(pfx,modrm),
9771 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
9772 } else {
sewardj80804d12012-05-22 10:48:13 +00009773 vassert(sz == 8);
sewardjc4530ae2012-05-21 10:18:49 +00009774 putIReg64( gregOfRexRM(pfx,modrm),
9775 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) );
9776 }
9777
9778 return delta;
9779}
9780
9781
sewardj80804d12012-05-22 10:48:13 +00009782static Long dis_CVTxSS2SI ( VexAbiInfo* vbi, Prefix pfx,
9783 Long delta, Bool isAvx, UChar opc, Int sz )
9784{
9785 vassert(opc == 0x2D/*CVTSS2SI*/ || opc == 0x2C/*CVTTSS2SI*/);
9786 HChar dis_buf[50];
9787 Int alen = 0;
9788 UChar modrm = getUChar(delta);
9789 IRTemp addr = IRTemp_INVALID;
9790 IRTemp rmode = newTemp(Ity_I32);
9791 IRTemp f32lo = newTemp(Ity_F32);
9792 Bool r2zero = toBool(opc == 0x2C);
9793
9794 if (epartIsReg(modrm)) {
9795 delta += 1;
9796 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
9797 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
9798 nameXMMReg(eregOfRexRM(pfx,modrm)),
9799 nameIReg(sz, gregOfRexRM(pfx,modrm),
9800 False));
9801 } else {
9802 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9803 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
9804 delta += alen;
9805 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
9806 dis_buf,
9807 nameIReg(sz, gregOfRexRM(pfx,modrm),
9808 False));
9809 }
9810
9811 if (r2zero) {
9812 assign( rmode, mkU32((UInt)Irrm_ZERO) );
9813 } else {
9814 assign( rmode, get_sse_roundingmode() );
9815 }
9816
9817 if (sz == 4) {
9818 putIReg32( gregOfRexRM(pfx,modrm),
9819 binop( Iop_F64toI32S,
9820 mkexpr(rmode),
9821 unop(Iop_F32toF64, mkexpr(f32lo))) );
9822 } else {
9823 vassert(sz == 8);
9824 putIReg64( gregOfRexRM(pfx,modrm),
9825 binop( Iop_F64toI64S,
9826 mkexpr(rmode),
9827 unop(Iop_F32toF64, mkexpr(f32lo))) );
9828 }
9829
9830 return delta;
9831}
9832
9833
sewardj66becf32012-06-18 23:15:16 +00009834static Long dis_CVTPS2PD_128 ( VexAbiInfo* vbi, Prefix pfx,
9835 Long delta, Bool isAvx )
sewardjc4530ae2012-05-21 10:18:49 +00009836{
9837 IRTemp addr = IRTemp_INVALID;
9838 Int alen = 0;
9839 HChar dis_buf[50];
9840 IRTemp f32lo = newTemp(Ity_F32);
9841 IRTemp f32hi = newTemp(Ity_F32);
9842 UChar modrm = getUChar(delta);
9843 UInt rG = gregOfRexRM(pfx,modrm);
9844 if (epartIsReg(modrm)) {
9845 UInt rE = eregOfRexRM(pfx,modrm);
9846 assign( f32lo, getXMMRegLane32F(rE, 0) );
9847 assign( f32hi, getXMMRegLane32F(rE, 1) );
9848 delta += 1;
9849 DIP("%scvtps2pd %s,%s\n",
9850 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
9851 } else {
9852 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9853 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
9854 assign( f32hi, loadLE(Ity_F32,
9855 binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
9856 delta += alen;
9857 DIP("%scvtps2pd %s,%s\n",
9858 isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
9859 }
9860
9861 putXMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32hi)) );
9862 putXMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32lo)) );
9863 if (isAvx)
9864 putYMMRegLane128( rG, 1, mkV128(0));
9865 return delta;
9866}
9867
9868
sewardj66becf32012-06-18 23:15:16 +00009869static Long dis_CVTPS2PD_256 ( VexAbiInfo* vbi, Prefix pfx,
9870 Long delta )
9871{
9872 IRTemp addr = IRTemp_INVALID;
9873 Int alen = 0;
9874 HChar dis_buf[50];
9875 IRTemp f32_0 = newTemp(Ity_F32);
9876 IRTemp f32_1 = newTemp(Ity_F32);
9877 IRTemp f32_2 = newTemp(Ity_F32);
9878 IRTemp f32_3 = newTemp(Ity_F32);
9879 UChar modrm = getUChar(delta);
9880 UInt rG = gregOfRexRM(pfx,modrm);
9881 if (epartIsReg(modrm)) {
9882 UInt rE = eregOfRexRM(pfx,modrm);
9883 assign( f32_0, getXMMRegLane32F(rE, 0) );
9884 assign( f32_1, getXMMRegLane32F(rE, 1) );
9885 assign( f32_2, getXMMRegLane32F(rE, 2) );
9886 assign( f32_3, getXMMRegLane32F(rE, 3) );
9887 delta += 1;
9888 DIP("vcvtps2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
9889 } else {
9890 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9891 assign( f32_0, loadLE(Ity_F32, mkexpr(addr)) );
9892 assign( f32_1, loadLE(Ity_F32,
9893 binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
9894 assign( f32_2, loadLE(Ity_F32,
9895 binop(Iop_Add64,mkexpr(addr),mkU64(8))) );
9896 assign( f32_3, loadLE(Ity_F32,
9897 binop(Iop_Add64,mkexpr(addr),mkU64(12))) );
9898 delta += alen;
9899 DIP("vcvtps2pd %s,%s\n", dis_buf, nameYMMReg(rG));
9900 }
9901
9902 putYMMRegLane64F( rG, 3, unop(Iop_F32toF64, mkexpr(f32_3)) );
9903 putYMMRegLane64F( rG, 2, unop(Iop_F32toF64, mkexpr(f32_2)) );
9904 putYMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32_1)) );
9905 putYMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32_0)) );
9906 return delta;
9907}
9908
9909
sewardj6fcd43e2012-06-14 08:51:35 +00009910static Long dis_CVTPD2PS_128 ( VexAbiInfo* vbi, Prefix pfx,
9911 Long delta, Bool isAvx )
sewardjc4530ae2012-05-21 10:18:49 +00009912{
9913 IRTemp addr = IRTemp_INVALID;
9914 Int alen = 0;
9915 HChar dis_buf[50];
9916 UChar modrm = getUChar(delta);
9917 UInt rG = gregOfRexRM(pfx,modrm);
9918 IRTemp argV = newTemp(Ity_V128);
9919 IRTemp rmode = newTemp(Ity_I32);
9920 if (epartIsReg(modrm)) {
9921 UInt rE = eregOfRexRM(pfx,modrm);
9922 assign( argV, getXMMReg(rE) );
9923 delta += 1;
9924 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
9925 nameXMMReg(rE), nameXMMReg(rG));
9926 } else {
9927 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9928 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9929 delta += alen;
9930 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
9931 dis_buf, nameXMMReg(rG) );
9932 }
9933
9934 assign( rmode, get_sse_roundingmode() );
9935 IRTemp t0 = newTemp(Ity_F64);
9936 IRTemp t1 = newTemp(Ity_F64);
9937 assign( t0, unop(Iop_ReinterpI64asF64,
9938 unop(Iop_V128to64, mkexpr(argV))) );
9939 assign( t1, unop(Iop_ReinterpI64asF64,
9940 unop(Iop_V128HIto64, mkexpr(argV))) );
9941
9942# define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), mkexpr(_t) )
9943 putXMMRegLane32( rG, 3, mkU32(0) );
9944 putXMMRegLane32( rG, 2, mkU32(0) );
9945 putXMMRegLane32F( rG, 1, CVT(t1) );
9946 putXMMRegLane32F( rG, 0, CVT(t0) );
9947# undef CVT
9948 if (isAvx)
9949 putYMMRegLane128( rG, 1, mkV128(0) );
9950
9951 return delta;
9952}
9953
9954
sewardj66becf32012-06-18 23:15:16 +00009955static Long dis_CVTxPS2DQ_128 ( VexAbiInfo* vbi, Prefix pfx,
9956 Long delta, Bool isAvx, Bool r2zero )
sewardj251b59e2012-05-25 13:51:07 +00009957{
9958 IRTemp addr = IRTemp_INVALID;
9959 Int alen = 0;
9960 HChar dis_buf[50];
9961 UChar modrm = getUChar(delta);
9962 IRTemp argV = newTemp(Ity_V128);
9963 IRTemp rmode = newTemp(Ity_I32);
9964 UInt rG = gregOfRexRM(pfx,modrm);
9965 IRTemp t0, t1, t2, t3;
9966
9967 if (epartIsReg(modrm)) {
9968 UInt rE = eregOfRexRM(pfx,modrm);
9969 assign( argV, getXMMReg(rE) );
9970 delta += 1;
sewardj66becf32012-06-18 23:15:16 +00009971 DIP("%scvt%sps2dq %s,%s\n",
9972 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
sewardj251b59e2012-05-25 13:51:07 +00009973 } else {
9974 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9975 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
9976 delta += alen;
sewardj66becf32012-06-18 23:15:16 +00009977 DIP("%scvt%sps2dq %s,%s\n",
9978 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
sewardj251b59e2012-05-25 13:51:07 +00009979 }
9980
9981 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
9982 : get_sse_roundingmode() );
9983 t0 = t1 = t2 = t3 = IRTemp_INVALID;
sewardj4b1cc832012-06-13 11:10:20 +00009984 breakupV128to32s( argV, &t3, &t2, &t1, &t0 );
sewardj251b59e2012-05-25 13:51:07 +00009985 /* This is less than ideal. If it turns out to be a performance
9986 bottleneck it can be improved. */
9987# define CVT(_t) \
9988 binop( Iop_F64toI32S, \
9989 mkexpr(rmode), \
9990 unop( Iop_F32toF64, \
9991 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
9992
9993 putXMMRegLane32( rG, 3, CVT(t3) );
9994 putXMMRegLane32( rG, 2, CVT(t2) );
9995 putXMMRegLane32( rG, 1, CVT(t1) );
9996 putXMMRegLane32( rG, 0, CVT(t0) );
9997# undef CVT
9998 if (isAvx)
9999 putYMMRegLane128( rG, 1, mkV128(0) );
10000
10001 return delta;
10002}
10003
10004
sewardj66becf32012-06-18 23:15:16 +000010005static Long dis_CVTxPS2DQ_256 ( VexAbiInfo* vbi, Prefix pfx,
10006 Long delta, Bool r2zero )
10007{
10008 IRTemp addr = IRTemp_INVALID;
10009 Int alen = 0;
10010 HChar dis_buf[50];
10011 UChar modrm = getUChar(delta);
10012 IRTemp argV = newTemp(Ity_V256);
sewardj66becf32012-06-18 23:15:16 +000010013 IRTemp rmode = newTemp(Ity_I32);
10014 UInt rG = gregOfRexRM(pfx,modrm);
10015 IRTemp t0, t1, t2, t3, t4, t5, t6, t7;
10016
10017 if (epartIsReg(modrm)) {
10018 UInt rE = eregOfRexRM(pfx,modrm);
10019 assign( argV, getYMMReg(rE) );
10020 delta += 1;
10021 DIP("vcvt%sps2dq %s,%s\n",
10022 r2zero ? "t" : "", nameYMMReg(rE), nameYMMReg(rG));
10023 } else {
10024 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10025 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10026 delta += alen;
10027 DIP("vcvt%sps2dq %s,%s\n",
10028 r2zero ? "t" : "", dis_buf, nameYMMReg(rG) );
10029 }
10030
10031 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
10032 : get_sse_roundingmode() );
10033 t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = IRTemp_INVALID;
sewardj4f228902012-06-21 09:17:58 +000010034 breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 );
sewardj66becf32012-06-18 23:15:16 +000010035 /* This is less than ideal. If it turns out to be a performance
10036 bottleneck it can be improved. */
10037# define CVT(_t) \
10038 binop( Iop_F64toI32S, \
10039 mkexpr(rmode), \
10040 unop( Iop_F32toF64, \
10041 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
10042
10043 putYMMRegLane32( rG, 7, CVT(t7) );
10044 putYMMRegLane32( rG, 6, CVT(t6) );
10045 putYMMRegLane32( rG, 5, CVT(t5) );
10046 putYMMRegLane32( rG, 4, CVT(t4) );
10047 putYMMRegLane32( rG, 3, CVT(t3) );
10048 putYMMRegLane32( rG, 2, CVT(t2) );
10049 putYMMRegLane32( rG, 1, CVT(t1) );
10050 putYMMRegLane32( rG, 0, CVT(t0) );
10051# undef CVT
10052
10053 return delta;
10054}
10055
10056
10057static Long dis_CVTxPD2DQ_128 ( VexAbiInfo* vbi, Prefix pfx,
10058 Long delta, Bool isAvx, Bool r2zero )
10059{
10060 IRTemp addr = IRTemp_INVALID;
10061 Int alen = 0;
10062 HChar dis_buf[50];
10063 UChar modrm = getUChar(delta);
10064 IRTemp argV = newTemp(Ity_V128);
10065 IRTemp rmode = newTemp(Ity_I32);
10066 UInt rG = gregOfRexRM(pfx,modrm);
10067 IRTemp t0, t1;
10068
10069 if (epartIsReg(modrm)) {
10070 UInt rE = eregOfRexRM(pfx,modrm);
10071 assign( argV, getXMMReg(rE) );
10072 delta += 1;
10073 DIP("%scvt%spd2dq %s,%s\n",
10074 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
10075 } else {
10076 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10077 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10078 delta += alen;
10079 DIP("%scvt%spd2dqx %s,%s\n",
10080 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10081 }
10082
10083 if (r2zero) {
10084 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10085 } else {
10086 assign( rmode, get_sse_roundingmode() );
10087 }
10088
10089 t0 = newTemp(Ity_F64);
10090 t1 = newTemp(Ity_F64);
10091 assign( t0, unop(Iop_ReinterpI64asF64,
10092 unop(Iop_V128to64, mkexpr(argV))) );
10093 assign( t1, unop(Iop_ReinterpI64asF64,
10094 unop(Iop_V128HIto64, mkexpr(argV))) );
10095
10096# define CVT(_t) binop( Iop_F64toI32S, \
10097 mkexpr(rmode), \
10098 mkexpr(_t) )
10099
10100 putXMMRegLane32( rG, 3, mkU32(0) );
10101 putXMMRegLane32( rG, 2, mkU32(0) );
10102 putXMMRegLane32( rG, 1, CVT(t1) );
10103 putXMMRegLane32( rG, 0, CVT(t0) );
10104# undef CVT
10105 if (isAvx)
10106 putYMMRegLane128( rG, 1, mkV128(0) );
10107
10108 return delta;
10109}
10110
10111
10112static Long dis_CVTxPD2DQ_256 ( VexAbiInfo* vbi, Prefix pfx,
10113 Long delta, Bool r2zero )
10114{
10115 IRTemp addr = IRTemp_INVALID;
10116 Int alen = 0;
10117 HChar dis_buf[50];
10118 UChar modrm = getUChar(delta);
10119 IRTemp argV = newTemp(Ity_V256);
10120 IRTemp rmode = newTemp(Ity_I32);
10121 UInt rG = gregOfRexRM(pfx,modrm);
10122 IRTemp t0, t1, t2, t3;
10123
10124 if (epartIsReg(modrm)) {
10125 UInt rE = eregOfRexRM(pfx,modrm);
10126 assign( argV, getYMMReg(rE) );
10127 delta += 1;
10128 DIP("vcvt%spd2dq %s,%s\n",
10129 r2zero ? "t" : "", nameYMMReg(rE), nameXMMReg(rG));
10130 } else {
10131 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10132 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10133 delta += alen;
10134 DIP("vcvt%spd2dqy %s,%s\n",
10135 r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10136 }
10137
10138 if (r2zero) {
10139 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10140 } else {
10141 assign( rmode, get_sse_roundingmode() );
10142 }
10143
10144 t0 = IRTemp_INVALID;
10145 t1 = IRTemp_INVALID;
10146 t2 = IRTemp_INVALID;
10147 t3 = IRTemp_INVALID;
10148 breakupV256to64s( argV, &t3, &t2, &t1, &t0 );
10149
10150# define CVT(_t) binop( Iop_F64toI32S, \
10151 mkexpr(rmode), \
10152 unop( Iop_ReinterpI64asF64, \
10153 mkexpr(_t) ) )
10154
10155 putXMMRegLane32( rG, 3, CVT(t3) );
10156 putXMMRegLane32( rG, 2, CVT(t2) );
10157 putXMMRegLane32( rG, 1, CVT(t1) );
10158 putXMMRegLane32( rG, 0, CVT(t0) );
10159# undef CVT
10160 putYMMRegLane128( rG, 1, mkV128(0) );
10161
10162 return delta;
10163}
10164
10165
10166static Long dis_CVTDQ2PS_128 ( VexAbiInfo* vbi, Prefix pfx,
10167 Long delta, Bool isAvx )
10168{
10169 IRTemp addr = IRTemp_INVALID;
10170 Int alen = 0;
10171 HChar dis_buf[50];
10172 UChar modrm = getUChar(delta);
10173 IRTemp argV = newTemp(Ity_V128);
10174 IRTemp rmode = newTemp(Ity_I32);
10175 UInt rG = gregOfRexRM(pfx,modrm);
10176 IRTemp t0, t1, t2, t3;
10177
10178 if (epartIsReg(modrm)) {
10179 UInt rE = eregOfRexRM(pfx,modrm);
10180 assign( argV, getXMMReg(rE) );
10181 delta += 1;
10182 DIP("%scvtdq2ps %s,%s\n",
10183 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
10184 } else {
10185 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10186 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10187 delta += alen;
10188 DIP("%scvtdq2ps %s,%s\n",
10189 isAvx ? "v" : "", dis_buf, nameXMMReg(rG) );
10190 }
10191
10192 assign( rmode, get_sse_roundingmode() );
10193 t0 = IRTemp_INVALID;
10194 t1 = IRTemp_INVALID;
10195 t2 = IRTemp_INVALID;
10196 t3 = IRTemp_INVALID;
10197 breakupV128to32s( argV, &t3, &t2, &t1, &t0 );
10198
10199# define CVT(_t) binop( Iop_F64toF32, \
10200 mkexpr(rmode), \
10201 unop(Iop_I32StoF64,mkexpr(_t)))
10202
10203 putXMMRegLane32F( rG, 3, CVT(t3) );
10204 putXMMRegLane32F( rG, 2, CVT(t2) );
10205 putXMMRegLane32F( rG, 1, CVT(t1) );
10206 putXMMRegLane32F( rG, 0, CVT(t0) );
10207# undef CVT
10208 if (isAvx)
10209 putYMMRegLane128( rG, 1, mkV128(0) );
10210
10211 return delta;
10212}
10213
10214static Long dis_CVTDQ2PS_256 ( VexAbiInfo* vbi, Prefix pfx,
10215 Long delta )
10216{
10217 IRTemp addr = IRTemp_INVALID;
10218 Int alen = 0;
10219 HChar dis_buf[50];
10220 UChar modrm = getUChar(delta);
10221 IRTemp argV = newTemp(Ity_V256);
sewardj66becf32012-06-18 23:15:16 +000010222 IRTemp rmode = newTemp(Ity_I32);
10223 UInt rG = gregOfRexRM(pfx,modrm);
10224 IRTemp t0, t1, t2, t3, t4, t5, t6, t7;
10225
10226 if (epartIsReg(modrm)) {
10227 UInt rE = eregOfRexRM(pfx,modrm);
10228 assign( argV, getYMMReg(rE) );
10229 delta += 1;
10230 DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
10231 } else {
10232 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10233 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10234 delta += alen;
10235 DIP("vcvtdq2ps %s,%s\n", dis_buf, nameYMMReg(rG) );
10236 }
10237
10238 assign( rmode, get_sse_roundingmode() );
10239 t0 = IRTemp_INVALID;
10240 t1 = IRTemp_INVALID;
10241 t2 = IRTemp_INVALID;
10242 t3 = IRTemp_INVALID;
10243 t4 = IRTemp_INVALID;
10244 t5 = IRTemp_INVALID;
10245 t6 = IRTemp_INVALID;
10246 t7 = IRTemp_INVALID;
sewardj4f228902012-06-21 09:17:58 +000010247 breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 );
sewardj66becf32012-06-18 23:15:16 +000010248
10249# define CVT(_t) binop( Iop_F64toF32, \
10250 mkexpr(rmode), \
10251 unop(Iop_I32StoF64,mkexpr(_t)))
10252
10253 putYMMRegLane32F( rG, 7, CVT(t7) );
10254 putYMMRegLane32F( rG, 6, CVT(t6) );
10255 putYMMRegLane32F( rG, 5, CVT(t5) );
10256 putYMMRegLane32F( rG, 4, CVT(t4) );
10257 putYMMRegLane32F( rG, 3, CVT(t3) );
10258 putYMMRegLane32F( rG, 2, CVT(t2) );
10259 putYMMRegLane32F( rG, 1, CVT(t1) );
10260 putYMMRegLane32F( rG, 0, CVT(t0) );
10261# undef CVT
10262
10263 return delta;
10264}
10265
10266
sewardj8ef22422012-05-24 16:29:18 +000010267static Long dis_PMOVMSKB_128 ( VexAbiInfo* vbi, Prefix pfx,
10268 Long delta, Bool isAvx )
10269{
10270 /* ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); */
10271 UChar modrm = getUChar(delta);
10272 vassert(epartIsReg(modrm)); /* ensured by caller */
10273 UInt rE = eregOfRexRM(pfx,modrm);
10274 UInt rG = gregOfRexRM(pfx,modrm);
10275 IRTemp t0 = newTemp(Ity_I64);
10276 IRTemp t1 = newTemp(Ity_I64);
10277 IRTemp t5 = newTemp(Ity_I64);
10278 assign(t0, getXMMRegLane64(rE, 0));
10279 assign(t1, getXMMRegLane64(rE, 1));
10280 assign(t5, mkIRExprCCall( Ity_I64, 0/*regparms*/,
10281 "amd64g_calculate_sse_pmovmskb",
10282 &amd64g_calculate_sse_pmovmskb,
10283 mkIRExprVec_2( mkexpr(t1), mkexpr(t0) )));
10284 putIReg32(rG, unop(Iop_64to32,mkexpr(t5)));
10285 DIP("%spmovmskb %s,%s\n", isAvx ? "v" : "", nameXMMReg(rE),
10286 nameIReg32(rG));
10287 delta += 1;
10288 return delta;
10289}
10290
10291
sewardj4b1cc832012-06-13 11:10:20 +000010292/* FIXME: why not just use InterleaveLO / InterleaveHI? I think the
10293 relevant ops are "xIsH ? InterleaveHI32x4 : InterleaveLO32x4". */
sewardj56c30312012-06-12 08:45:39 +000010294/* Does the maths for 128 bit versions of UNPCKLPS and UNPCKHPS */
10295static IRTemp math_UNPCKxPS_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
sewardjc4530ae2012-05-21 10:18:49 +000010296{
10297 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
sewardjc4530ae2012-05-21 10:18:49 +000010298 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
sewardj4b1cc832012-06-13 11:10:20 +000010299 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
10300 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
sewardjc4530ae2012-05-21 10:18:49 +000010301 IRTemp res = newTemp(Ity_V128);
sewardj4b1cc832012-06-13 11:10:20 +000010302 assign(res, xIsH ? mkV128from32s( s3, d3, s2, d2 )
10303 : mkV128from32s( s1, d1, s0, d0 ));
sewardj56c30312012-06-12 08:45:39 +000010304 return res;
10305}
10306
10307
10308/* FIXME: why not just use InterleaveLO / InterleaveHI ?? */
10309/* Does the maths for 128 bit versions of UNPCKLPD and UNPCKHPD */
10310static IRTemp math_UNPCKxPD_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
10311{
10312 IRTemp s1 = newTemp(Ity_I64);
10313 IRTemp s0 = newTemp(Ity_I64);
10314 IRTemp d1 = newTemp(Ity_I64);
10315 IRTemp d0 = newTemp(Ity_I64);
10316 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
10317 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
10318 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
10319 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
10320 IRTemp res = newTemp(Ity_V128);
10321 assign(res, xIsH ? binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1))
10322 : binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)));
sewardjc4530ae2012-05-21 10:18:49 +000010323 return res;
10324}
10325
10326
sewardj4b1cc832012-06-13 11:10:20 +000010327/* Does the maths for 256 bit versions of UNPCKLPD and UNPCKHPD.
10328 Doesn't seem like this fits in either of the Iop_Interleave{LO,HI}
10329 or the Iop_Cat{Odd,Even}Lanes idioms, hence just do it the stupid
10330 way. */
10331static IRTemp math_UNPCKxPD_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
10332{
10333 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
10334 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
10335 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
10336 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
10337 IRTemp res = newTemp(Ity_V256);
10338 assign(res, xIsH
10339 ? IRExpr_Qop(Iop_64x4toV256, mkexpr(s3), mkexpr(d3),
10340 mkexpr(s1), mkexpr(d1))
10341 : IRExpr_Qop(Iop_64x4toV256, mkexpr(s2), mkexpr(d2),
10342 mkexpr(s0), mkexpr(d0)));
10343 return res;
10344}
10345
10346
10347/* FIXME: this is really bad. Surely can do something better here?
10348 One observation is that the steering in the upper and lower 128 bit
10349 halves is the same as with math_UNPCKxPS_128, so we simply split
10350 into two halves, and use that. Consequently any improvement in
10351 math_UNPCKxPS_128 (probably, to use interleave-style primops)
10352 benefits this too. */
10353static IRTemp math_UNPCKxPS_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
10354{
sewardjfe0c5e72012-06-15 15:48:07 +000010355 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
10356 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
10357 breakupV256toV128s( sV, &sVhi, &sVlo );
10358 breakupV256toV128s( dV, &dVhi, &dVlo );
sewardj4b1cc832012-06-13 11:10:20 +000010359 IRTemp rVhi = math_UNPCKxPS_128(sVhi, dVhi, xIsH);
10360 IRTemp rVlo = math_UNPCKxPS_128(sVlo, dVlo, xIsH);
10361 IRTemp rV = newTemp(Ity_V256);
10362 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
10363 return rV;
10364}
10365
10366
10367static IRTemp math_SHUFPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
sewardj251b59e2012-05-25 13:51:07 +000010368{
10369 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
10370 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
10371 vassert(imm8 < 256);
10372
sewardj4b1cc832012-06-13 11:10:20 +000010373 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
10374 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
sewardj251b59e2012-05-25 13:51:07 +000010375
10376# define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
10377# define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
10378 IRTemp res = newTemp(Ity_V128);
10379 assign(res,
sewardj4b1cc832012-06-13 11:10:20 +000010380 mkV128from32s( SELS((imm8>>6)&3), SELS((imm8>>4)&3),
10381 SELD((imm8>>2)&3), SELD((imm8>>0)&3) ) );
sewardj251b59e2012-05-25 13:51:07 +000010382# undef SELD
10383# undef SELS
10384 return res;
10385}
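/* Editor's note: in the selection above, result lanes 0 and 1 are
   drawn from dV (chosen by imm8 bits 1:0 and 3:2) and lanes 2 and 3
   from sV (bits 5:4 and 7:6).  For example, imm8 == 0xE4 (binary
   11 10 01 00) selects d0, d1, s2, s3 -- the "no reordering"
   pattern. */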
10386
10387
sewardj4b1cc832012-06-13 11:10:20 +000010388/* 256-bit SHUFPS appears to steer each of the 128-bit halves
10389 identically. Hence do the clueless thing and use math_SHUFPS_128
10390 twice. */
10391static IRTemp math_SHUFPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
10392{
sewardjfe0c5e72012-06-15 15:48:07 +000010393 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
10394 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
10395 breakupV256toV128s( sV, &sVhi, &sVlo );
10396 breakupV256toV128s( dV, &dVhi, &dVlo );
sewardj4b1cc832012-06-13 11:10:20 +000010397 IRTemp rVhi = math_SHUFPS_128(sVhi, dVhi, imm8);
10398 IRTemp rVlo = math_SHUFPS_128(sVlo, dVlo, imm8);
10399 IRTemp rV = newTemp(Ity_V256);
10400 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
10401 return rV;
10402}
10403
10404
sewardj21459cb2012-06-18 14:05:52 +000010405static IRTemp math_SHUFPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
10406{
10407 IRTemp s1 = newTemp(Ity_I64);
10408 IRTemp s0 = newTemp(Ity_I64);
10409 IRTemp d1 = newTemp(Ity_I64);
10410 IRTemp d0 = newTemp(Ity_I64);
10411
10412 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
10413 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
10414 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
10415 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
10416
10417# define SELD(n) mkexpr((n)==0 ? d0 : d1)
10418# define SELS(n) mkexpr((n)==0 ? s0 : s1)
10419
10420 IRTemp res = newTemp(Ity_V128);
10421 assign(res, binop( Iop_64HLtoV128,
10422 SELS((imm8>>1)&1), SELD((imm8>>0)&1) ) );
10423
10424# undef SELD
10425# undef SELS
10426 return res;
10427}
10428
10429
10430static IRTemp math_SHUFPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
10431{
10432 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
10433 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
10434 breakupV256toV128s( sV, &sVhi, &sVlo );
10435 breakupV256toV128s( dV, &dVhi, &dVlo );
10436 IRTemp rVhi = math_SHUFPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
10437 IRTemp rVlo = math_SHUFPD_128(sVlo, dVlo, imm8 & 3);
10438 IRTemp rV = newTemp(Ity_V256);
10439 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
10440 return rV;
10441}
10442
10443
10444static IRTemp math_BLENDPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
10445{
10446 UShort imm8_mask_16;
10447 IRTemp imm8_mask = newTemp(Ity_V128);
10448
10449 switch( imm8 & 3 ) {
10450 case 0: imm8_mask_16 = 0x0000; break;
10451 case 1: imm8_mask_16 = 0x00FF; break;
10452 case 2: imm8_mask_16 = 0xFF00; break;
10453 case 3: imm8_mask_16 = 0xFFFF; break;
10454 default: vassert(0); break;
10455 }
10456 assign( imm8_mask, mkV128( imm8_mask_16 ) );
10457
10458 IRTemp res = newTemp(Ity_V128);
10459 assign ( res, binop( Iop_OrV128,
10460 binop( Iop_AndV128, mkexpr(sV),
10461 mkexpr(imm8_mask) ),
10462 binop( Iop_AndV128, mkexpr(dV),
10463 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
10464 return res;
10465}
10466
10467
10468static IRTemp math_BLENDPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
10469{
10470 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
10471 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
10472 breakupV256toV128s( sV, &sVhi, &sVlo );
10473 breakupV256toV128s( dV, &dVhi, &dVlo );
10474 IRTemp rVhi = math_BLENDPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
10475 IRTemp rVlo = math_BLENDPD_128(sVlo, dVlo, imm8 & 3);
10476 IRTemp rV = newTemp(Ity_V256);
10477 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
10478 return rV;
10479}
10480
10481
10482static IRTemp math_BLENDPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
10483{
10484 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
10485 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
10486 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
10487 0xFFFF };
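   /* Bit i of imm8 (i = 0..3) selects 32-bit lane i of the result from sV
      rather than dV; imm8_perms[imm8 & 15] is the equivalent mkV128 mask,
      in which each mask bit covers one byte of the 128-bit value. */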
10488 IRTemp imm8_mask = newTemp(Ity_V128);
10489 assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) );
10490
10491 IRTemp res = newTemp(Ity_V128);
10492 assign ( res, binop( Iop_OrV128,
10493 binop( Iop_AndV128, mkexpr(sV),
10494 mkexpr(imm8_mask) ),
10495 binop( Iop_AndV128, mkexpr(dV),
10496 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
10497 return res;
10498}
10499
10500
10501static IRTemp math_BLENDPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
10502{
10503 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
10504 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
10505 breakupV256toV128s( sV, &sVhi, &sVlo );
10506 breakupV256toV128s( dV, &dVhi, &dVlo );
10507 IRTemp rVhi = math_BLENDPS_128(sVhi, dVhi, (imm8 >> 4) & 15);
10508 IRTemp rVlo = math_BLENDPS_128(sVlo, dVlo, imm8 & 15);
10509 IRTemp rV = newTemp(Ity_V256);
10510 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
10511 return rV;
10512}
10513
10514
10515static IRTemp math_PBLENDW_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
10516{
10517   /* Make imm16 be a 16-bit version of imm8, formed by duplicating each
10518      bit in imm8. */
10519 Int i;
10520 UShort imm16 = 0;
10521 for (i = 0; i < 8; i++) {
10522 if (imm8 & (1 << i))
10523 imm16 |= (3 << (2*i));
10524 }
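   /* For example, imm8 == 0xA5 (binary 10100101) gives imm16 == 0xCC33. */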
10525 IRTemp imm16_mask = newTemp(Ity_V128);
10526 assign( imm16_mask, mkV128( imm16 ));
10527
10528 IRTemp res = newTemp(Ity_V128);
10529 assign ( res, binop( Iop_OrV128,
10530 binop( Iop_AndV128, mkexpr(sV),
10531 mkexpr(imm16_mask) ),
10532 binop( Iop_AndV128, mkexpr(dV),
10533 unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) );
10534 return res;
10535}
10536
10537
sewardje8a7eb72012-06-12 14:59:17 +000010538static IRTemp math_PMULUDQ_128 ( IRTemp sV, IRTemp dV )
10539{
10540 /* This is a really poor translation -- could be improved if
10541 performance critical */
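   /* PMULUDQ: unsigned multiply of the low 32 bits of each 64-bit lane
      (32-bit lanes 0 and 2), producing two full 64-bit products. */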
10542 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
10543 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
sewardj4b1cc832012-06-13 11:10:20 +000010544 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
10545 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
sewardje8a7eb72012-06-12 14:59:17 +000010546 IRTemp res = newTemp(Ity_V128);
10547 assign(res, binop(Iop_64HLtoV128,
10548 binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)),
10549 binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ));
10550 return res;
10551}
10552
10553
sewardj89378162012-06-24 12:12:20 +000010554static IRTemp math_PMULDQ_128 ( IRTemp dV, IRTemp sV )
10555{
10556 /* This is a really poor translation -- could be improved if
10557 performance critical */
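   /* PMULDQ: same lane selection as PMULUDQ above, but the 32-bit
      multiplies are signed. */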
10558 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
10559 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
10560 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
10561 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
10562 IRTemp res = newTemp(Ity_V128);
10563 assign(res, binop(Iop_64HLtoV128,
10564 binop( Iop_MullS32, mkexpr(d2), mkexpr(s2)),
10565 binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) ));
10566 return res;
10567}
10568
10569
10570static IRTemp math_PMADDWD_128 ( IRTemp dV, IRTemp sV )
10571{
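   /* PMADDWD: multiply corresponding signed 16-bit lanes and add each
      adjacent pair of 32-bit products, giving four 32-bit results.  The
      work is delegated to a clean helper, one 64-bit half at a time. */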
10572 IRTemp sVhi, sVlo, dVhi, dVlo;
10573 IRTemp resHi = newTemp(Ity_I64);
10574 IRTemp resLo = newTemp(Ity_I64);
10575 sVhi = sVlo = dVhi = dVlo = IRTemp_INVALID;
10576 breakupV128to64s( sV, &sVhi, &sVlo );
10577 breakupV128to64s( dV, &dVhi, &dVlo );
10578 assign( resHi, mkIRExprCCall(Ity_I64, 0/*regparms*/,
10579 "amd64g_calculate_mmx_pmaddwd",
10580 &amd64g_calculate_mmx_pmaddwd,
10581 mkIRExprVec_2( mkexpr(sVhi), mkexpr(dVhi))));
10582 assign( resLo, mkIRExprCCall(Ity_I64, 0/*regparms*/,
10583 "amd64g_calculate_mmx_pmaddwd",
10584 &amd64g_calculate_mmx_pmaddwd,
10585 mkIRExprVec_2( mkexpr(sVlo), mkexpr(dVlo))));
10586 IRTemp res = newTemp(Ity_V128);
10587 assign( res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo))) ;
10588 return res;
10589}
10590
10591
10592static IRTemp math_ADDSUBPD_128 ( IRTemp dV, IRTemp sV )
10593{
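   /* ADDSUBPD: result lane 0 is d0 - s0 and lane 1 is d1 + s1.  Compute
      a full add and a full sub, then take one lane from each. */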
10594 IRTemp addV = newTemp(Ity_V128);
10595 IRTemp subV = newTemp(Ity_V128);
10596 IRTemp a1 = newTemp(Ity_I64);
10597 IRTemp s0 = newTemp(Ity_I64);
10598
10599 assign( addV, binop(Iop_Add64Fx2, mkexpr(dV), mkexpr(sV)) );
10600 assign( subV, binop(Iop_Sub64Fx2, mkexpr(dV), mkexpr(sV)) );
10601
10602 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
10603 assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
10604
10605 IRTemp res = newTemp(Ity_V128);
10606 assign( res, binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
10607 return res;
10608}
10609
10610
10611static IRTemp math_ADDSUBPD_256 ( IRTemp dV, IRTemp sV )
10612{
10613 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
10614 IRTemp addV = newTemp(Ity_V256);
10615 IRTemp subV = newTemp(Ity_V256);
10616 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
10617
10618 assign( addV, binop(Iop_Add64Fx4, mkexpr(dV), mkexpr(sV)) );
10619 assign( subV, binop(Iop_Sub64Fx4, mkexpr(dV), mkexpr(sV)) );
10620
10621 breakupV256to64s( addV, &a3, &a2, &a1, &a0 );
10622 breakupV256to64s( subV, &s3, &s2, &s1, &s0 );
10623
10624 IRTemp res = newTemp(Ity_V256);
10625 assign( res, mkV256from64s( a3, s2, a1, s0 ) );
10626 return res;
10627}
10628
10629
10630static IRTemp math_ADDSUBPS_128 ( IRTemp dV, IRTemp sV )
10631{
10632 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
10633 IRTemp addV = newTemp(Ity_V128);
10634 IRTemp subV = newTemp(Ity_V128);
10635 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
10636
10637 assign( addV, binop(Iop_Add32Fx4, mkexpr(dV), mkexpr(sV)) );
10638 assign( subV, binop(Iop_Sub32Fx4, mkexpr(dV), mkexpr(sV)) );
10639
10640 breakupV128to32s( addV, &a3, &a2, &a1, &a0 );
10641 breakupV128to32s( subV, &s3, &s2, &s1, &s0 );
10642
10643 IRTemp res = newTemp(Ity_V128);
10644 assign( res, mkV128from32s( a3, s2, a1, s0 ) );
10645 return res;
10646}
10647
10648
10649static IRTemp math_ADDSUBPS_256 ( IRTemp dV, IRTemp sV )
10650{
10651 IRTemp a7, a6, a5, a4, a3, a2, a1, a0;
10652 IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
10653 IRTemp addV = newTemp(Ity_V256);
10654 IRTemp subV = newTemp(Ity_V256);
10655 a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID;
10656 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
10657
10658 assign( addV, binop(Iop_Add32Fx8, mkexpr(dV), mkexpr(sV)) );
10659 assign( subV, binop(Iop_Sub32Fx8, mkexpr(dV), mkexpr(sV)) );
10660
10661 breakupV256to32s( addV, &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0 );
10662 breakupV256to32s( subV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
10663
10664 IRTemp res = newTemp(Ity_V256);
10665 assign( res, mkV256from32s( a7, s6, a5, s4, a3, s2, a1, s0 ) );
10666 return res;
10667}
10668
10669
sewardj251b59e2012-05-25 13:51:07 +000010670/* Handle 128 bit PSHUFLW and PSHUFHW. */
10671static Long dis_PSHUFxW_128 ( VexAbiInfo* vbi, Prefix pfx,
10672 Long delta, Bool isAvx, Bool xIsH )
10673{
10674 IRTemp addr = IRTemp_INVALID;
10675 Int alen = 0;
10676 HChar dis_buf[50];
10677 UChar modrm = getUChar(delta);
10678 UInt rG = gregOfRexRM(pfx,modrm);
10679 UInt imm8;
10680 IRTemp sVmut, dVmut, sVcon, sV, dV, s3, s2, s1, s0;
10681 s3 = s2 = s1 = s0 = IRTemp_INVALID;
10682 sV = newTemp(Ity_V128);
10683 dV = newTemp(Ity_V128);
10684 sVmut = newTemp(Ity_I64);
10685 dVmut = newTemp(Ity_I64);
10686 sVcon = newTemp(Ity_I64);
10687 if (epartIsReg(modrm)) {
10688 UInt rE = eregOfRexRM(pfx,modrm);
10689 assign( sV, getXMMReg(rE) );
10690 imm8 = (UInt)getUChar(delta+1);
10691 delta += 1+1;
10692 DIP("%spshuf%cw $%u,%s,%s\n",
10693 isAvx ? "v" : "", xIsH ? 'h' : 'l',
10694 imm8, nameXMMReg(rE), nameXMMReg(rG));
10695 } else {
10696 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
10697 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10698 imm8 = (UInt)getUChar(delta+alen);
10699 delta += alen+1;
10700 DIP("%spshuf%cw $%u,%s,%s\n",
10701 isAvx ? "v" : "", xIsH ? 'h' : 'l',
10702 imm8, dis_buf, nameXMMReg(rG));
10703 }
10704
10705 /* Get the to-be-changed (mut) and unchanging (con) bits of the
10706 source. */
10707 assign( sVmut, unop(xIsH ? Iop_V128HIto64 : Iop_V128to64, mkexpr(sV)) );
10708 assign( sVcon, unop(xIsH ? Iop_V128to64 : Iop_V128HIto64, mkexpr(sV)) );
10709
10710 breakup64to16s( sVmut, &s3, &s2, &s1, &s0 );
10711# define SEL(n) \
10712 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
10713 assign(dVmut, mk64from16s( SEL((imm8>>6)&3), SEL((imm8>>4)&3),
10714 SEL((imm8>>2)&3), SEL((imm8>>0)&3) ));
10715# undef SEL
10716
10717 assign(dV, xIsH ? binop(Iop_64HLtoV128, mkexpr(dVmut), mkexpr(sVcon))
10718 : binop(Iop_64HLtoV128, mkexpr(sVcon), mkexpr(dVmut)) );
10719
10720 (isAvx ? putYMMRegLoAndZU : putXMMReg)(rG, mkexpr(dV));
10721 return delta;
10722}
10723
10724
sewardje8a7eb72012-06-12 14:59:17 +000010725static Long dis_PEXTRW_128_EregOnly_toG ( VexAbiInfo* vbi, Prefix pfx,
10726 Long delta, Bool isAvx )
10727{
10728 Long deltaIN = delta;
10729 UChar modrm = getUChar(delta);
10730 UInt rG = gregOfRexRM(pfx,modrm);
10731 IRTemp sV = newTemp(Ity_V128);
10732 IRTemp d16 = newTemp(Ity_I16);
10733 UInt imm8;
10734 IRTemp s0, s1, s2, s3;
10735 if (epartIsReg(modrm)) {
10736 UInt rE = eregOfRexRM(pfx,modrm);
10737 assign(sV, getXMMReg(rE));
10738 imm8 = getUChar(delta+1) & 7;
10739 delta += 1+1;
10740 DIP("%spextrw $%d,%s,%s\n", isAvx ? "v" : "",
10741 (Int)imm8, nameXMMReg(rE), nameIReg32(rG));
10742 } else {
10743 /* The memory case is disallowed, apparently. */
10744 return deltaIN; /* FAIL */
10745 }
10746 s3 = s2 = s1 = s0 = IRTemp_INVALID;
sewardj4b1cc832012-06-13 11:10:20 +000010747 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
sewardje8a7eb72012-06-12 14:59:17 +000010748 switch (imm8) {
10749 case 0: assign(d16, unop(Iop_32to16, mkexpr(s0))); break;
10750 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(s0))); break;
10751 case 2: assign(d16, unop(Iop_32to16, mkexpr(s1))); break;
10752 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(s1))); break;
10753 case 4: assign(d16, unop(Iop_32to16, mkexpr(s2))); break;
10754 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(s2))); break;
10755 case 6: assign(d16, unop(Iop_32to16, mkexpr(s3))); break;
10756 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(s3))); break;
10757 default: vassert(0);
10758 }
10759 putIReg32(rG, unop(Iop_16Uto32, mkexpr(d16)));
10760 return delta;
10761}
10762
10763
sewardj4b1cc832012-06-13 11:10:20 +000010764static Long dis_CVTDQ2PD_128 ( VexAbiInfo* vbi, Prefix pfx,
10765 Long delta, Bool isAvx )
10766{
10767 IRTemp addr = IRTemp_INVALID;
10768 Int alen = 0;
10769 HChar dis_buf[50];
10770 UChar modrm = getUChar(delta);
10771 IRTemp arg64 = newTemp(Ity_I64);
10772 UInt rG = gregOfRexRM(pfx,modrm);
10773   HChar* mbV = isAvx ? "v" : "";
10774 if (epartIsReg(modrm)) {
10775 UInt rE = eregOfRexRM(pfx,modrm);
10776 assign( arg64, getXMMRegLane64(rE, 0) );
10777 delta += 1;
10778 DIP("%scvtdq2pd %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
10779 } else {
10780 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10781 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
10782 delta += alen;
10783 DIP("%scvtdq2pd %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
10784 }
10785 putXMMRegLane64F(
10786 rG, 0,
10787 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
10788 );
10789 putXMMRegLane64F(
10790 rG, 1,
10791 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
10792 );
10793 if (isAvx)
10794 putYMMRegLane128(rG, 1, mkV128(0));
10795 return delta;
10796}
10797
10798
sewardjfe0c5e72012-06-15 15:48:07 +000010799static Long dis_STMXCSR ( VexAbiInfo* vbi, Prefix pfx,
10800 Long delta, Bool isAvx )
10801{
10802 IRTemp addr = IRTemp_INVALID;
10803 Int alen = 0;
10804 HChar dis_buf[50];
10805 UChar modrm = getUChar(delta);
10806 vassert(!epartIsReg(modrm)); /* ensured by caller */
10807 vassert(gregOfRexRM(pfx,modrm) == 3); /* ditto */
10808
10809 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10810 delta += alen;
10811
10812 /* Fake up a native SSE mxcsr word. The only thing it depends on
10813 is SSEROUND[1:0], so call a clean helper to cook it up.
10814 */
10815 /* ULong amd64h_create_mxcsr ( ULong sseround ) */
10816 DIP("%sstmxcsr %s\n", isAvx ? "v" : "", dis_buf);
10817 storeLE(
10818 mkexpr(addr),
10819 unop(Iop_64to32,
10820 mkIRExprCCall(
10821 Ity_I64, 0/*regp*/,
10822 "amd64g_create_mxcsr", &amd64g_create_mxcsr,
10823 mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) )
10824 )
10825 )
10826 );
10827 return delta;
10828}
10829
10830
10831static Long dis_LDMXCSR ( VexAbiInfo* vbi, Prefix pfx,
10832 Long delta, Bool isAvx )
10833{
10834 IRTemp addr = IRTemp_INVALID;
10835 Int alen = 0;
10836 HChar dis_buf[50];
10837 UChar modrm = getUChar(delta);
10838 vassert(!epartIsReg(modrm)); /* ensured by caller */
10839 vassert(gregOfRexRM(pfx,modrm) == 2); /* ditto */
10840
10841 IRTemp t64 = newTemp(Ity_I64);
10842 IRTemp ew = newTemp(Ity_I32);
10843
10844 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10845 delta += alen;
10846 DIP("%sldmxcsr %s\n", isAvx ? "v" : "", dis_buf);
10847
10848 /* The only thing we observe in %mxcsr is the rounding mode.
10849 Therefore, pass the 32-bit value (SSE native-format control
10850 word) to a clean helper, getting back a 64-bit value, the
10851 lower half of which is the SSEROUND value to store, and the
10852 upper half of which is the emulation-warning token which may
10853 be generated.
10854 */
10855 /* ULong amd64h_check_ldmxcsr ( ULong ); */
10856 assign( t64, mkIRExprCCall(
10857 Ity_I64, 0/*regparms*/,
10858 "amd64g_check_ldmxcsr",
10859 &amd64g_check_ldmxcsr,
10860 mkIRExprVec_1(
10861 unop(Iop_32Uto64,
10862 loadLE(Ity_I32, mkexpr(addr))
10863 )
10864 )
10865 )
10866 );
10867
10868 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
10869 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
10870 put_emwarn( mkexpr(ew) );
10871 /* Finally, if an emulation warning was reported, side-exit to
10872 the next insn, reporting the warning, so that Valgrind's
10873 dispatcher sees the warning. */
10874 stmt(
10875 IRStmt_Exit(
10876 binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)),
10877 Ijk_EmWarn,
10878 IRConst_U64(guest_RIP_bbstart+delta),
10879 OFFB_RIP
10880 )
10881 );
10882 return delta;
10883}
10884
10885
sewardj4ed05e02012-06-18 15:01:30 +000010886static IRTemp math_PINSRW_128 ( IRTemp v128, IRTemp u16, UInt imm8 )
10887{
10888 vassert(imm8 >= 0 && imm8 <= 7);
10889
10890 // Create a V128 value which has the selected word in the
10891 // specified lane, and zeroes everywhere else.
10892 IRTemp tmp128 = newTemp(Ity_V128);
10893 IRTemp halfshift = newTemp(Ity_I64);
10894 assign(halfshift, binop(Iop_Shl64,
10895 unop(Iop_16Uto64, mkexpr(u16)),
10896 mkU8(16 * (imm8 & 3))));
10897 if (imm8 < 4) {
10898 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
10899 } else {
10900 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
10901 }
10902
10903 UShort mask = ~(3 << (imm8 * 2));
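   /* Each bit of the mkV128 mask covers one byte; e.g. imm8 == 2 gives
      mask == 0xFFCF, which clears just 16-bit lane 2 of v128. */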
10904 IRTemp res = newTemp(Ity_V128);
10905 assign( res, binop(Iop_OrV128,
10906 mkexpr(tmp128),
10907 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
10908 return res;
10909}
10910
10911
sewardj82096922012-06-24 14:57:59 +000010912static IRTemp math_PSADBW_128 ( IRTemp dV, IRTemp sV )
10913{
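   /* PSADBW: for each 64-bit half, sum the absolute differences of the
      eight byte pairs, giving a 16-bit total zero-extended to 64 bits.
      Each half is computed by a clean helper. */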
10914 IRTemp s1, s0, d1, d0;
10915 s1 = s0 = d1 = d0 = IRTemp_INVALID;
10916
10917 breakupV128to64s( sV, &s1, &s0 );
10918 breakupV128to64s( dV, &d1, &d0 );
10919
10920 IRTemp res = newTemp(Ity_V128);
10921 assign( res,
10922 binop(Iop_64HLtoV128,
10923 mkIRExprCCall(Ity_I64, 0/*regparms*/,
10924 "amd64g_calculate_mmx_psadbw",
10925 &amd64g_calculate_mmx_psadbw,
10926 mkIRExprVec_2( mkexpr(s1), mkexpr(d1))),
10927 mkIRExprCCall(Ity_I64, 0/*regparms*/,
10928 "amd64g_calculate_mmx_psadbw",
10929 &amd64g_calculate_mmx_psadbw,
10930 mkIRExprVec_2( mkexpr(s0), mkexpr(d0)))) );
10931 return res;
10932}
10933
10934
sewardj8eb7ae82012-06-24 14:00:27 +000010935static Long dis_MASKMOVDQU ( VexAbiInfo* vbi, Prefix pfx,
10936 Long delta, Bool isAvx )
10937{
10938 IRTemp regD = newTemp(Ity_V128);
10939 IRTemp mask = newTemp(Ity_V128);
10940 IRTemp olddata = newTemp(Ity_V128);
10941 IRTemp newdata = newTemp(Ity_V128);
10942 IRTemp addr = newTemp(Ity_I64);
10943 UChar modrm = getUChar(delta);
10944 UInt rG = gregOfRexRM(pfx,modrm);
10945 UInt rE = eregOfRexRM(pfx,modrm);
10946
10947 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
10948 assign( regD, getXMMReg( rG ));
10949
10950 /* Unfortunately can't do the obvious thing with SarN8x16
10951 here since that can't be re-emitted as SSE2 code - no such
10952 insn. */
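   /* Instead, arithmetically shift each byte right by 7 with SarN8x8,
      replicating its top bit, so every mask byte becomes 0x00 or 0xFF. */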
10953 assign( mask,
10954 binop(Iop_64HLtoV128,
10955 binop(Iop_SarN8x8,
10956 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ),
10957 mkU8(7) ),
10958 binop(Iop_SarN8x8,
10959 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ),
10960 mkU8(7) ) ));
10961 assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
10962 assign( newdata, binop(Iop_OrV128,
10963 binop(Iop_AndV128,
10964 mkexpr(regD),
10965 mkexpr(mask) ),
10966 binop(Iop_AndV128,
10967 mkexpr(olddata),
10968 unop(Iop_NotV128, mkexpr(mask)))) );
10969 storeLE( mkexpr(addr), mkexpr(newdata) );
10970
10971 delta += 1;
10972 DIP("%smaskmovdqu %s,%s\n", isAvx ? "v" : "",
10973 nameXMMReg(rE), nameXMMReg(rG) );
10974 return delta;
10975}
10976
10977
10978static Long dis_MOVMSKPS_128 ( VexAbiInfo* vbi, Prefix pfx,
10979 Long delta, Bool isAvx )
10980{
10981 UChar modrm = getUChar(delta);
10982 UInt rG = gregOfRexRM(pfx,modrm);
10983 UInt rE = eregOfRexRM(pfx,modrm);
10984 IRTemp t0 = newTemp(Ity_I32);
10985 IRTemp t1 = newTemp(Ity_I32);
10986 IRTemp t2 = newTemp(Ity_I32);
10987 IRTemp t3 = newTemp(Ity_I32);
10988 delta += 1;
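   /* Move the sign bit of F32 lane i down to bit position i and mask it;
      the four single-bit results are then ORed together. */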
10989 assign( t0, binop( Iop_And32,
10990 binop(Iop_Shr32, getXMMRegLane32(rE,0), mkU8(31)),
10991 mkU32(1) ));
10992 assign( t1, binop( Iop_And32,
10993 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(30)),
10994 mkU32(2) ));
10995 assign( t2, binop( Iop_And32,
10996 binop(Iop_Shr32, getXMMRegLane32(rE,2), mkU8(29)),
10997 mkU32(4) ));
10998 assign( t3, binop( Iop_And32,
10999 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(28)),
11000 mkU32(8) ));
11001 putIReg32( rG, binop(Iop_Or32,
11002 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
11003 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
11004 DIP("%smovmskps %s,%s\n", isAvx ? "v" : "",
11005 nameXMMReg(rE), nameIReg32(rG));
11006 return delta;
11007}
11008
11009
11010static Long dis_MOVMSKPS_256 ( VexAbiInfo* vbi, Prefix pfx, Long delta )
11011{
11012 UChar modrm = getUChar(delta);
11013 UInt rG = gregOfRexRM(pfx,modrm);
11014 UInt rE = eregOfRexRM(pfx,modrm);
11015 IRTemp t0 = newTemp(Ity_I32);
11016 IRTemp t1 = newTemp(Ity_I32);
11017 IRTemp t2 = newTemp(Ity_I32);
11018 IRTemp t3 = newTemp(Ity_I32);
11019 IRTemp t4 = newTemp(Ity_I32);
11020 IRTemp t5 = newTemp(Ity_I32);
11021 IRTemp t6 = newTemp(Ity_I32);
11022 IRTemp t7 = newTemp(Ity_I32);
11023 delta += 1;
11024 assign( t0, binop( Iop_And32,
11025 binop(Iop_Shr32, getYMMRegLane32(rE,0), mkU8(31)),
11026 mkU32(1) ));
11027 assign( t1, binop( Iop_And32,
11028 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(30)),
11029 mkU32(2) ));
11030 assign( t2, binop( Iop_And32,
11031 binop(Iop_Shr32, getYMMRegLane32(rE,2), mkU8(29)),
11032 mkU32(4) ));
11033 assign( t3, binop( Iop_And32,
11034 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(28)),
11035 mkU32(8) ));
11036 assign( t4, binop( Iop_And32,
11037 binop(Iop_Shr32, getYMMRegLane32(rE,4), mkU8(27)),
11038 mkU32(16) ));
11039 assign( t5, binop( Iop_And32,
11040 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(26)),
11041 mkU32(32) ));
11042 assign( t6, binop( Iop_And32,
11043 binop(Iop_Shr32, getYMMRegLane32(rE,6), mkU8(25)),
11044 mkU32(64) ));
11045 assign( t7, binop( Iop_And32,
11046 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(24)),
11047 mkU32(128) ));
11048 putIReg32( rG, binop(Iop_Or32,
11049 binop(Iop_Or32,
11050 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
11051 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ),
11052 binop(Iop_Or32,
11053 binop(Iop_Or32, mkexpr(t4), mkexpr(t5)),
11054 binop(Iop_Or32, mkexpr(t6), mkexpr(t7)) ) ) );
11055 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
11056 return delta;
11057}
11058
11059
11060static Long dis_MOVMSKPD_128 ( VexAbiInfo* vbi, Prefix pfx,
11061 Long delta, Bool isAvx )
11062{
11063 UChar modrm = getUChar(delta);
11064 UInt rG = gregOfRexRM(pfx,modrm);
11065 UInt rE = eregOfRexRM(pfx,modrm);
11066 IRTemp t0 = newTemp(Ity_I32);
11067 IRTemp t1 = newTemp(Ity_I32);
11068 delta += 1;
11069 assign( t0, binop( Iop_And32,
11070 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(31)),
11071 mkU32(1) ));
11072 assign( t1, binop( Iop_And32,
11073 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(30)),
11074 mkU32(2) ));
11075 putIReg32( rG, binop(Iop_Or32, mkexpr(t0), mkexpr(t1) ) );
11076 DIP("%smovmskpd %s,%s\n", isAvx ? "v" : "",
11077 nameXMMReg(rE), nameIReg32(rG));
11078 return delta;
11079}
11080
11081
11082static Long dis_MOVMSKPD_256 ( VexAbiInfo* vbi, Prefix pfx, Long delta )
11083{
11084 UChar modrm = getUChar(delta);
11085 UInt rG = gregOfRexRM(pfx,modrm);
11086 UInt rE = eregOfRexRM(pfx,modrm);
11087 IRTemp t0 = newTemp(Ity_I32);
11088 IRTemp t1 = newTemp(Ity_I32);
11089 IRTemp t2 = newTemp(Ity_I32);
11090 IRTemp t3 = newTemp(Ity_I32);
11091 delta += 1;
11092 assign( t0, binop( Iop_And32,
11093 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(31)),
11094 mkU32(1) ));
11095 assign( t1, binop( Iop_And32,
11096 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(30)),
11097 mkU32(2) ));
11098 assign( t2, binop( Iop_And32,
11099 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(29)),
11100 mkU32(4) ));
11101 assign( t3, binop( Iop_And32,
11102 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(28)),
11103 mkU32(8) ));
11104 putIReg32( rG, binop(Iop_Or32,
11105 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
11106 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
11107   DIP("vmovmskpd %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
11108 return delta;
11109}
11110
11111
sewardj80611e32012-01-20 13:07:24 +000011112/* Note, this also handles SSE(1) insns. */
11113__attribute__((noinline))
11114static
11115Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
11116 VexAbiInfo* vbi,
sewardj30fc0582012-02-16 13:45:13 +000011117 Prefix pfx, Int sz, Long deltaIN,
11118 DisResult* dres )
sewardj80611e32012-01-20 13:07:24 +000011119{
11120 IRTemp addr = IRTemp_INVALID;
11121 IRTemp t0 = IRTemp_INVALID;
11122 IRTemp t1 = IRTemp_INVALID;
11123 IRTemp t2 = IRTemp_INVALID;
11124 IRTemp t3 = IRTemp_INVALID;
11125 IRTemp t4 = IRTemp_INVALID;
11126 IRTemp t5 = IRTemp_INVALID;
11127 IRTemp t6 = IRTemp_INVALID;
11128 UChar modrm = 0;
11129 Int alen = 0;
11130 HChar dis_buf[50];
11131
11132 *decode_OK = False;
11133
11134 Long delta = deltaIN;
11135 UChar opc = getUChar(delta);
11136 delta++;
11137 switch (opc) {
11138
11139 case 0x10:
11140 if (have66noF2noF3(pfx)
11141 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11142 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
11143 modrm = getUChar(delta);
11144 if (epartIsReg(modrm)) {
11145 putXMMReg( gregOfRexRM(pfx,modrm),
11146 getXMMReg( eregOfRexRM(pfx,modrm) ));
11147 DIP("movupd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11148 nameXMMReg(gregOfRexRM(pfx,modrm)));
11149 delta += 1;
11150 } else {
11151 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11152 putXMMReg( gregOfRexRM(pfx,modrm),
11153 loadLE(Ity_V128, mkexpr(addr)) );
11154 DIP("movupd %s,%s\n", dis_buf,
11155 nameXMMReg(gregOfRexRM(pfx,modrm)));
11156 delta += alen;
11157 }
11158 goto decode_success;
11159 }
11160 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
11161 G (lo half xmm). If E is mem, upper half of G is zeroed out.
11162 If E is reg, upper half of G is unchanged. */
11163 if (haveF2no66noF3(pfx)
11164 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) ) {
11165 modrm = getUChar(delta);
11166 if (epartIsReg(modrm)) {
11167 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
11168 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
11169 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11170 nameXMMReg(gregOfRexRM(pfx,modrm)));
11171 delta += 1;
11172 } else {
11173 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11174 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
11175 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
11176 loadLE(Ity_I64, mkexpr(addr)) );
11177 DIP("movsd %s,%s\n", dis_buf,
11178 nameXMMReg(gregOfRexRM(pfx,modrm)));
11179 delta += alen;
11180 }
11181 goto decode_success;
11182 }
11183 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
11184 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
11185 if (haveF3no66noF2(pfx)
11186 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11187 modrm = getUChar(delta);
11188 if (epartIsReg(modrm)) {
11189 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
11190 getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 ));
11191 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11192 nameXMMReg(gregOfRexRM(pfx,modrm)));
11193 delta += 1;
11194 } else {
11195 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11196 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
11197 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
11198 loadLE(Ity_I32, mkexpr(addr)) );
11199 DIP("movss %s,%s\n", dis_buf,
11200 nameXMMReg(gregOfRexRM(pfx,modrm)));
11201 delta += alen;
11202 }
11203 goto decode_success;
11204 }
11205 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
11206 if (haveNo66noF2noF3(pfx)
11207 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11208 modrm = getUChar(delta);
11209 if (epartIsReg(modrm)) {
11210 putXMMReg( gregOfRexRM(pfx,modrm),
11211 getXMMReg( eregOfRexRM(pfx,modrm) ));
11212 DIP("movups %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11213 nameXMMReg(gregOfRexRM(pfx,modrm)));
11214 delta += 1;
11215 } else {
11216 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11217 putXMMReg( gregOfRexRM(pfx,modrm),
11218 loadLE(Ity_V128, mkexpr(addr)) );
11219 DIP("movups %s,%s\n", dis_buf,
11220 nameXMMReg(gregOfRexRM(pfx,modrm)));
11221 delta += alen;
11222 }
11223 goto decode_success;
11224 }
11225 break;
11226
11227 case 0x11:
11228 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
11229 or lo half xmm). */
11230 if (haveF2no66noF3(pfx)
11231 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11232 modrm = getUChar(delta);
11233 if (epartIsReg(modrm)) {
11234 putXMMRegLane64( eregOfRexRM(pfx,modrm), 0,
11235 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
11236 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11237 nameXMMReg(eregOfRexRM(pfx,modrm)));
11238 delta += 1;
11239 } else {
11240 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11241 storeLE( mkexpr(addr),
11242 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
11243 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11244 dis_buf);
11245 delta += alen;
11246 }
11247 goto decode_success;
11248 }
11249 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
11250 or lo 1/4 xmm). */
11251 if (haveF3no66noF2(pfx) && sz == 4) {
11252 modrm = getUChar(delta);
11253 if (epartIsReg(modrm)) {
11254 /* fall through, we don't yet have a test case */
11255 } else {
11256 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11257 storeLE( mkexpr(addr),
11258 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
11259 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11260 dis_buf);
11261 delta += alen;
11262 goto decode_success;
11263 }
11264 }
11265 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
11266 if (have66noF2noF3(pfx)
11267 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11268 modrm = getUChar(delta);
11269 if (epartIsReg(modrm)) {
11270 putXMMReg( eregOfRexRM(pfx,modrm),
11271 getXMMReg( gregOfRexRM(pfx,modrm) ) );
11272 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11273 nameXMMReg(eregOfRexRM(pfx,modrm)));
11274 delta += 1;
11275 } else {
11276 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11277 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
11278 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11279 dis_buf );
11280 delta += alen;
11281 }
11282 goto decode_success;
11283 }
11284 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
11285 if (haveNo66noF2noF3(pfx)
11286 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11287 modrm = getUChar(delta);
11288 if (epartIsReg(modrm)) {
11289 /* fall through; awaiting test case */
11290 } else {
11291 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11292 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
11293 DIP("movups %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11294 dis_buf );
11295 delta += alen;
11296 goto decode_success;
11297 }
11298 }
11299 break;
11300
11301 case 0x12:
11302 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
11303 /* Identical to MOVLPS ? */
11304 if (have66noF2noF3(pfx)
11305 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11306 modrm = getUChar(delta);
11307 if (epartIsReg(modrm)) {
11308 /* fall through; apparently reg-reg is not possible */
11309 } else {
11310 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11311 delta += alen;
11312 putXMMRegLane64( gregOfRexRM(pfx,modrm),
11313 0/*lower lane*/,
11314 loadLE(Ity_I64, mkexpr(addr)) );
11315 DIP("movlpd %s, %s\n",
11316 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
11317 goto decode_success;
11318 }
11319 }
11320 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
11321      /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
11322 if (haveNo66noF2noF3(pfx)
11323 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11324 modrm = getUChar(delta);
11325 if (epartIsReg(modrm)) {
11326 delta += 1;
11327 putXMMRegLane64( gregOfRexRM(pfx,modrm),
11328 0/*lower lane*/,
11329 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ));
11330 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11331 nameXMMReg(gregOfRexRM(pfx,modrm)));
11332 } else {
11333 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11334 delta += alen;
11335 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/,
11336 loadLE(Ity_I64, mkexpr(addr)) );
11337 DIP("movlps %s, %s\n",
11338 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
11339 }
11340 goto decode_success;
11341 }
11342 break;
11343
11344 case 0x13:
11345 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
11346 if (haveNo66noF2noF3(pfx)
11347 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11348 modrm = getUChar(delta);
11349 if (!epartIsReg(modrm)) {
11350 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11351 delta += alen;
11352 storeLE( mkexpr(addr),
11353 getXMMRegLane64( gregOfRexRM(pfx,modrm),
11354 0/*lower lane*/ ) );
11355 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
11356 dis_buf);
11357 goto decode_success;
11358 }
11359 /* else fall through */
11360 }
11361 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
11362 /* Identical to MOVLPS ? */
11363 if (have66noF2noF3(pfx)
11364 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11365 modrm = getUChar(delta);
11366 if (!epartIsReg(modrm)) {
11367 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11368 delta += alen;
11369 storeLE( mkexpr(addr),
11370 getXMMRegLane64( gregOfRexRM(pfx,modrm),
11371 0/*lower lane*/ ) );
11372 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
11373 dis_buf);
11374 goto decode_success;
11375 }
11376 /* else fall through */
11377 }
11378 break;
11379
11380 case 0x14:
11381 case 0x15:
11382 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
11383 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
11384 /* These just appear to be special cases of SHUFPS */
11385 if (haveNo66noF2noF3(pfx) && sz == 4) {
sewardjc4530ae2012-05-21 10:18:49 +000011386 Bool hi = toBool(opc == 0x15);
11387 IRTemp sV = newTemp(Ity_V128);
11388 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000011389 modrm = getUChar(delta);
sewardjc4530ae2012-05-21 10:18:49 +000011390 UInt rG = gregOfRexRM(pfx,modrm);
11391 assign( dV, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000011392 if (epartIsReg(modrm)) {
sewardjc4530ae2012-05-21 10:18:49 +000011393 UInt rE = eregOfRexRM(pfx,modrm);
11394 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000011395 delta += 1;
11396 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
sewardjc4530ae2012-05-21 10:18:49 +000011397 nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000011398 } else {
11399 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11400 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11401 delta += alen;
11402 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
sewardjc4530ae2012-05-21 10:18:49 +000011403 dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000011404 }
sewardj56c30312012-06-12 08:45:39 +000011405 IRTemp res = math_UNPCKxPS_128( sV, dV, hi );
sewardjc4530ae2012-05-21 10:18:49 +000011406 putXMMReg( rG, mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000011407 goto decode_success;
11408 }
11409 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
11410 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
11411      /* These just appear to be special cases of SHUFPD */
11412 if (have66noF2noF3(pfx)
11413 && sz == 2 /* could be 8 if rex also present */) {
sewardj56c30312012-06-12 08:45:39 +000011414 Bool hi = toBool(opc == 0x15);
sewardj80611e32012-01-20 13:07:24 +000011415 IRTemp sV = newTemp(Ity_V128);
11416 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000011417 modrm = getUChar(delta);
sewardj56c30312012-06-12 08:45:39 +000011418 UInt rG = gregOfRexRM(pfx,modrm);
11419 assign( dV, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000011420 if (epartIsReg(modrm)) {
sewardj56c30312012-06-12 08:45:39 +000011421 UInt rE = eregOfRexRM(pfx,modrm);
11422 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000011423 delta += 1;
11424            DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
sewardj56c30312012-06-12 08:45:39 +000011425 nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000011426 } else {
11427 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11428 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11429 delta += alen;
11430            DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
sewardj56c30312012-06-12 08:45:39 +000011431 dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000011432 }
sewardj56c30312012-06-12 08:45:39 +000011433 IRTemp res = math_UNPCKxPD_128( sV, dV, hi );
11434 putXMMReg( rG, mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000011435 goto decode_success;
11436 }
11437 break;
11438
11439 case 0x16:
11440 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
11441      /* This seems identical to MOVHPS.  This instruction encoding is
11442 completely crazy. */
11443 if (have66noF2noF3(pfx)
11444 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11445 modrm = getUChar(delta);
11446 if (epartIsReg(modrm)) {
11447 /* fall through; apparently reg-reg is not possible */
11448 } else {
11449 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11450 delta += alen;
11451 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
11452 loadLE(Ity_I64, mkexpr(addr)) );
11453 DIP("movhpd %s,%s\n", dis_buf,
11454 nameXMMReg( gregOfRexRM(pfx,modrm) ));
11455 goto decode_success;
11456 }
11457 }
11458 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
11459 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
11460 if (haveNo66noF2noF3(pfx)
11461 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11462 modrm = getUChar(delta);
11463 if (epartIsReg(modrm)) {
11464 delta += 1;
11465 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
11466 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) );
11467 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11468 nameXMMReg(gregOfRexRM(pfx,modrm)));
11469 } else {
11470 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11471 delta += alen;
11472 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
11473 loadLE(Ity_I64, mkexpr(addr)) );
11474 DIP("movhps %s,%s\n", dis_buf,
11475 nameXMMReg( gregOfRexRM(pfx,modrm) ));
11476 }
11477 goto decode_success;
11478 }
11479 break;
11480
11481 case 0x17:
11482 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
11483 if (haveNo66noF2noF3(pfx)
11484 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11485 modrm = getUChar(delta);
11486 if (!epartIsReg(modrm)) {
11487 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11488 delta += alen;
11489 storeLE( mkexpr(addr),
11490 getXMMRegLane64( gregOfRexRM(pfx,modrm),
11491 1/*upper lane*/ ) );
11492 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
11493 dis_buf);
11494 goto decode_success;
11495 }
11496 /* else fall through */
11497 }
11498 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
11499 /* Again, this seems identical to MOVHPS. */
11500 if (have66noF2noF3(pfx)
11501 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11502 modrm = getUChar(delta);
11503 if (!epartIsReg(modrm)) {
11504 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11505 delta += alen;
11506 storeLE( mkexpr(addr),
11507 getXMMRegLane64( gregOfRexRM(pfx,modrm),
11508 1/*upper lane*/ ) );
11509 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
11510 dis_buf);
11511 goto decode_success;
11512 }
11513 /* else fall through */
11514 }
11515 break;
11516
11517 case 0x18:
11518 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
11519      /* 0F 18 /1 = PREFETCHT0 -- with various different hints */
11520      /* 0F 18 /2 = PREFETCHT1 */
11521      /* 0F 18 /3 = PREFETCHT2 */
11522 if (haveNo66noF2noF3(pfx)
11523 && !epartIsReg(getUChar(delta))
11524 && gregLO3ofRM(getUChar(delta)) >= 0
11525 && gregLO3ofRM(getUChar(delta)) <= 3) {
11526 HChar* hintstr = "??";
11527
11528 modrm = getUChar(delta);
11529 vassert(!epartIsReg(modrm));
11530
11531 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11532 delta += alen;
11533
11534 switch (gregLO3ofRM(modrm)) {
11535 case 0: hintstr = "nta"; break;
11536 case 1: hintstr = "t0"; break;
11537 case 2: hintstr = "t1"; break;
11538 case 3: hintstr = "t2"; break;
11539 default: vassert(0);
11540 }
11541
11542 DIP("prefetch%s %s\n", hintstr, dis_buf);
11543 goto decode_success;
11544 }
11545 break;
11546
11547 case 0x28:
11548 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
11549 if (have66noF2noF3(pfx)
11550 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11551 modrm = getUChar(delta);
11552 if (epartIsReg(modrm)) {
11553 putXMMReg( gregOfRexRM(pfx,modrm),
11554 getXMMReg( eregOfRexRM(pfx,modrm) ));
11555 DIP("movapd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11556 nameXMMReg(gregOfRexRM(pfx,modrm)));
11557 delta += 1;
11558 } else {
11559 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11560 gen_SEGV_if_not_16_aligned( addr );
11561 putXMMReg( gregOfRexRM(pfx,modrm),
11562 loadLE(Ity_V128, mkexpr(addr)) );
11563 DIP("movapd %s,%s\n", dis_buf,
11564 nameXMMReg(gregOfRexRM(pfx,modrm)));
11565 delta += alen;
11566 }
11567 goto decode_success;
11568 }
11569 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
11570 if (haveNo66noF2noF3(pfx)
11571 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11572 modrm = getUChar(delta);
11573 if (epartIsReg(modrm)) {
11574 putXMMReg( gregOfRexRM(pfx,modrm),
11575 getXMMReg( eregOfRexRM(pfx,modrm) ));
11576 DIP("movaps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11577 nameXMMReg(gregOfRexRM(pfx,modrm)));
11578 delta += 1;
11579 } else {
11580 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11581 gen_SEGV_if_not_16_aligned( addr );
11582 putXMMReg( gregOfRexRM(pfx,modrm),
11583 loadLE(Ity_V128, mkexpr(addr)) );
11584 DIP("movaps %s,%s\n", dis_buf,
11585 nameXMMReg(gregOfRexRM(pfx,modrm)));
11586 delta += alen;
11587 }
11588 goto decode_success;
11589 }
11590 break;
11591
11592 case 0x29:
11593 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
11594 if (haveNo66noF2noF3(pfx)
11595 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11596 modrm = getUChar(delta);
11597 if (epartIsReg(modrm)) {
11598 /* fall through; awaiting test case */
11599 } else {
11600 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11601 gen_SEGV_if_not_16_aligned( addr );
11602 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
11603 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11604 dis_buf );
11605 delta += alen;
11606 goto decode_success;
11607 }
11608 }
11609 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
11610 if (have66noF2noF3(pfx)
11611 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11612 modrm = getUChar(delta);
11613 if (epartIsReg(modrm)) {
11614 putXMMReg( eregOfRexRM(pfx,modrm),
11615 getXMMReg( gregOfRexRM(pfx,modrm) ) );
11616 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11617 nameXMMReg(eregOfRexRM(pfx,modrm)));
11618 delta += 1;
11619 } else {
11620 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11621 gen_SEGV_if_not_16_aligned( addr );
11622 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
11623 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11624 dis_buf );
11625 delta += alen;
11626 }
11627 goto decode_success;
11628 }
11629 break;
11630
11631 case 0x2A:
11632 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
11633 half xmm */
11634 if (haveNo66noF2noF3(pfx) && sz == 4) {
11635 IRTemp arg64 = newTemp(Ity_I64);
11636 IRTemp rmode = newTemp(Ity_I32);
11637
11638 modrm = getUChar(delta);
11639 do_MMX_preamble();
11640 if (epartIsReg(modrm)) {
11641 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
11642 delta += 1;
11643 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
11644 nameXMMReg(gregOfRexRM(pfx,modrm)));
11645 } else {
11646 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11647 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
11648 delta += alen;
11649 DIP("cvtpi2ps %s,%s\n", dis_buf,
11650 nameXMMReg(gregOfRexRM(pfx,modrm)) );
11651 }
11652
11653 assign( rmode, get_sse_roundingmode() );
11654
11655 putXMMRegLane32F(
11656 gregOfRexRM(pfx,modrm), 0,
11657 binop(Iop_F64toF32,
11658 mkexpr(rmode),
11659 unop(Iop_I32StoF64,
11660 unop(Iop_64to32, mkexpr(arg64)) )) );
11661
11662 putXMMRegLane32F(
11663 gregOfRexRM(pfx,modrm), 1,
11664 binop(Iop_F64toF32,
11665 mkexpr(rmode),
11666 unop(Iop_I32StoF64,
11667 unop(Iop_64HIto32, mkexpr(arg64)) )) );
11668
11669 goto decode_success;
11670 }
11671 /* F3 0F 2A = CVTSI2SS
11672 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm
11673 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */
11674 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
11675 IRTemp rmode = newTemp(Ity_I32);
11676 assign( rmode, get_sse_roundingmode() );
11677 modrm = getUChar(delta);
11678 if (sz == 4) {
11679 IRTemp arg32 = newTemp(Ity_I32);
11680 if (epartIsReg(modrm)) {
11681 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
11682 delta += 1;
11683 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
11684 nameXMMReg(gregOfRexRM(pfx,modrm)));
11685 } else {
11686 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11687 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
11688 delta += alen;
11689 DIP("cvtsi2ss %s,%s\n", dis_buf,
11690 nameXMMReg(gregOfRexRM(pfx,modrm)) );
11691 }
11692 putXMMRegLane32F(
11693 gregOfRexRM(pfx,modrm), 0,
11694 binop(Iop_F64toF32,
11695 mkexpr(rmode),
11696 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
11697 } else {
11698 /* sz == 8 */
11699 IRTemp arg64 = newTemp(Ity_I64);
11700 if (epartIsReg(modrm)) {
11701 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
11702 delta += 1;
11703 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
11704 nameXMMReg(gregOfRexRM(pfx,modrm)));
11705 } else {
11706 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11707 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
11708 delta += alen;
11709 DIP("cvtsi2ssq %s,%s\n", dis_buf,
11710 nameXMMReg(gregOfRexRM(pfx,modrm)) );
11711 }
11712 putXMMRegLane32F(
11713 gregOfRexRM(pfx,modrm), 0,
11714 binop(Iop_F64toF32,
11715 mkexpr(rmode),
11716 binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) );
11717 }
11718 goto decode_success;
11719 }
11720 /* F2 0F 2A = CVTSI2SD
11721 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm
11722 when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm
11723 */
11724 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
11725 modrm = getUChar(delta);
11726 if (sz == 4) {
11727 IRTemp arg32 = newTemp(Ity_I32);
11728 if (epartIsReg(modrm)) {
11729 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
11730 delta += 1;
sewardjc4530ae2012-05-21 10:18:49 +000011731 DIP("cvtsi2sdl %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
11732 nameXMMReg(gregOfRexRM(pfx,modrm)));
sewardj80611e32012-01-20 13:07:24 +000011733 } else {
11734 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11735 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
11736 delta += alen;
sewardjc4530ae2012-05-21 10:18:49 +000011737 DIP("cvtsi2sdl %s,%s\n", dis_buf,
11738 nameXMMReg(gregOfRexRM(pfx,modrm)) );
sewardj80611e32012-01-20 13:07:24 +000011739 }
11740 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
11741 unop(Iop_I32StoF64, mkexpr(arg32))
11742 );
11743 } else {
11744 /* sz == 8 */
11745 IRTemp arg64 = newTemp(Ity_I64);
11746 if (epartIsReg(modrm)) {
11747 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
11748 delta += 1;
11749 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
11750 nameXMMReg(gregOfRexRM(pfx,modrm)));
11751 } else {
11752 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11753 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
11754 delta += alen;
11755 DIP("cvtsi2sdq %s,%s\n", dis_buf,
11756 nameXMMReg(gregOfRexRM(pfx,modrm)) );
11757 }
11758 putXMMRegLane64F(
11759 gregOfRexRM(pfx,modrm),
11760 0,
11761 binop( Iop_I64StoF64,
11762 get_sse_roundingmode(),
11763 mkexpr(arg64)
11764 )
11765 );
11766 }
11767 goto decode_success;
11768 }
11769 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
11770 xmm(G) */
11771 if (have66noF2noF3(pfx) && sz == 2) {
11772 IRTemp arg64 = newTemp(Ity_I64);
11773
11774 modrm = getUChar(delta);
11775 if (epartIsReg(modrm)) {
11776         /* Only switch to MMX mode if the source is an MMX register.
11777 This is inconsistent with all other instructions which
11778 convert between XMM and (M64 or MMX), which always switch
11779            to MMX mode even if the 64-bit operand is M64 and not MMX. At
11780 least, that's what the Intel docs seem to me to say.
11781 Fixes #210264. */
11782 do_MMX_preamble();
11783 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
11784 delta += 1;
11785 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
11786 nameXMMReg(gregOfRexRM(pfx,modrm)));
11787 } else {
11788 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11789 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
11790 delta += alen;
11791 DIP("cvtpi2pd %s,%s\n", dis_buf,
11792 nameXMMReg(gregOfRexRM(pfx,modrm)) );
11793 }
11794
11795 putXMMRegLane64F(
11796 gregOfRexRM(pfx,modrm), 0,
11797 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
11798 );
11799
11800 putXMMRegLane64F(
11801 gregOfRexRM(pfx,modrm), 1,
11802 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
11803 );
11804
11805 goto decode_success;
11806 }
11807 break;
11808
11809 case 0x2B:
11810 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
11811 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
11812 if ( (haveNo66noF2noF3(pfx) && sz == 4)
11813 || (have66noF2noF3(pfx) && sz == 2) ) {
11814 modrm = getUChar(delta);
11815 if (!epartIsReg(modrm)) {
11816 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11817 gen_SEGV_if_not_16_aligned( addr );
11818 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
11819 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
11820 dis_buf,
11821 nameXMMReg(gregOfRexRM(pfx,modrm)));
11822 delta += alen;
11823 goto decode_success;
11824 }
11825 /* else fall through */
11826 }
11827 break;
11828
11829 case 0x2C:
11830 case 0x2D:
11831 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
11832 I32 in mmx, according to prevailing SSE rounding mode */
11833 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
11834 I32 in mmx, rounding towards zero */
11835 if (haveNo66noF2noF3(pfx) && sz == 4) {
11836 IRTemp dst64 = newTemp(Ity_I64);
11837 IRTemp rmode = newTemp(Ity_I32);
11838 IRTemp f32lo = newTemp(Ity_F32);
11839 IRTemp f32hi = newTemp(Ity_F32);
11840 Bool r2zero = toBool(opc == 0x2C);
11841
11842 do_MMX_preamble();
11843 modrm = getUChar(delta);
11844
11845 if (epartIsReg(modrm)) {
11846 delta += 1;
11847 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
11848 assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1));
11849 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
11850 nameXMMReg(eregOfRexRM(pfx,modrm)),
11851 nameMMXReg(gregLO3ofRM(modrm)));
11852 } else {
11853 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11854 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
11855 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64,
11856 mkexpr(addr),
11857 mkU64(4) )));
11858 delta += alen;
11859 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
11860 dis_buf,
11861 nameMMXReg(gregLO3ofRM(modrm)));
11862 }
11863
11864 if (r2zero) {
11865 assign(rmode, mkU32((UInt)Irrm_ZERO) );
11866 } else {
11867 assign( rmode, get_sse_roundingmode() );
11868 }
11869
11870 assign(
11871 dst64,
11872 binop( Iop_32HLto64,
11873 binop( Iop_F64toI32S,
11874 mkexpr(rmode),
11875 unop( Iop_F32toF64, mkexpr(f32hi) ) ),
11876 binop( Iop_F64toI32S,
11877 mkexpr(rmode),
11878 unop( Iop_F32toF64, mkexpr(f32lo) ) )
11879 )
11880 );
11881
11882 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
11883 goto decode_success;
11884 }
11885 /* F3 0F 2D = CVTSS2SI
11886 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
11887 according to prevailing SSE rounding mode
11888 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
11889 according to prevailing SSE rounding mode
11890 */
11891 /* F3 0F 2C = CVTTSS2SI
11892 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
11893 truncating towards zero
11894 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
11895 truncating towards zero
11896 */
11897 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
sewardj80804d12012-05-22 10:48:13 +000011898 delta = dis_CVTxSS2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
sewardj80611e32012-01-20 13:07:24 +000011899 goto decode_success;
11900 }
11901 /* F2 0F 2D = CVTSD2SI
11902 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
11903 according to prevailing SSE rounding mode
11904 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
11905 according to prevailing SSE rounding mode
11906 */
11907 /* F2 0F 2C = CVTTSD2SI
11908 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
11909 truncating towards zero
11910 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
11911 truncating towards zero
11912 */
11913 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
sewardjc4530ae2012-05-21 10:18:49 +000011914 delta = dis_CVTxSD2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
sewardj80611e32012-01-20 13:07:24 +000011915 goto decode_success;
11916 }
11917 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
11918 I32 in mmx, according to prevailing SSE rounding mode */
11919 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
11920 I32 in mmx, rounding towards zero */
11921 if (have66noF2noF3(pfx) && sz == 2) {
11922 IRTemp dst64 = newTemp(Ity_I64);
11923 IRTemp rmode = newTemp(Ity_I32);
11924 IRTemp f64lo = newTemp(Ity_F64);
11925 IRTemp f64hi = newTemp(Ity_F64);
11926 Bool r2zero = toBool(opc == 0x2C);
11927
11928 do_MMX_preamble();
11929 modrm = getUChar(delta);
11930
11931 if (epartIsReg(modrm)) {
11932 delta += 1;
11933 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
11934 assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1));
11935 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
11936 nameXMMReg(eregOfRexRM(pfx,modrm)),
11937 nameMMXReg(gregLO3ofRM(modrm)));
11938 } else {
11939 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11940 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
11941 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64,
11942 mkexpr(addr),
11943 mkU64(8) )));
11944 delta += alen;
11945         DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
11946 dis_buf,
11947 nameMMXReg(gregLO3ofRM(modrm)));
11948 }
11949
11950 if (r2zero) {
11951 assign(rmode, mkU32((UInt)Irrm_ZERO) );
11952 } else {
11953 assign( rmode, get_sse_roundingmode() );
11954 }
11955
11956 assign(
11957 dst64,
11958 binop( Iop_32HLto64,
11959 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
11960 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
11961 )
11962 );
11963
11964 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
11965 goto decode_success;
11966 }
11967 break;
11968
11969 case 0x2E:
11970 case 0x2F:
sewardj80611e32012-01-20 13:07:24 +000011971 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
sewardj4ed05e02012-06-18 15:01:30 +000011972 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
sewardj80611e32012-01-20 13:07:24 +000011973 if (have66noF2noF3(pfx) && sz == 2) {
sewardjc4530ae2012-05-21 10:18:49 +000011974 delta = dis_COMISD( vbi, pfx, delta, False/*!isAvx*/, opc );
sewardj80611e32012-01-20 13:07:24 +000011975 goto decode_success;
11976 }
sewardj80611e32012-01-20 13:07:24 +000011977 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
sewardj4ed05e02012-06-18 15:01:30 +000011978 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
sewardj80611e32012-01-20 13:07:24 +000011979 if (haveNo66noF2noF3(pfx) && sz == 4) {
sewardjc4530ae2012-05-21 10:18:49 +000011980 delta = dis_COMISS( vbi, pfx, delta, False/*!isAvx*/, opc );
sewardj80611e32012-01-20 13:07:24 +000011981 goto decode_success;
11982 }
11983 break;
11984
11985 case 0x50:
11986 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
11987 to 4 lowest bits of ireg(G) */
sewardj8eb7ae82012-06-24 14:00:27 +000011988 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
11989 && epartIsReg(getUChar(delta))) {
sewardj80611e32012-01-20 13:07:24 +000011990 /* sz == 8 is a kludge to handle insns with REX.W redundantly
11991 set to 1, which has been known to happen:
11992
11993 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d
11994
11995 20071106: Intel docs say that REX.W isn't redundant: when
11996 present, a 64-bit register is written; when not present, only
11997 the 32-bit half is written. However, testing on a Core2
11998 machine suggests the entire 64 bit register is written
11999 irrespective of the status of REX.W. That could be because
12000 of the default rule that says "if the lower half of a 32-bit
12001 register is written, the upper half is zeroed". By using
12002         putIReg32 here we inadvertently produce the same behaviour as
12003 the Core2, for the same reason -- putIReg32 implements said
12004 rule.
12005
12006 AMD docs give no indication that REX.W is even valid for this
12007 insn. */
sewardj8eb7ae82012-06-24 14:00:27 +000012008 delta = dis_MOVMSKPS_128( vbi, pfx, delta, False/*!isAvx*/ );
12009 goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000012010 }
12011 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
12012 2 lowest bits of ireg(G) */
12013 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
12014 /* sz == 8 is a kludge to handle insns with REX.W redundantly
12015 set to 1, which has been known to happen:
12016 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d
12017 20071106: see further comments on MOVMSKPS implementation above.
12018 */
sewardj8eb7ae82012-06-24 14:00:27 +000012019 delta = dis_MOVMSKPD_128( vbi, pfx, delta, False/*!isAvx*/ );
12020 goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000012021 }
12022 break;
12023
12024 case 0x51:
12025 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
12026 if (haveF3no66noF2(pfx) && sz == 4) {
12027 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
12028 "sqrtss", Iop_Sqrt32F0x4 );
12029 goto decode_success;
12030 }
12031 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
12032 if (haveNo66noF2noF3(pfx) && sz == 4) {
12033 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
12034 "sqrtps", Iop_Sqrt32Fx4 );
12035 goto decode_success;
12036 }
12037 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
12038 if (haveF2no66noF3(pfx) && sz == 4) {
12039 delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta,
12040 "sqrtsd", Iop_Sqrt64F0x2 );
12041 goto decode_success;
12042 }
12043 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
12044 if (have66noF2noF3(pfx) && sz == 2) {
12045 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
12046 "sqrtpd", Iop_Sqrt64Fx2 );
12047 goto decode_success;
12048 }
12049 break;
12050
12051 case 0x52:
12052 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
12053 if (haveF3no66noF2(pfx) && sz == 4) {
12054 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
12055 "rsqrtss", Iop_RSqrt32F0x4 );
12056 goto decode_success;
12057 }
12058 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
12059 if (haveNo66noF2noF3(pfx) && sz == 4) {
12060 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
12061 "rsqrtps", Iop_RSqrt32Fx4 );
12062 goto decode_success;
12063 }
12064 break;
12065
12066 case 0x53:
12067 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
12068 if (haveF3no66noF2(pfx) && sz == 4) {
12069 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
12070 "rcpss", Iop_Recip32F0x4 );
12071 goto decode_success;
12072 }
12073 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
12074 if (haveNo66noF2noF3(pfx) && sz == 4) {
12075 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
12076 "rcpps", Iop_Recip32Fx4 );
12077 goto decode_success;
12078 }
12079 break;
12080
12081 case 0x54:
12082 /* 0F 54 = ANDPS -- G = G and E */
12083 if (haveNo66noF2noF3(pfx) && sz == 4) {
12084 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andps", Iop_AndV128 );
12085 goto decode_success;
12086 }
12087 /* 66 0F 54 = ANDPD -- G = G and E */
12088 if (have66noF2noF3(pfx) && sz == 2) {
12089 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andpd", Iop_AndV128 );
12090 goto decode_success;
12091 }
12092 break;
12093
12094 case 0x55:
12095 /* 0F 55 = ANDNPS -- G = (not G) and E */
12096 if (haveNo66noF2noF3(pfx) && sz == 4) {
12097 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnps",
12098 Iop_AndV128 );
12099 goto decode_success;
12100 }
12101 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
12102 if (have66noF2noF3(pfx) && sz == 2) {
12103 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnpd",
12104 Iop_AndV128 );
12105 goto decode_success;
12106 }
12107 break;
12108
12109 case 0x56:
12110 /* 0F 56 = ORPS -- G = G or E */
12111 if (haveNo66noF2noF3(pfx) && sz == 4) {
12112 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orps", Iop_OrV128 );
12113 goto decode_success;
12114 }
12115 /* 66 0F 56 = ORPD -- G = G or E */
12116 if (have66noF2noF3(pfx) && sz == 2) {
12117 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orpd", Iop_OrV128 );
12118 goto decode_success;
12119 }
12120 break;
12121
12122 case 0x57:
12123 /* 66 0F 57 = XORPD -- G = G xor E */
12124 if (have66noF2noF3(pfx) && sz == 2) {
12125 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorpd", Iop_XorV128 );
12126 goto decode_success;
12127 }
sewardjc4530ae2012-05-21 10:18:49 +000012128 /* 0F 57 = XORPS -- G = G xor E */
sewardj80611e32012-01-20 13:07:24 +000012129 if (haveNo66noF2noF3(pfx) && sz == 4) {
12130 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorps", Iop_XorV128 );
12131 goto decode_success;
12132 }
12133 break;
12134
12135 case 0x58:
12136 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
12137 if (haveNo66noF2noF3(pfx) && sz == 4) {
12138 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addps", Iop_Add32Fx4 );
12139 goto decode_success;
12140 }
12141 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
12142 if (haveF3no66noF2(pfx) && sz == 4) {
12143 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "addss", Iop_Add32F0x4 );
12144 goto decode_success;
12145 }
12146 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
12147 if (haveF2no66noF3(pfx)
12148 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12149 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "addsd", Iop_Add64F0x2 );
12150 goto decode_success;
12151 }
12152 /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
12153 if (have66noF2noF3(pfx)
12154 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12155 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addpd", Iop_Add64Fx2 );
12156 goto decode_success;
12157 }
12158 break;
12159
12160 case 0x59:
12161 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
12162 if (haveF2no66noF3(pfx)
12163 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12164 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "mulsd", Iop_Mul64F0x2 );
12165 goto decode_success;
12166 }
12167 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
12168 if (haveF3no66noF2(pfx) && sz == 4) {
12169 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "mulss", Iop_Mul32F0x4 );
12170 goto decode_success;
12171 }
12172 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
12173 if (haveNo66noF2noF3(pfx) && sz == 4) {
12174 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulps", Iop_Mul32Fx4 );
12175 goto decode_success;
12176 }
12177 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
12178 if (have66noF2noF3(pfx)
12179 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12180 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulpd", Iop_Mul64Fx2 );
12181 goto decode_success;
12182 }
12183 break;
12184
12185 case 0x5A:
12186 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
12187 F64 in xmm(G). */
12188 if (haveNo66noF2noF3(pfx) && sz == 4) {
sewardj66becf32012-06-18 23:15:16 +000012189 delta = dis_CVTPS2PD_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000012190 goto decode_success;
12191 }
12192 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
12193 low half xmm(G) */
12194 if (haveF3no66noF2(pfx) && sz == 4) {
12195 IRTemp f32lo = newTemp(Ity_F32);
12196
12197 modrm = getUChar(delta);
12198 if (epartIsReg(modrm)) {
12199 delta += 1;
12200 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
12201 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12202 nameXMMReg(gregOfRexRM(pfx,modrm)));
12203 } else {
12204 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12205 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
12206 delta += alen;
12207 DIP("cvtss2sd %s,%s\n", dis_buf,
12208 nameXMMReg(gregOfRexRM(pfx,modrm)));
12209 }
12210
12211 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
12212 unop( Iop_F32toF64, mkexpr(f32lo) ) );
12213
12214 goto decode_success;
12215 }
12216 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
12217 low 1/4 xmm(G), according to prevailing SSE rounding mode */
12218 if (haveF2no66noF3(pfx) && sz == 4) {
12219 IRTemp rmode = newTemp(Ity_I32);
12220 IRTemp f64lo = newTemp(Ity_F64);
12221
12222 modrm = getUChar(delta);
12223 if (epartIsReg(modrm)) {
12224 delta += 1;
12225 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
12226 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12227 nameXMMReg(gregOfRexRM(pfx,modrm)));
12228 } else {
12229 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12230 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
12231 delta += alen;
12232 DIP("cvtsd2ss %s,%s\n", dis_buf,
12233 nameXMMReg(gregOfRexRM(pfx,modrm)));
12234 }
12235
12236 assign( rmode, get_sse_roundingmode() );
12237 putXMMRegLane32F(
12238 gregOfRexRM(pfx,modrm), 0,
12239 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
12240 );
12241
12242 goto decode_success;
12243 }
12244 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
12245 lo half xmm(G), rounding according to prevailing SSE rounding
12246 mode, and zero upper half */
12247 /* Note, this is practically identical to CVTPD2DQ. It would have
sewardjc4530ae2012-05-21 10:18:49 +000012248 been nice to merge them together. */
sewardj80611e32012-01-20 13:07:24 +000012249 if (have66noF2noF3(pfx) && sz == 2) {
sewardj6fcd43e2012-06-14 08:51:35 +000012250 delta = dis_CVTPD2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000012251 goto decode_success;
12252 }
12253 break;
12254
12255 case 0x5B:
12256 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
12257 xmm(G), rounding towards zero */
12258 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
12259 xmm(G), as per the prevailing rounding mode */
12260 if ( (have66noF2noF3(pfx) && sz == 2)
12261 || (haveF3no66noF2(pfx) && sz == 4) ) {
sewardj251b59e2012-05-25 13:51:07 +000012262 Bool r2zero = toBool(sz == 4); // FIXME -- unreliable (???)
sewardj66becf32012-06-18 23:15:16 +000012263 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, False/*!isAvx*/, r2zero );
sewardj80611e32012-01-20 13:07:24 +000012264 goto decode_success;
12265 }
12266 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
12267 xmm(G) */
12268 if (haveNo66noF2noF3(pfx) && sz == 4) {
sewardj66becf32012-06-18 23:15:16 +000012269 delta = dis_CVTDQ2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000012270 goto decode_success;
12271 }
12272 break;
12273
12274 case 0x5C:
12275 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
12276 if (haveF3no66noF2(pfx) && sz == 4) {
12277 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "subss", Iop_Sub32F0x4 );
12278 goto decode_success;
12279 }
12280 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
12281 if (haveF2no66noF3(pfx)
12282 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12283 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "subsd", Iop_Sub64F0x2 );
12284 goto decode_success;
12285 }
12286 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
12287 if (haveNo66noF2noF3(pfx) && sz == 4) {
12288 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subps", Iop_Sub32Fx4 );
12289 goto decode_success;
12290 }
12291 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
12292 if (have66noF2noF3(pfx) && sz == 2) {
12293 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subpd", Iop_Sub64Fx2 );
12294 goto decode_success;
12295 }
12296 break;
12297
12298 case 0x5D:
12299 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
12300 if (haveNo66noF2noF3(pfx) && sz == 4) {
12301 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minps", Iop_Min32Fx4 );
12302 goto decode_success;
12303 }
12304 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
12305 if (haveF3no66noF2(pfx) && sz == 4) {
12306 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "minss", Iop_Min32F0x4 );
12307 goto decode_success;
12308 }
12309 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
12310 if (haveF2no66noF3(pfx) && sz == 4) {
12311 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "minsd", Iop_Min64F0x2 );
12312 goto decode_success;
12313 }
12314 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
12315 if (have66noF2noF3(pfx) && sz == 2) {
12316 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minpd", Iop_Min64Fx2 );
12317 goto decode_success;
12318 }
12319 break;
12320
12321 case 0x5E:
12322 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
12323 if (haveF2no66noF3(pfx) && sz == 4) {
12324 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "divsd", Iop_Div64F0x2 );
12325 goto decode_success;
12326 }
12327 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
12328 if (haveNo66noF2noF3(pfx) && sz == 4) {
12329 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divps", Iop_Div32Fx4 );
12330 goto decode_success;
12331 }
12332 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
12333 if (haveF3no66noF2(pfx) && sz == 4) {
12334 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "divss", Iop_Div32F0x4 );
12335 goto decode_success;
12336 }
12337 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
12338 if (have66noF2noF3(pfx) && sz == 2) {
12339 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divpd", Iop_Div64Fx2 );
12340 goto decode_success;
12341 }
12342 break;
12343
12344 case 0x5F:
12345 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
12346 if (haveNo66noF2noF3(pfx) && sz == 4) {
12347 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxps", Iop_Max32Fx4 );
12348 goto decode_success;
12349 }
12350 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
12351 if (haveF3no66noF2(pfx) && sz == 4) {
12352 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "maxss", Iop_Max32F0x4 );
12353 goto decode_success;
12354 }
12355 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
12356 if (haveF2no66noF3(pfx) && sz == 4) {
12357 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "maxsd", Iop_Max64F0x2 );
12358 goto decode_success;
12359 }
12360 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
12361 if (have66noF2noF3(pfx) && sz == 2) {
12362 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxpd", Iop_Max64Fx2 );
12363 goto decode_success;
12364 }
12365 break;
12366
12367 case 0x60:
12368 /* 66 0F 60 = PUNPCKLBW */
12369 if (have66noF2noF3(pfx) && sz == 2) {
12370 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12371 "punpcklbw",
12372 Iop_InterleaveLO8x16, True );
12373 goto decode_success;
12374 }
12375 break;
12376
12377 case 0x61:
12378 /* 66 0F 61 = PUNPCKLWD */
12379 if (have66noF2noF3(pfx) && sz == 2) {
12380 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12381 "punpcklwd",
12382 Iop_InterleaveLO16x8, True );
12383 goto decode_success;
12384 }
12385 break;
12386
12387 case 0x62:
12388 /* 66 0F 62 = PUNPCKLDQ */
12389 if (have66noF2noF3(pfx) && sz == 2) {
12390 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12391 "punpckldq",
12392 Iop_InterleaveLO32x4, True );
12393 goto decode_success;
12394 }
12395 break;
12396
12397 case 0x63:
12398 /* 66 0F 63 = PACKSSWB */
12399 if (have66noF2noF3(pfx) && sz == 2) {
12400 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12401 "packsswb",
12402 Iop_QNarrowBin16Sto8Sx16, True );
12403 goto decode_success;
12404 }
12405 break;
12406
12407 case 0x64:
12408 /* 66 0F 64 = PCMPGTB */
12409 if (have66noF2noF3(pfx) && sz == 2) {
12410 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12411 "pcmpgtb", Iop_CmpGT8Sx16, False );
12412 goto decode_success;
12413 }
12414 break;
12415
12416 case 0x65:
12417 /* 66 0F 65 = PCMPGTW */
12418 if (have66noF2noF3(pfx) && sz == 2) {
12419 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12420 "pcmpgtw", Iop_CmpGT16Sx8, False );
12421 goto decode_success;
12422 }
12423 break;
12424
12425 case 0x66:
12426 /* 66 0F 66 = PCMPGTD */
12427 if (have66noF2noF3(pfx) && sz == 2) {
12428 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12429 "pcmpgtd", Iop_CmpGT32Sx4, False );
12430 goto decode_success;
12431 }
12432 break;
12433
12434 case 0x67:
12435 /* 66 0F 67 = PACKUSWB */
12436 if (have66noF2noF3(pfx) && sz == 2) {
12437 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12438 "packuswb",
12439 Iop_QNarrowBin16Sto8Ux16, True );
12440 goto decode_success;
12441 }
12442 break;
12443
12444 case 0x68:
12445 /* 66 0F 68 = PUNPCKHBW */
12446 if (have66noF2noF3(pfx) && sz == 2) {
12447 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12448 "punpckhbw",
12449 Iop_InterleaveHI8x16, True );
12450 goto decode_success;
12451 }
12452 break;
12453
12454 case 0x69:
12455 /* 66 0F 69 = PUNPCKHWD */
12456 if (have66noF2noF3(pfx) && sz == 2) {
12457 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12458 "punpckhwd",
12459 Iop_InterleaveHI16x8, True );
12460 goto decode_success;
12461 }
12462 break;
12463
12464 case 0x6A:
12465 /* 66 0F 6A = PUNPCKHDQ */
12466 if (have66noF2noF3(pfx) && sz == 2) {
12467 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12468 "punpckhdq",
12469 Iop_InterleaveHI32x4, True );
12470 goto decode_success;
12471 }
12472 break;
12473
12474 case 0x6B:
12475 /* 66 0F 6B = PACKSSDW */
12476 if (have66noF2noF3(pfx) && sz == 2) {
12477 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12478 "packssdw",
12479 Iop_QNarrowBin32Sto16Sx8, True );
12480 goto decode_success;
12481 }
12482 break;
12483
12484 case 0x6C:
12485 /* 66 0F 6C = PUNPCKLQDQ */
12486 if (have66noF2noF3(pfx) && sz == 2) {
12487 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12488 "punpcklqdq",
12489 Iop_InterleaveLO64x2, True );
12490 goto decode_success;
12491 }
12492 break;
12493
12494 case 0x6D:
12495 /* 66 0F 6D = PUNPCKHQDQ */
12496 if (have66noF2noF3(pfx) && sz == 2) {
12497 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12498 "punpckhqdq",
12499 Iop_InterleaveHI64x2, True );
12500 goto decode_success;
12501 }
12502 break;
12503
12504 case 0x6E:
12505 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4,
12506 zeroing high 3/4 of xmm. */
12507 /* or from ireg64/m64 to xmm lo 1/2,
12508 zeroing high 1/2 of xmm. */
12509 if (have66noF2noF3(pfx)) {
12510 vassert(sz == 2 || sz == 8);
12511 if (sz == 2) sz = 4;
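            /* Here sz == 2 merely reflects the mandatory 66 prefix; the
               insn itself moves 32 bits (or 64 with REX.W), so normalise
               sz to 4 to mean "32-bit variant". */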
12512 modrm = getUChar(delta);
12513 if (epartIsReg(modrm)) {
12514 delta += 1;
12515 if (sz == 4) {
12516 putXMMReg(
12517 gregOfRexRM(pfx,modrm),
12518 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
12519 );
12520 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
12521 nameXMMReg(gregOfRexRM(pfx,modrm)));
12522 } else {
12523 putXMMReg(
12524 gregOfRexRM(pfx,modrm),
12525 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
12526 );
12527 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
12528 nameXMMReg(gregOfRexRM(pfx,modrm)));
12529 }
12530 } else {
12531 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
12532 delta += alen;
12533 putXMMReg(
12534 gregOfRexRM(pfx,modrm),
12535 sz == 4
12536 ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
12537 : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) )
12538 );
12539 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf,
12540 nameXMMReg(gregOfRexRM(pfx,modrm)));
12541 }
12542 goto decode_success;
12543 }
12544 break;
12545
12546 case 0x6F:
12547 if (have66noF2noF3(pfx)
12548 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12549 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
12550 modrm = getUChar(delta);
12551 if (epartIsReg(modrm)) {
12552 putXMMReg( gregOfRexRM(pfx,modrm),
12553 getXMMReg( eregOfRexRM(pfx,modrm) ));
12554 DIP("movdqa %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12555 nameXMMReg(gregOfRexRM(pfx,modrm)));
12556 delta += 1;
12557 } else {
12558 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12559 gen_SEGV_if_not_16_aligned( addr );
12560 putXMMReg( gregOfRexRM(pfx,modrm),
12561 loadLE(Ity_V128, mkexpr(addr)) );
12562 DIP("movdqa %s,%s\n", dis_buf,
12563 nameXMMReg(gregOfRexRM(pfx,modrm)));
12564 delta += alen;
12565 }
12566 goto decode_success;
12567 }
12568 if (haveF3no66noF2(pfx) && sz == 4) {
12569 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
12570 modrm = getUChar(delta);
12571 if (epartIsReg(modrm)) {
12572 putXMMReg( gregOfRexRM(pfx,modrm),
12573 getXMMReg( eregOfRexRM(pfx,modrm) ));
12574 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12575 nameXMMReg(gregOfRexRM(pfx,modrm)));
12576 delta += 1;
12577 } else {
12578 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12579 putXMMReg( gregOfRexRM(pfx,modrm),
12580 loadLE(Ity_V128, mkexpr(addr)) );
12581 DIP("movdqu %s,%s\n", dis_buf,
12582 nameXMMReg(gregOfRexRM(pfx,modrm)));
12583 delta += alen;
12584 }
12585 goto decode_success;
12586 }
12587 break;
12588
12589 case 0x70:
12590 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
12591 if (have66noF2noF3(pfx) && sz == 2) {
sewardjc4530ae2012-05-21 10:18:49 +000012592 delta = dis_PSHUFD_32x4( vbi, pfx, delta, False/*!writesYmm*/);
sewardj80611e32012-01-20 13:07:24 +000012593 goto decode_success;
12594 }
12595 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
12596 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
12597 if (haveNo66noF2noF3(pfx) && sz == 4) {
12598 Int order;
12599 IRTemp sV, dV, s3, s2, s1, s0;
12600 s3 = s2 = s1 = s0 = IRTemp_INVALID;
12601 sV = newTemp(Ity_I64);
12602 dV = newTemp(Ity_I64);
12603 do_MMX_preamble();
12604 modrm = getUChar(delta);
12605 if (epartIsReg(modrm)) {
12606 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
12607 order = (Int)getUChar(delta+1);
12608 delta += 1+1;
12609 DIP("pshufw $%d,%s,%s\n", order,
12610 nameMMXReg(eregLO3ofRM(modrm)),
12611 nameMMXReg(gregLO3ofRM(modrm)));
12612 } else {
12613 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
12614 1/*extra byte after amode*/ );
12615 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
12616 order = (Int)getUChar(delta+alen);
12617 delta += 1+alen;
12618 DIP("pshufw $%d,%s,%s\n", order,
12619 dis_buf,
12620 nameMMXReg(gregLO3ofRM(modrm)));
12621 }
12622 breakup64to16s( sV, &s3, &s2, &s1, &s0 );
12623# define SEL(n) \
12624 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
12625 assign(dV,
12626 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
12627 SEL((order>>2)&3), SEL((order>>0)&3) )
12628 );
12629 putMMXReg(gregLO3ofRM(modrm), mkexpr(dV));
12630# undef SEL
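         /* Worked example (illustrative only): for order == 0x1B (bit
            pairs 00 01 10 11), destination lanes 0..3 take their values
            from source lanes 3,2,1,0 respectively, so
            "pshufw $0x1b,%mm1,%mm0" writes %mm1's four 16-bit lanes into
            %mm0 in reverse order. */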
12631 goto decode_success;
12632 }
12633 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
12634 mem) to G(xmm), and copy upper half */
12635 if (haveF2no66noF3(pfx) && sz == 4) {
sewardj251b59e2012-05-25 13:51:07 +000012636 delta = dis_PSHUFxW_128( vbi, pfx, delta,
12637 False/*!isAvx*/, False/*!xIsH*/ );
sewardj80611e32012-01-20 13:07:24 +000012638 goto decode_success;
12639 }
12640 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
12641 mem) to G(xmm), and copy lower half */
12642 if (haveF3no66noF2(pfx) && sz == 4) {
sewardj251b59e2012-05-25 13:51:07 +000012643 delta = dis_PSHUFxW_128( vbi, pfx, delta,
12644 False/*!isAvx*/, True/*xIsH*/ );
sewardj80611e32012-01-20 13:07:24 +000012645 goto decode_success;
12646 }
12647 break;
12648
12649 case 0x71:
12650 /* 66 0F 71 /2 ib = PSRLW by immediate */
12651 if (have66noF2noF3(pfx) && sz == 2
12652 && epartIsReg(getUChar(delta))
12653 && gregLO3ofRM(getUChar(delta)) == 2) {
12654 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlw", Iop_ShrN16x8 );
12655 goto decode_success;
12656 }
12657 /* 66 0F 71 /4 ib = PSRAW by immediate */
12658 if (have66noF2noF3(pfx) && sz == 2
12659 && epartIsReg(getUChar(delta))
12660 && gregLO3ofRM(getUChar(delta)) == 4) {
12661 delta = dis_SSE_shiftE_imm( pfx, delta, "psraw", Iop_SarN16x8 );
12662 goto decode_success;
12663 }
12664 /* 66 0F 71 /6 ib = PSLLW by immediate */
12665 if (have66noF2noF3(pfx) && sz == 2
12666 && epartIsReg(getUChar(delta))
12667 && gregLO3ofRM(getUChar(delta)) == 6) {
12668 delta = dis_SSE_shiftE_imm( pfx, delta, "psllw", Iop_ShlN16x8 );
12669 goto decode_success;
12670 }
12671 break;
12672
12673 case 0x72:
12674 /* 66 0F 72 /2 ib = PSRLD by immediate */
12675 if (have66noF2noF3(pfx) && sz == 2
12676 && epartIsReg(getUChar(delta))
12677 && gregLO3ofRM(getUChar(delta)) == 2) {
12678 delta = dis_SSE_shiftE_imm( pfx, delta, "psrld", Iop_ShrN32x4 );
12679 goto decode_success;
12680 }
12681 /* 66 0F 72 /4 ib = PSRAD by immediate */
12682 if (have66noF2noF3(pfx) && sz == 2
12683 && epartIsReg(getUChar(delta))
12684 && gregLO3ofRM(getUChar(delta)) == 4) {
12685 delta = dis_SSE_shiftE_imm( pfx, delta, "psrad", Iop_SarN32x4 );
12686 goto decode_success;
12687 }
12688 /* 66 0F 72 /6 ib = PSLLD by immediate */
12689 if (have66noF2noF3(pfx) && sz == 2
12690 && epartIsReg(getUChar(delta))
12691 && gregLO3ofRM(getUChar(delta)) == 6) {
12692 delta = dis_SSE_shiftE_imm( pfx, delta, "pslld", Iop_ShlN32x4 );
12693 goto decode_success;
12694 }
12695 break;
12696
12697 case 0x73:
12698 /* 66 0F 73 /3 ib = PSRLDQ by immediate */
12699 /* note, if mem case ever filled in, 1 byte after amode */
12700 if (have66noF2noF3(pfx) && sz == 2
12701 && epartIsReg(getUChar(delta))
12702 && gregLO3ofRM(getUChar(delta)) == 3) {
sewardjc4530ae2012-05-21 10:18:49 +000012703 Int imm = (Int)getUChar(delta+1);
12704 Int reg = eregOfRexRM(pfx,getUChar(delta));
sewardj80611e32012-01-20 13:07:24 +000012705 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
sewardj80611e32012-01-20 13:07:24 +000012706 delta += 2;
sewardjc4530ae2012-05-21 10:18:49 +000012707 IRTemp sV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000012708 assign( sV, getXMMReg(reg) );
sewardj251b59e2012-05-25 13:51:07 +000012709 putXMMReg(reg, mkexpr(math_PSRLDQ( sV, imm )));
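           /* Illustrative note: this is a whole-register byte shift, so
              for example "psrldq $8,%xmm2" moves the upper 64 bits of
              %xmm2 down into the lower 64 bits and zero-fills the upper
              half; shift counts of 16 or more clear the register. */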
sewardj80611e32012-01-20 13:07:24 +000012710 goto decode_success;
12711 }
12712 /* 66 0F 73 /7 ib = PSLLDQ by immediate */
12713 /* note, if mem case ever filled in, 1 byte after amode */
12714 if (have66noF2noF3(pfx) && sz == 2
12715 && epartIsReg(getUChar(delta))
12716 && gregLO3ofRM(getUChar(delta)) == 7) {
sewardj251b59e2012-05-25 13:51:07 +000012717 Int imm = (Int)getUChar(delta+1);
12718 Int reg = eregOfRexRM(pfx,getUChar(delta));
sewardj80611e32012-01-20 13:07:24 +000012719 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
12720 vassert(imm >= 0 && imm <= 255);
12721 delta += 2;
sewardj251b59e2012-05-25 13:51:07 +000012722 IRTemp sV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000012723 assign( sV, getXMMReg(reg) );
sewardj251b59e2012-05-25 13:51:07 +000012724 putXMMReg(reg, mkexpr(math_PSLLDQ( sV, imm )));
sewardj80611e32012-01-20 13:07:24 +000012725 goto decode_success;
12726 }
12727 /* 66 0F 73 /2 ib = PSRLQ by immediate */
12728 if (have66noF2noF3(pfx) && sz == 2
12729 && epartIsReg(getUChar(delta))
12730 && gregLO3ofRM(getUChar(delta)) == 2) {
12731 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlq", Iop_ShrN64x2 );
12732 goto decode_success;
12733 }
12734 /* 66 0F 73 /6 ib = PSLLQ by immediate */
12735 if (have66noF2noF3(pfx) && sz == 2
12736 && epartIsReg(getUChar(delta))
12737 && gregLO3ofRM(getUChar(delta)) == 6) {
12738 delta = dis_SSE_shiftE_imm( pfx, delta, "psllq", Iop_ShlN64x2 );
12739 goto decode_success;
12740 }
12741 break;
12742
12743 case 0x74:
12744 /* 66 0F 74 = PCMPEQB */
12745 if (have66noF2noF3(pfx) && sz == 2) {
12746 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12747 "pcmpeqb", Iop_CmpEQ8x16, False );
12748 goto decode_success;
12749 }
12750 break;
12751
12752 case 0x75:
12753 /* 66 0F 75 = PCMPEQW */
12754 if (have66noF2noF3(pfx) && sz == 2) {
12755 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12756 "pcmpeqw", Iop_CmpEQ16x8, False );
12757 goto decode_success;
12758 }
12759 break;
12760
12761 case 0x76:
12762 /* 66 0F 76 = PCMPEQD */
12763 if (have66noF2noF3(pfx) && sz == 2) {
12764 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12765 "pcmpeqd", Iop_CmpEQ32x4, False );
12766 goto decode_success;
12767 }
12768 break;
12769
12770 case 0x7E:
12771 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
12772 G (lo half xmm). Upper half of G is zeroed out. */
12773 if (haveF3no66noF2(pfx)
12774 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12775 modrm = getUChar(delta);
12776 if (epartIsReg(modrm)) {
12777 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
12778 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
12779 /* zero bits 127:64 */
12780 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) );
12781 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12782 nameXMMReg(gregOfRexRM(pfx,modrm)));
12783 delta += 1;
12784 } else {
12785 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12786 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
12787 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
12788 loadLE(Ity_I64, mkexpr(addr)) );
12789 DIP("movsd %s,%s\n", dis_buf,
12790 nameXMMReg(gregOfRexRM(pfx,modrm)));
12791 delta += alen;
12792 }
12793 goto decode_success;
12794 }
12795 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */
12796 /* or from xmm low 1/2 to ireg64 or m64. */
12797 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
12798 if (sz == 2) sz = 4;
12799 modrm = getUChar(delta);
12800 if (epartIsReg(modrm)) {
12801 delta += 1;
12802 if (sz == 4) {
12803 putIReg32( eregOfRexRM(pfx,modrm),
12804 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
12805 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12806 nameIReg32(eregOfRexRM(pfx,modrm)));
12807 } else {
12808 putIReg64( eregOfRexRM(pfx,modrm),
12809 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
12810 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12811 nameIReg64(eregOfRexRM(pfx,modrm)));
12812 }
12813 } else {
12814 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
12815 delta += alen;
12816 storeLE( mkexpr(addr),
12817 sz == 4
12818 ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0)
12819 : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) );
12820 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q',
12821 nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
12822 }
12823 goto decode_success;
12824 }
12825 break;
12826
12827 case 0x7F:
12828 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
12829 if (haveF3no66noF2(pfx) && sz == 4) {
12830 modrm = getUChar(delta);
12831 if (epartIsReg(modrm)) {
12832 goto decode_failure; /* awaiting test case */
12833 delta += 1;
12834 putXMMReg( eregOfRexRM(pfx,modrm),
12835 getXMMReg(gregOfRexRM(pfx,modrm)) );
12836 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12837 nameXMMReg(eregOfRexRM(pfx,modrm)));
12838 } else {
12839 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
12840 delta += alen;
12841 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12842 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
12843 }
12844 goto decode_success;
12845 }
12846 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
12847 if (have66noF2noF3(pfx) && sz == 2) {
12848 modrm = getUChar(delta);
12849 if (epartIsReg(modrm)) {
12850 delta += 1;
12851 putXMMReg( eregOfRexRM(pfx,modrm),
12852 getXMMReg(gregOfRexRM(pfx,modrm)) );
12853 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12854 nameXMMReg(eregOfRexRM(pfx,modrm)));
12855 } else {
12856 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
12857 gen_SEGV_if_not_16_aligned( addr );
12858 delta += alen;
12859 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12860 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
12861 }
12862 goto decode_success;
12863 }
12864 break;
12865
12866 case 0xAE:
12867 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
12868 if (haveNo66noF2noF3(pfx)
12869 && epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
12870 && sz == 4) {
12871 delta += 1;
12872 /* Insert a memory fence. It's sometimes important that these
12873 are carried through to the generated code. */
12874 stmt( IRStmt_MBE(Imbe_Fence) );
12875 DIP("sfence\n");
12876 goto decode_success;
12877 }
12878 /* mindless duplication follows .. */
12879 /* 0F AE /5 = LFENCE -- flush pending operations to memory */
12880 /* 0F AE /6 = MFENCE -- flush pending operations to memory */
12881 if (haveNo66noF2noF3(pfx)
12882 && epartIsReg(getUChar(delta))
12883 && (gregLO3ofRM(getUChar(delta)) == 5
12884 || gregLO3ofRM(getUChar(delta)) == 6)
12885 && sz == 4) {
12886 delta += 1;
12887 /* Insert a memory fence. It's sometimes important that these
12888 are carried through to the generated code. */
12889 stmt( IRStmt_MBE(Imbe_Fence) );
12890 DIP("%sfence\n", gregLO3ofRM(getUChar(delta-1))==5 ? "l" : "m");
12891 goto decode_success;
12892 }
sewardj30fc0582012-02-16 13:45:13 +000012893
12894 /* 0F AE /7 = CLFLUSH -- flush cache line */
12895 if (haveNo66noF2noF3(pfx)
12896 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
12897 && sz == 4) {
12898
12899 /* This is something of a hack. We need to know the size of
12900 the cache line containing addr. Since we can't easily find out,
12901 assume 256 on the basis that no real cache would have a
12902 line that big. It's safe to invalidate more stuff than we
12903 need, just inefficient. */
12904 ULong lineszB = 256ULL;
12905
12906 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12907 delta += alen;
12908
12909 /* Round addr down to the start of the containing block. */
12910 stmt( IRStmt_Put(
12911 OFFB_TISTART,
12912 binop( Iop_And64,
12913 mkexpr(addr),
12914 mkU64( ~(lineszB-1) ))) );
12915
12916 stmt( IRStmt_Put(OFFB_TILEN, mkU64(lineszB) ) );
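         /* Worked example (illustrative only): with lineszB == 256 the
            mask is ~0xFF, so e.g. addr == 0x1234 gives
            TISTART == 0x1200 and TILEN == 256, i.e. the entire 256-byte
            block containing addr gets invalidated. */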
sewardj96c5f262012-04-13 23:03:45 +000012917
12918 jmp_lit(dres, Ijk_TInval, (Addr64)(guest_RIP_bbstart+delta));
sewardj30fc0582012-02-16 13:45:13 +000012919
12920 DIP("clflush %s\n", dis_buf);
12921 goto decode_success;
12922 }
12923
sewardj80611e32012-01-20 13:07:24 +000012924 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
12925 if (haveNo66noF2noF3(pfx)
12926 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
12927 && sz == 4) {
sewardjfe0c5e72012-06-15 15:48:07 +000012928 delta = dis_STMXCSR(vbi, pfx, delta, False/*!isAvx*/);
sewardj80611e32012-01-20 13:07:24 +000012929 goto decode_success;
12930 }
12931 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
12932 if (haveNo66noF2noF3(pfx)
12933 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
12934 && sz == 4) {
sewardjfe0c5e72012-06-15 15:48:07 +000012935 delta = dis_LDMXCSR(vbi, pfx, delta, False/*!isAvx*/);
sewardj80611e32012-01-20 13:07:24 +000012936 goto decode_success;
12937 }
12938 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory.
12939 Note that the presence or absence of REX.W slightly affects the
12940 written format: whether the saved FPU IP and DP pointers are 64
12941 or 32 bits. But the helper function we call simply writes zero
12942 bits in the relevant fields (which are 64 bits regardless of
12943 what REX.W is) and so it's good enough (iow, equally broken) in
12944 both cases. */
12945 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
12946 && !epartIsReg(getUChar(delta))
12947 && gregOfRexRM(pfx,getUChar(delta)) == 0) {
12948 IRDirty* d;
12949 modrm = getUChar(delta);
12950 vassert(!epartIsReg(modrm));
12951
12952 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12953 delta += alen;
12954 gen_SEGV_if_not_16_aligned(addr);
12955
12956 DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
12957
12958 /* Uses dirty helper:
12959 void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State*, ULong ) */
12960 d = unsafeIRDirty_0_N (
12961 0/*regparms*/,
12962 "amd64g_dirtyhelper_FXSAVE",
12963 &amd64g_dirtyhelper_FXSAVE,
12964 mkIRExprVec_1( mkexpr(addr) )
12965 );
12966 d->needsBBP = True;
12967
12968 /* declare we're writing memory */
12969 d->mFx = Ifx_Write;
12970 d->mAddr = mkexpr(addr);
sewardjc9069f22012-06-01 16:09:50 +000012971 d->mSize = 464; /* according to recent Intel docs */
sewardj80611e32012-01-20 13:07:24 +000012972
12973 /* declare we're reading guest state */
12974 d->nFxState = 7;
sewardjc9069f22012-06-01 16:09:50 +000012975 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj80611e32012-01-20 13:07:24 +000012976
12977 d->fxState[0].fx = Ifx_Read;
12978 d->fxState[0].offset = OFFB_FTOP;
12979 d->fxState[0].size = sizeof(UInt);
12980
12981 d->fxState[1].fx = Ifx_Read;
12982 d->fxState[1].offset = OFFB_FPREGS;
12983 d->fxState[1].size = 8 * sizeof(ULong);
12984
12985 d->fxState[2].fx = Ifx_Read;
12986 d->fxState[2].offset = OFFB_FPTAGS;
12987 d->fxState[2].size = 8 * sizeof(UChar);
12988
12989 d->fxState[3].fx = Ifx_Read;
12990 d->fxState[3].offset = OFFB_FPROUND;
12991 d->fxState[3].size = sizeof(ULong);
12992
12993 d->fxState[4].fx = Ifx_Read;
12994 d->fxState[4].offset = OFFB_FC3210;
12995 d->fxState[4].size = sizeof(ULong);
12996
12997 d->fxState[5].fx = Ifx_Read;
sewardjc4530ae2012-05-21 10:18:49 +000012998 d->fxState[5].offset = OFFB_YMM0;
sewardjc9069f22012-06-01 16:09:50 +000012999 d->fxState[5].size = sizeof(U128);
13000 /* plus 15 more of the above, spaced out in YMM sized steps */
13001 d->fxState[5].nRepeats = 15;
13002 d->fxState[5].repeatLen = sizeof(U256);
sewardj80611e32012-01-20 13:07:24 +000013003
13004 d->fxState[6].fx = Ifx_Read;
13005 d->fxState[6].offset = OFFB_SSEROUND;
13006 d->fxState[6].size = sizeof(ULong);
13007
sewardjc4530ae2012-05-21 10:18:49 +000013008 /* Be paranoid ... this assertion tries to ensure the 16 %ymm
sewardjc9069f22012-06-01 16:09:50 +000013009 images are packed back-to-back. If not, the settings for
13010 d->fxState[5] are wrong. */
sewardjc4530ae2012-05-21 10:18:49 +000013011 vassert(32 == sizeof(U256));
13012 vassert(OFFB_YMM15 == (OFFB_YMM0 + 15 * 32));
sewardj80611e32012-01-20 13:07:24 +000013013
13014 stmt( IRStmt_Dirty(d) );
13015
13016 goto decode_success;
13017 }
13018 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory.
13019 As with FXSAVE above we ignore the value of REX.W since we're
13020 not bothering with the FPU DP and IP fields. */
13021 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
13022 && !epartIsReg(getUChar(delta))
13023 && gregOfRexRM(pfx,getUChar(delta)) == 1) {
13024 IRDirty* d;
13025 modrm = getUChar(delta);
13026 vassert(!epartIsReg(modrm));
13027
13028 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13029 delta += alen;
13030 gen_SEGV_if_not_16_aligned(addr);
13031
13032 DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
13033
13034 /* Uses dirty helper:
13035 VexEmWarn amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State*, ULong )
13036 NOTE:
13037 the VexEmWarn value is simply ignored
13038 */
13039 d = unsafeIRDirty_0_N (
13040 0/*regparms*/,
13041 "amd64g_dirtyhelper_FXRSTOR",
13042 &amd64g_dirtyhelper_FXRSTOR,
13043 mkIRExprVec_1( mkexpr(addr) )
13044 );
13045 d->needsBBP = True;
13046
13047 /* declare we're reading memory */
13048 d->mFx = Ifx_Read;
13049 d->mAddr = mkexpr(addr);
sewardjc9069f22012-06-01 16:09:50 +000013050 d->mSize = 464; /* according to recent Intel docs */
sewardj80611e32012-01-20 13:07:24 +000013051
13052 /* declare we're writing guest state */
13053 d->nFxState = 7;
sewardjc9069f22012-06-01 16:09:50 +000013054 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj80611e32012-01-20 13:07:24 +000013055
13056 d->fxState[0].fx = Ifx_Write;
13057 d->fxState[0].offset = OFFB_FTOP;
13058 d->fxState[0].size = sizeof(UInt);
13059
13060 d->fxState[1].fx = Ifx_Write;
13061 d->fxState[1].offset = OFFB_FPREGS;
13062 d->fxState[1].size = 8 * sizeof(ULong);
13063
13064 d->fxState[2].fx = Ifx_Write;
13065 d->fxState[2].offset = OFFB_FPTAGS;
13066 d->fxState[2].size = 8 * sizeof(UChar);
13067
13068 d->fxState[3].fx = Ifx_Write;
13069 d->fxState[3].offset = OFFB_FPROUND;
13070 d->fxState[3].size = sizeof(ULong);
13071
13072 d->fxState[4].fx = Ifx_Write;
13073 d->fxState[4].offset = OFFB_FC3210;
13074 d->fxState[4].size = sizeof(ULong);
13075
13076 d->fxState[5].fx = Ifx_Write;
sewardjc4530ae2012-05-21 10:18:49 +000013077 d->fxState[5].offset = OFFB_YMM0;
sewardjc9069f22012-06-01 16:09:50 +000013078 d->fxState[5].size = sizeof(U128);
13079 /* plus 15 more of the above, spaced out in YMM sized steps */
13080 d->fxState[5].nRepeats = 15;
13081 d->fxState[5].repeatLen = sizeof(U256);
sewardj80611e32012-01-20 13:07:24 +000013082
13083 d->fxState[6].fx = Ifx_Write;
13084 d->fxState[6].offset = OFFB_SSEROUND;
13085 d->fxState[6].size = sizeof(ULong);
13086
sewardjc4530ae2012-05-21 10:18:49 +000013087 /* Be paranoid ... this assertion tries to ensure the 16 %ymm
sewardjc9069f22012-06-01 16:09:50 +000013088 images are packed back-to-back. If not, the settings for
13089 d->fxState[5] are wrong. */
sewardjc4530ae2012-05-21 10:18:49 +000013090 vassert(32 == sizeof(U256));
13091 vassert(OFFB_YMM15 == (OFFB_YMM0 + 15 * 32));
sewardj80611e32012-01-20 13:07:24 +000013092
13093 stmt( IRStmt_Dirty(d) );
13094
13095 goto decode_success;
13096 }
13097 break;
13098
13099 case 0xC2:
13100 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
13101 if (haveNo66noF2noF3(pfx) && sz == 4) {
sewardjc4530ae2012-05-21 10:18:49 +000013102 Long delta0 = delta;
13103 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpps", True, 4 );
13104 if (delta > delta0) goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000013105 }
13106 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
13107 if (haveF3no66noF2(pfx) && sz == 4) {
sewardjc4530ae2012-05-21 10:18:49 +000013108 Long delta0 = delta;
13109 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpss", False, 4 );
13110 if (delta > delta0) goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000013111 }
13112 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
13113 if (haveF2no66noF3(pfx) && sz == 4) {
sewardjc4530ae2012-05-21 10:18:49 +000013114 Long delta0 = delta;
13115 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpsd", False, 8 );
13116 if (delta > delta0) goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000013117 }
13118 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
13119 if (have66noF2noF3(pfx) && sz == 2) {
sewardjc4530ae2012-05-21 10:18:49 +000013120 Long delta0 = delta;
13121 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmppd", True, 8 );
13122 if (delta > delta0) goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000013123 }
13124 break;
13125
13126 case 0xC3:
13127 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
13128 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
13129 modrm = getUChar(delta);
13130 if (!epartIsReg(modrm)) {
13131 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13132 storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) );
13133 DIP("movnti %s,%s\n", dis_buf,
13134 nameIRegG(sz, pfx, modrm));
13135 delta += alen;
13136 goto decode_success;
13137 }
13138 /* else fall through */
13139 }
13140 break;
13141
13142 case 0xC4:
13143 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13144 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
13145 put it into the specified lane of mmx(G). */
13146 if (haveNo66noF2noF3(pfx)
13147 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13148 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
13149 mmx reg. t4 is the new lane value. t5 is the original
13150 mmx value. t6 is the new mmx value. */
13151 Int lane;
13152 t4 = newTemp(Ity_I16);
13153 t5 = newTemp(Ity_I64);
13154 t6 = newTemp(Ity_I64);
13155 modrm = getUChar(delta);
13156 do_MMX_preamble();
13157
13158 assign(t5, getMMXReg(gregLO3ofRM(modrm)));
13159 breakup64to16s( t5, &t3, &t2, &t1, &t0 );
13160
13161 if (epartIsReg(modrm)) {
13162 assign(t4, getIReg16(eregOfRexRM(pfx,modrm)));
13163 delta += 1+1;
13164 lane = getUChar(delta-1);
13165 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
13166 nameIReg16(eregOfRexRM(pfx,modrm)),
13167 nameMMXReg(gregLO3ofRM(modrm)));
13168 } else {
13169 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
13170 delta += 1+alen;
13171 lane = getUChar(delta-1);
13172 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
13173 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
13174 dis_buf,
13175 nameMMXReg(gregLO3ofRM(modrm)));
13176 }
13177
13178 switch (lane & 3) {
13179 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
13180 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
13181 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
13182 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
13183 default: vassert(0);
13184 }
13185 putMMXReg(gregLO3ofRM(modrm), mkexpr(t6));
13186 goto decode_success;
13187 }
13188 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
13189 put it into the specified lane of xmm(G). */
13190 if (have66noF2noF3(pfx)
13191 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13192 Int lane;
13193 t4 = newTemp(Ity_I16);
13194 modrm = getUChar(delta);
sewardj4ed05e02012-06-18 15:01:30 +000013195 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000013196 if (epartIsReg(modrm)) {
sewardj4ed05e02012-06-18 15:01:30 +000013197 UInt rE = eregOfRexRM(pfx,modrm);
13198 assign(t4, getIReg16(rE));
sewardj80611e32012-01-20 13:07:24 +000013199 delta += 1+1;
13200 lane = getUChar(delta-1);
sewardj4ed05e02012-06-18 15:01:30 +000013201 DIP("pinsrw $%d,%s,%s\n",
13202 (Int)lane, nameIReg16(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000013203 } else {
13204 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
13205 1/*byte after the amode*/ );
13206 delta += 1+alen;
13207 lane = getUChar(delta-1);
13208 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
sewardj4ed05e02012-06-18 15:01:30 +000013209 DIP("pinsrw $%d,%s,%s\n",
13210 (Int)lane, dis_buf, nameXMMReg(rG));
13211 }
13212 IRTemp src_vec = newTemp(Ity_V128);
13213 assign(src_vec, getXMMReg(rG));
13214 IRTemp res_vec = math_PINSRW_128( src_vec, t4, lane & 7);
13215 putXMMReg(rG, mkexpr(res_vec));
sewardj80611e32012-01-20 13:07:24 +000013216 goto decode_success;
13217 }
13218 break;
13219
13220 case 0xC5:
13221 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13222 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
13223 zero-extend of it in ireg(G). */
13224 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
13225 modrm = getUChar(delta);
13226 if (epartIsReg(modrm)) {
13227 IRTemp sV = newTemp(Ity_I64);
13228 t5 = newTemp(Ity_I16);
13229 do_MMX_preamble();
13230 assign(sV, getMMXReg(eregLO3ofRM(modrm)));
13231 breakup64to16s( sV, &t3, &t2, &t1, &t0 );
13232 switch (getUChar(delta+1) & 3) {
13233 case 0: assign(t5, mkexpr(t0)); break;
13234 case 1: assign(t5, mkexpr(t1)); break;
13235 case 2: assign(t5, mkexpr(t2)); break;
13236 case 3: assign(t5, mkexpr(t3)); break;
13237 default: vassert(0);
13238 }
13239 if (sz == 8)
13240 putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5)));
13241 else
13242 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5)));
13243 DIP("pextrw $%d,%s,%s\n",
13244 (Int)getUChar(delta+1),
13245 nameMMXReg(eregLO3ofRM(modrm)),
13246 sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm))
13247 : nameIReg32(gregOfRexRM(pfx,modrm))
13248 );
13249 delta += 2;
13250 goto decode_success;
13251 }
13252 /* else fall through */
13253 /* note, for anyone filling in the mem case: this insn has one
13254 byte after the amode and therefore you must pass 1 as the
13255 last arg to disAMode */
13256 }
13257 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
13258 zero-extend of it in ireg(G). */
13259 if (have66noF2noF3(pfx)
13260 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
sewardje8a7eb72012-06-12 14:59:17 +000013261 Long delta0 = delta;
13262 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
13263 False/*!isAvx*/ );
13264 if (delta > delta0) goto decode_success;
13265 /* else fall through -- decoding has failed */
sewardj80611e32012-01-20 13:07:24 +000013266 }
13267 break;
13268
13269 case 0xC6:
13270 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
13271 if (haveNo66noF2noF3(pfx) && sz == 4) {
sewardj251b59e2012-05-25 13:51:07 +000013272 Int imm8 = 0;
13273 IRTemp sV = newTemp(Ity_V128);
13274 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000013275 modrm = getUChar(delta);
sewardj251b59e2012-05-25 13:51:07 +000013276 UInt rG = gregOfRexRM(pfx,modrm);
13277 assign( dV, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000013278 if (epartIsReg(modrm)) {
sewardj251b59e2012-05-25 13:51:07 +000013279 UInt rE = eregOfRexRM(pfx,modrm);
13280 assign( sV, getXMMReg(rE) );
13281 imm8 = (Int)getUChar(delta+1);
sewardj80611e32012-01-20 13:07:24 +000013282 delta += 1+1;
sewardj251b59e2012-05-25 13:51:07 +000013283 DIP("shufps $%d,%s,%s\n", imm8, nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000013284 } else {
sewardj251b59e2012-05-25 13:51:07 +000013285 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj80611e32012-01-20 13:07:24 +000013286 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
sewardj251b59e2012-05-25 13:51:07 +000013287 imm8 = (Int)getUChar(delta+alen);
sewardj80611e32012-01-20 13:07:24 +000013288 delta += 1+alen;
sewardj251b59e2012-05-25 13:51:07 +000013289 DIP("shufps $%d,%s,%s\n", imm8, dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000013290 }
sewardj4b1cc832012-06-13 11:10:20 +000013291 IRTemp res = math_SHUFPS_128( sV, dV, imm8 );
sewardj251b59e2012-05-25 13:51:07 +000013292 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
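         /* Worked example (illustrative only): result lanes 0 and 1 are
            selected from dV (the G register) and lanes 2 and 3 from sV
            (the E operand), each by a 2-bit field of imm8.  So
            imm8 == 0xE4 (fields 3,2,1,0) yields
            { dV[0], dV[1], sV[2], sV[3] }, while imm8 == 0 broadcasts
            dV[0] into lanes 0,1 and sV[0] into lanes 2,3. */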
sewardj80611e32012-01-20 13:07:24 +000013293 goto decode_success;
13294 }
13295 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
13296 if (have66noF2noF3(pfx) && sz == 2) {
13297 Int select;
13298 IRTemp sV = newTemp(Ity_V128);
13299 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000013300
13301 modrm = getUChar(delta);
13302 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
13303
13304 if (epartIsReg(modrm)) {
13305 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
13306 select = (Int)getUChar(delta+1);
13307 delta += 1+1;
13308 DIP("shufpd $%d,%s,%s\n", select,
13309 nameXMMReg(eregOfRexRM(pfx,modrm)),
13310 nameXMMReg(gregOfRexRM(pfx,modrm)));
13311 } else {
13312 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
13313 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
13314 select = getUChar(delta+alen);
13315 delta += 1+alen;
13316 DIP("shufpd $%d,%s,%s\n", select,
13317 dis_buf,
13318 nameXMMReg(gregOfRexRM(pfx,modrm)));
13319 }
13320
sewardj21459cb2012-06-18 14:05:52 +000013321 IRTemp res = math_SHUFPD_128( sV, dV, select );
13322 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000013323 goto decode_success;
13324 }
13325 break;
13326
13327 case 0xD1:
13328 /* 66 0F D1 = PSRLW by E */
13329 if (have66noF2noF3(pfx) && sz == 2) {
13330 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlw", Iop_ShrN16x8 );
13331 goto decode_success;
13332 }
13333 break;
13334
13335 case 0xD2:
13336 /* 66 0F D2 = PSRLD by E */
13337 if (have66noF2noF3(pfx) && sz == 2) {
13338 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrld", Iop_ShrN32x4 );
13339 goto decode_success;
13340 }
13341 break;
13342
13343 case 0xD3:
13344 /* 66 0F D3 = PSRLQ by E */
13345 if (have66noF2noF3(pfx) && sz == 2) {
13346 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlq", Iop_ShrN64x2 );
13347 goto decode_success;
13348 }
13349 break;
13350
13351 case 0xD4:
13352 /* 66 0F D4 = PADDQ */
13353 if (have66noF2noF3(pfx) && sz == 2) {
13354 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13355 "paddq", Iop_Add64x2, False );
13356 goto decode_success;
13357 }
13358 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
13359 /* 0F D4 = PADDQ -- add 64x1 */
13360 if (haveNo66noF2noF3(pfx) && sz == 4) {
13361 do_MMX_preamble();
13362 delta = dis_MMXop_regmem_to_reg (
13363 vbi, pfx, delta, opc, "paddq", False );
13364 goto decode_success;
13365 }
13366 break;
13367
13368 case 0xD5:
sewardj251b59e2012-05-25 13:51:07 +000013369 /* 66 0F D5 = PMULLW -- 16x8 multiply */
sewardj80611e32012-01-20 13:07:24 +000013370 if (have66noF2noF3(pfx) && sz == 2) {
13371 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13372 "pmullw", Iop_Mul16x8, False );
13373 goto decode_success;
13374 }
13375 break;
13376
13377 case 0xD6:
13378 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
13379 hi half). */
13380 if (haveF3no66noF2(pfx) && sz == 4) {
13381 modrm = getUChar(delta);
13382 if (epartIsReg(modrm)) {
13383 do_MMX_preamble();
13384 putXMMReg( gregOfRexRM(pfx,modrm),
13385 unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) );
13386 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
13387 nameXMMReg(gregOfRexRM(pfx,modrm)));
13388 delta += 1;
13389 goto decode_success;
13390 }
13391 /* apparently no mem case for this insn */
13392 }
13393 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
13394 or lo half xmm). */
13395 if (have66noF2noF3(pfx)
13396 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13397 modrm = getUChar(delta);
13398 if (epartIsReg(modrm)) {
13399 /* fall through, awaiting test case */
13400 /* dst: lo half copied, hi half zeroed */
13401 } else {
13402 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13403 storeLE( mkexpr(addr),
13404 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
13405 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf );
13406 delta += alen;
13407 goto decode_success;
13408 }
13409 }
13410 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
13411 if (haveF2no66noF3(pfx) && sz == 4) {
13412 modrm = getUChar(delta);
13413 if (epartIsReg(modrm)) {
13414 do_MMX_preamble();
13415 putMMXReg( gregLO3ofRM(modrm),
13416 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
13417 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13418 nameMMXReg(gregLO3ofRM(modrm)));
13419 delta += 1;
13420 goto decode_success;
13421 }
13422 /* apparently no mem case for this insn */
13423 }
13424 break;
13425
13426 case 0xD7:
13427 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16
13428 lanes in xmm(E), turn them into a 16-bit value, and put
13429 zero-extend of it in ireg(G). Doing this directly is just
13430 too cumbersome; give up therefore and call a helper. */
13431 if (have66noF2noF3(pfx)
sewardj8ef22422012-05-24 16:29:18 +000013432 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
13433 && epartIsReg(getUChar(delta))) { /* no memory case, it seems */
13434 delta = dis_PMOVMSKB_128( vbi, pfx, delta, False/*!isAvx*/ );
13435 goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000013436 }
13437 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13438 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
13439 mmx(E), turn them into a byte, and put zero-extend of it in
13440 ireg(G). */
tom558fc972012-02-24 12:16:11 +000013441 if (haveNo66noF2noF3(pfx)
13442 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
sewardj80611e32012-01-20 13:07:24 +000013443 modrm = getUChar(delta);
13444 if (epartIsReg(modrm)) {
13445 do_MMX_preamble();
13446 t0 = newTemp(Ity_I64);
13447 t1 = newTemp(Ity_I64);
13448 assign(t0, getMMXReg(eregLO3ofRM(modrm)));
13449 assign(t1, mkIRExprCCall(
13450 Ity_I64, 0/*regparms*/,
13451 "amd64g_calculate_mmx_pmovmskb",
13452 &amd64g_calculate_mmx_pmovmskb,
13453 mkIRExprVec_1(mkexpr(t0))));
13454 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_64to32,mkexpr(t1)));
13455 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
13456 nameIReg32(gregOfRexRM(pfx,modrm)));
13457 delta += 1;
13458 goto decode_success;
13459 }
13460 /* else fall through */
13461 }
13462 break;
13463
13464 case 0xD8:
sewardj251b59e2012-05-25 13:51:07 +000013465 /* 66 0F D8 = PSUBUSB */
sewardj80611e32012-01-20 13:07:24 +000013466 if (have66noF2noF3(pfx) && sz == 2) {
13467 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13468 "psubusb", Iop_QSub8Ux16, False );
13469 goto decode_success;
13470 }
13471 break;
13472
13473 case 0xD9:
sewardj4f228902012-06-21 09:17:58 +000013474 /* 66 0F D9 = PSUBUSW */
sewardj80611e32012-01-20 13:07:24 +000013475 if (have66noF2noF3(pfx) && sz == 2) {
13476 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13477 "psubusw", Iop_QSub16Ux8, False );
13478 goto decode_success;
13479 }
13480 break;
13481
13482 case 0xDA:
13483 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13484 /* 0F DA = PMINUB -- 8x8 unsigned min */
13485 if (haveNo66noF2noF3(pfx) && sz == 4) {
13486 do_MMX_preamble();
13487 delta = dis_MMXop_regmem_to_reg (
13488 vbi, pfx, delta, opc, "pminub", False );
13489 goto decode_success;
13490 }
13491 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
13492 if (have66noF2noF3(pfx) && sz == 2) {
13493 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13494 "pminub", Iop_Min8Ux16, False );
13495 goto decode_success;
13496 }
13497 break;
13498
13499 case 0xDB:
13500 /* 66 0F DB = PAND */
13501 if (have66noF2noF3(pfx) && sz == 2) {
13502 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pand", Iop_AndV128 );
13503 goto decode_success;
13504 }
13505 break;
13506
13507 case 0xDC:
13508 /* 66 0F DC = PADDUSB */
13509 if (have66noF2noF3(pfx) && sz == 2) {
13510 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13511 "paddusb", Iop_QAdd8Ux16, False );
13512 goto decode_success;
13513 }
13514 break;
13515
13516 case 0xDD:
13517 /* 66 0F DD = PADDUSW */
13518 if (have66noF2noF3(pfx) && sz == 2) {
13519 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13520 "paddusw", Iop_QAdd16Ux8, False );
13521 goto decode_success;
13522 }
13523 break;
13524
13525 case 0xDE:
13526 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13527 /* 0F DE = PMAXUB -- 8x8 unsigned max */
13528 if (haveNo66noF2noF3(pfx) && sz == 4) {
13529 do_MMX_preamble();
13530 delta = dis_MMXop_regmem_to_reg (
13531 vbi, pfx, delta, opc, "pmaxub", False );
13532 goto decode_success;
13533 }
13534 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
13535 if (have66noF2noF3(pfx) && sz == 2) {
13536 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13537 "pmaxub", Iop_Max8Ux16, False );
13538 goto decode_success;
13539 }
13540 break;
13541
13542 case 0xDF:
13543 /* 66 0F DF = PANDN */
13544 if (have66noF2noF3(pfx) && sz == 2) {
13545 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "pandn", Iop_AndV128 );
13546 goto decode_success;
13547 }
13548 break;
13549
13550 case 0xE0:
13551 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13552 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
13553 if (haveNo66noF2noF3(pfx) && sz == 4) {
13554 do_MMX_preamble();
13555 delta = dis_MMXop_regmem_to_reg (
13556 vbi, pfx, delta, opc, "pavgb", False );
13557 goto decode_success;
13558 }
13559 /* 66 0F E0 = PAVGB */
13560 if (have66noF2noF3(pfx) && sz == 2) {
13561 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13562 "pavgb", Iop_Avg8Ux16, False );
13563 goto decode_success;
13564 }
13565 break;
13566
13567 case 0xE1:
13568 /* 66 0F E1 = PSRAW by E */
13569 if (have66noF2noF3(pfx) && sz == 2) {
13570 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psraw", Iop_SarN16x8 );
13571 goto decode_success;
13572 }
13573 break;
13574
13575 case 0xE2:
13576 /* 66 0F E2 = PSRAD by E */
13577 if (have66noF2noF3(pfx) && sz == 2) {
13578 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrad", Iop_SarN32x4 );
13579 goto decode_success;
13580 }
13581 break;
13582
13583 case 0xE3:
13584 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13585 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
13586 if (haveNo66noF2noF3(pfx) && sz == 4) {
13587 do_MMX_preamble();
13588 delta = dis_MMXop_regmem_to_reg (
13589 vbi, pfx, delta, opc, "pavgw", False );
13590 goto decode_success;
13591 }
13592 /* 66 0F E3 = PAVGW */
13593 if (have66noF2noF3(pfx) && sz == 2) {
13594 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13595 "pavgw", Iop_Avg16Ux8, False );
13596 goto decode_success;
13597 }
13598 break;
13599
13600 case 0xE4:
13601 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
      /* 0F E4 = PMULHUW -- 16x4 hi-half of unsigned widening multiply */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         do_MMX_preamble();
         delta = dis_MMXop_regmem_to_reg (
                    vbi, pfx, delta, opc, "pmulhuw", False );
13607 goto decode_success;
13608 }
13609 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
13610 if (have66noF2noF3(pfx) && sz == 2) {
13611 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13612 "pmulhuw", Iop_MulHi16Ux8, False );
13613 goto decode_success;
13614 }
13615 break;
13616
13617 case 0xE5:
13618 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
13619 if (have66noF2noF3(pfx) && sz == 2) {
13620 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13621 "pmulhw", Iop_MulHi16Sx8, False );
13622 goto decode_success;
13623 }
13624 break;
13625
13626 case 0xE6:
13627 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
13628 lo half xmm(G), and zero upper half, rounding towards zero */
13629 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
13630 lo half xmm(G), according to prevailing rounding mode, and zero
13631 upper half */
13632 if ( (haveF2no66noF3(pfx) && sz == 4)
13633 || (have66noF2noF3(pfx) && sz == 2) ) {
sewardj66becf32012-06-18 23:15:16 +000013634 delta = dis_CVTxPD2DQ_128( vbi, pfx, delta, False/*!isAvx*/,
13635 toBool(sz == 2)/*r2zero*/);
sewardj80611e32012-01-20 13:07:24 +000013636 goto decode_success;
13637 }
13638 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
13639 F64 in xmm(G) */
13640 if (haveF3no66noF2(pfx) && sz == 4) {
sewardj4b1cc832012-06-13 11:10:20 +000013641 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, False/*!isAvx*/);
sewardj80611e32012-01-20 13:07:24 +000013642 goto decode_success;
13643 }
13644 break;
13645
13646 case 0xE7:
13647 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13648 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
13649 Intel manual does not say anything about the usual business of
13650 the FP reg tags getting trashed whenever an MMX insn happens.
13651 So we just leave them alone.
13652 */
13653 if (haveNo66noF2noF3(pfx) && sz == 4) {
13654 modrm = getUChar(delta);
13655 if (!epartIsReg(modrm)) {
13656 /* do_MMX_preamble(); Intel docs don't specify this */
13657 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13658 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
13659 DIP("movntq %s,%s\n", dis_buf,
13660 nameMMXReg(gregLO3ofRM(modrm)));
13661 delta += alen;
13662 goto decode_success;
13663 }
13664 /* else fall through */
13665 }
13666 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
13667 if (have66noF2noF3(pfx) && sz == 2) {
13668 modrm = getUChar(delta);
13669 if (!epartIsReg(modrm)) {
13670 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13671 gen_SEGV_if_not_16_aligned( addr );
13672 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
13673 DIP("movntdq %s,%s\n", dis_buf,
13674 nameXMMReg(gregOfRexRM(pfx,modrm)));
13675 delta += alen;
13676 goto decode_success;
13677 }
13678 /* else fall through */
13679 }
13680 break;
13681
13682 case 0xE8:
13683 /* 66 0F E8 = PSUBSB */
13684 if (have66noF2noF3(pfx) && sz == 2) {
13685 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13686 "psubsb", Iop_QSub8Sx16, False );
13687 goto decode_success;
13688 }
13689 break;
13690
13691 case 0xE9:
13692 /* 66 0F E9 = PSUBSW */
13693 if (have66noF2noF3(pfx) && sz == 2) {
13694 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13695 "psubsw", Iop_QSub16Sx8, False );
13696 goto decode_success;
13697 }
13698 break;
13699
13700 case 0xEA:
13701 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13702 /* 0F EA = PMINSW -- 16x4 signed min */
13703 if (haveNo66noF2noF3(pfx) && sz == 4) {
13704 do_MMX_preamble();
13705 delta = dis_MMXop_regmem_to_reg (
13706 vbi, pfx, delta, opc, "pminsw", False );
13707 goto decode_success;
13708 }
13709 /* 66 0F EA = PMINSW -- 16x8 signed min */
13710 if (have66noF2noF3(pfx) && sz == 2) {
13711 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13712 "pminsw", Iop_Min16Sx8, False );
13713 goto decode_success;
13714 }
13715 break;
13716
13717 case 0xEB:
13718 /* 66 0F EB = POR */
13719 if (have66noF2noF3(pfx) && sz == 2) {
13720 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "por", Iop_OrV128 );
13721 goto decode_success;
13722 }
13723 break;
13724
13725 case 0xEC:
13726 /* 66 0F EC = PADDSB */
13727 if (have66noF2noF3(pfx) && sz == 2) {
13728 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13729 "paddsb", Iop_QAdd8Sx16, False );
13730 goto decode_success;
13731 }
13732 break;
13733
13734 case 0xED:
13735 /* 66 0F ED = PADDSW */
13736 if (have66noF2noF3(pfx) && sz == 2) {
13737 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13738 "paddsw", Iop_QAdd16Sx8, False );
13739 goto decode_success;
13740 }
13741 break;
13742
13743 case 0xEE:
13744 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13745 /* 0F EE = PMAXSW -- 16x4 signed max */
13746 if (haveNo66noF2noF3(pfx) && sz == 4) {
13747 do_MMX_preamble();
13748 delta = dis_MMXop_regmem_to_reg (
13749 vbi, pfx, delta, opc, "pmaxsw", False );
13750 goto decode_success;
13751 }
13752 /* 66 0F EE = PMAXSW -- 16x8 signed max */
13753 if (have66noF2noF3(pfx) && sz == 2) {
13754 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13755 "pmaxsw", Iop_Max16Sx8, False );
13756 goto decode_success;
13757 }
13758 break;
13759
13760 case 0xEF:
13761 /* 66 0F EF = PXOR */
13762 if (have66noF2noF3(pfx) && sz == 2) {
13763 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pxor", Iop_XorV128 );
13764 goto decode_success;
13765 }
13766 break;
13767
13768 case 0xF1:
13769 /* 66 0F F1 = PSLLW by E */
13770 if (have66noF2noF3(pfx) && sz == 2) {
13771 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllw", Iop_ShlN16x8 );
13772 goto decode_success;
13773 }
13774 break;
13775
13776 case 0xF2:
13777 /* 66 0F F2 = PSLLD by E */
13778 if (have66noF2noF3(pfx) && sz == 2) {
13779 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "pslld", Iop_ShlN32x4 );
13780 goto decode_success;
13781 }
13782 break;
13783
13784 case 0xF3:
13785 /* 66 0F F3 = PSLLQ by E */
13786 if (have66noF2noF3(pfx) && sz == 2) {
13787 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllq", Iop_ShlN64x2 );
13788 goto decode_success;
13789 }
13790 break;
13791
13792 case 0xF4:
      /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-bit lanes
         0 x 0 to form the lower 64-bit half and lanes 2 x 2 to form the
         upper 64-bit half */
sewardj80611e32012-01-20 13:07:24 +000013796 if (have66noF2noF3(pfx) && sz == 2) {
sewardje8a7eb72012-06-12 14:59:17 +000013797 IRTemp sV = newTemp(Ity_V128);
13798 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000013799 modrm = getUChar(delta);
sewardje8a7eb72012-06-12 14:59:17 +000013800 UInt rG = gregOfRexRM(pfx,modrm);
13801 assign( dV, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000013802 if (epartIsReg(modrm)) {
sewardje8a7eb72012-06-12 14:59:17 +000013803 UInt rE = eregOfRexRM(pfx,modrm);
13804 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000013805 delta += 1;
sewardje8a7eb72012-06-12 14:59:17 +000013806 DIP("pmuludq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000013807 } else {
13808 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13809 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
13810 delta += alen;
sewardje8a7eb72012-06-12 14:59:17 +000013811 DIP("pmuludq %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000013812 }
sewardje8a7eb72012-06-12 14:59:17 +000013813 putXMMReg( rG, mkexpr(math_PMULUDQ_128( sV, dV )) );
sewardj80611e32012-01-20 13:07:24 +000013814 goto decode_success;
13815 }
13816 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
      /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-bit lanes
         0 x 0 to form the 64-bit result */
13819 if (haveNo66noF2noF3(pfx) && sz == 4) {
13820 IRTemp sV = newTemp(Ity_I64);
13821 IRTemp dV = newTemp(Ity_I64);
13822 t1 = newTemp(Ity_I32);
13823 t0 = newTemp(Ity_I32);
13824 modrm = getUChar(delta);
13825
13826 do_MMX_preamble();
13827 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
13828
13829 if (epartIsReg(modrm)) {
13830 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13831 delta += 1;
13832 DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
13833 nameMMXReg(gregLO3ofRM(modrm)));
13834 } else {
13835 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13836 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13837 delta += alen;
13838 DIP("pmuludq %s,%s\n", dis_buf,
13839 nameMMXReg(gregLO3ofRM(modrm)));
13840 }
13841
13842 assign( t0, unop(Iop_64to32, mkexpr(dV)) );
13843 assign( t1, unop(Iop_64to32, mkexpr(sV)) );
13844 putMMXReg( gregLO3ofRM(modrm),
13845 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
13846 goto decode_success;
13847 }
13848 break;
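      /* Illustrative sketch only: the scalar computation behind both
         PMULUDQ forms above -- an unsigned 32x32->64 multiply of the
         low 32 bits of each 64-bit lane.  The function name is made up,
         <stdint.h> is assumed, and nothing here is used by the
         translator. */
#if 0
      static uint64_t ref_pmuludq_lane ( uint64_t dLane, uint64_t sLane )
      {
         /* Only bits 31:0 of each 64-bit lane take part. */
         return (uint64_t)(uint32_t)dLane * (uint64_t)(uint32_t)sLane;
      }
#endif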
13849
13850 case 0xF5:
13851 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
13852 E(xmm or mem) to G(xmm) */
13853 if (have66noF2noF3(pfx) && sz == 2) {
sewardj89378162012-06-24 12:12:20 +000013854 IRTemp sV = newTemp(Ity_V128);
13855 IRTemp dV = newTemp(Ity_V128);
13856 modrm = getUChar(delta);
13857 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000013858 if (epartIsReg(modrm)) {
sewardj89378162012-06-24 12:12:20 +000013859 UInt rE = eregOfRexRM(pfx,modrm);
13860 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000013861 delta += 1;
sewardj89378162012-06-24 12:12:20 +000013862 DIP("pmaddwd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000013863 } else {
13864 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj89378162012-06-24 12:12:20 +000013865 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
sewardj80611e32012-01-20 13:07:24 +000013866 delta += alen;
sewardj89378162012-06-24 12:12:20 +000013867 DIP("pmaddwd %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000013868 }
sewardj89378162012-06-24 12:12:20 +000013869 assign( dV, getXMMReg(rG) );
13870 putXMMReg( rG, mkexpr(math_PMADDWD_128(dV, sV)) );
sewardj80611e32012-01-20 13:07:24 +000013871 goto decode_success;
13872 }
13873 break;
13874
13875 case 0xF6:
13876 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13877 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
13878 if (haveNo66noF2noF3(pfx) && sz == 4) {
13879 do_MMX_preamble();
13880 delta = dis_MMXop_regmem_to_reg (
13881 vbi, pfx, delta, opc, "psadbw", False );
13882 goto decode_success;
13883 }
13884 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
13885 from E(xmm or mem) to G(xmm) */
13886 if (have66noF2noF3(pfx) && sz == 2) {
sewardj82096922012-06-24 14:57:59 +000013887 IRTemp sV = newTemp(Ity_V128);
13888 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000013889 modrm = getUChar(delta);
sewardj82096922012-06-24 14:57:59 +000013890 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000013891 if (epartIsReg(modrm)) {
sewardj82096922012-06-24 14:57:59 +000013892 UInt rE = eregOfRexRM(pfx,modrm);
13893 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000013894 delta += 1;
sewardj82096922012-06-24 14:57:59 +000013895 DIP("psadbw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000013896 } else {
13897 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj82096922012-06-24 14:57:59 +000013898 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
sewardj80611e32012-01-20 13:07:24 +000013899 delta += alen;
sewardj82096922012-06-24 14:57:59 +000013900 DIP("psadbw %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000013901 }
sewardj82096922012-06-24 14:57:59 +000013902 assign( dV, getXMMReg(rG) );
13903 putXMMReg( rG, mkexpr( math_PSADBW_128 ( dV, sV ) ) );
13904
sewardj80611e32012-01-20 13:07:24 +000013905 goto decode_success;
13906 }
13907 break;
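      /* Illustrative sketch only: what math_PSADBW_128 computes for each
         64-bit half -- the eight absolute byte differences are summed
         into a u16 which is zero-extended to fill the half (hence the
         "48 zeroes ++ u16" in the comment above).  Function name made
         up, <stdint.h> assumed, not used by the translator. */
#if 0
      static uint64_t ref_psadbw_half ( uint64_t d, uint64_t s )
      {
         uint32_t sum = 0;
         int      i;
         for (i = 0; i < 8; i++) {
            int32_t db = (int32_t)((d >> (8*i)) & 0xFF);
            int32_t sb = (int32_t)((s >> (8*i)) & 0xFF);
            sum += (uint32_t)(db > sb ? db - sb : sb - db);
         }
         return (uint64_t)sum;   /* fits in 16 bits; upper 48 are zero */
      }
#endif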
13908
13909 case 0xF7:
13910 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13911 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
13912 if (haveNo66noF2noF3(pfx) && sz == 4) {
13913 Bool ok = False;
13914 delta = dis_MMX( &ok, vbi, pfx, sz, delta-1 );
13915 if (ok) goto decode_success;
13916 }
13917 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
sewardj8eb7ae82012-06-24 14:00:27 +000013918 if (have66noF2noF3(pfx) && sz == 2 && epartIsReg(getUChar(delta))) {
13919 delta = dis_MASKMOVDQU( vbi, pfx, delta, False/*!isAvx*/ );
13920 goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000013921 }
13922 break;
13923
13924 case 0xF8:
13925 /* 66 0F F8 = PSUBB */
13926 if (have66noF2noF3(pfx) && sz == 2) {
13927 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13928 "psubb", Iop_Sub8x16, False );
13929 goto decode_success;
13930 }
13931 break;
13932
13933 case 0xF9:
13934 /* 66 0F F9 = PSUBW */
13935 if (have66noF2noF3(pfx) && sz == 2) {
13936 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13937 "psubw", Iop_Sub16x8, False );
13938 goto decode_success;
13939 }
13940 break;
13941
13942 case 0xFA:
13943 /* 66 0F FA = PSUBD */
13944 if (have66noF2noF3(pfx) && sz == 2) {
13945 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13946 "psubd", Iop_Sub32x4, False );
13947 goto decode_success;
13948 }
13949 break;
13950
13951 case 0xFB:
13952 /* 66 0F FB = PSUBQ */
13953 if (have66noF2noF3(pfx) && sz == 2) {
13954 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13955 "psubq", Iop_Sub64x2, False );
13956 goto decode_success;
13957 }
13958 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
13959 /* 0F FB = PSUBQ -- sub 64x1 */
13960 if (haveNo66noF2noF3(pfx) && sz == 4) {
13961 do_MMX_preamble();
13962 delta = dis_MMXop_regmem_to_reg (
13963 vbi, pfx, delta, opc, "psubq", False );
13964 goto decode_success;
13965 }
13966 break;
13967
13968 case 0xFC:
13969 /* 66 0F FC = PADDB */
13970 if (have66noF2noF3(pfx) && sz == 2) {
13971 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13972 "paddb", Iop_Add8x16, False );
13973 goto decode_success;
13974 }
13975 break;
13976
13977 case 0xFD:
13978 /* 66 0F FD = PADDW */
13979 if (have66noF2noF3(pfx) && sz == 2) {
13980 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13981 "paddw", Iop_Add16x8, False );
13982 goto decode_success;
13983 }
13984 break;
13985
13986 case 0xFE:
13987 /* 66 0F FE = PADDD */
13988 if (have66noF2noF3(pfx) && sz == 2) {
13989 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13990 "paddd", Iop_Add32x4, False );
13991 goto decode_success;
13992 }
13993 break;
13994
13995 default:
13996 goto decode_failure;
13997
13998 }
13999
14000 decode_failure:
14001 *decode_OK = False;
14002 return deltaIN;
14003
14004 decode_success:
14005 *decode_OK = True;
14006 return delta;
14007}
14008
14009
14010/*------------------------------------------------------------*/
14011/*--- ---*/
14012/*--- Top-level SSE3 (not SupSSE3): dis_ESC_0F__SSE3 ---*/
14013/*--- ---*/
14014/*------------------------------------------------------------*/
14015
sewardjc4530ae2012-05-21 10:18:49 +000014016static Long dis_MOVDDUP_128 ( VexAbiInfo* vbi, Prefix pfx,
14017 Long delta, Bool isAvx )
14018{
14019 IRTemp addr = IRTemp_INVALID;
14020 Int alen = 0;
14021 HChar dis_buf[50];
14022 IRTemp sV = newTemp(Ity_V128);
14023 IRTemp d0 = newTemp(Ity_I64);
14024 UChar modrm = getUChar(delta);
14025 UInt rG = gregOfRexRM(pfx,modrm);
14026 if (epartIsReg(modrm)) {
14027 UInt rE = eregOfRexRM(pfx,modrm);
14028 assign( sV, getXMMReg(rE) );
14029 DIP("%smovddup %s,%s\n",
14030 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
14031 delta += 1;
14032 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
14033 } else {
14034 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14035 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
14036 DIP("%smovddup %s,%s\n",
14037 isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
14038 delta += alen;
14039 }
14040 (isAvx ? putYMMRegLoAndZU : putXMMReg)
14041 ( rG, binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
14042 return delta;
14043}
14044
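/* Illustrative sketch only: the 128-bit MOVDDUP effect implemented
   above -- the low 64-bit lane of the source is copied into both 64-bit
   lanes of the destination.  The type and function names are made up,
   <stdint.h> is assumed, and nothing here is used by the translator. */
#if 0
typedef struct { uint64_t lane64[2]; } RefV128;

static RefV128 ref_movddup_128 ( RefV128 src )
{
   RefV128 res;
   res.lane64[0] = src.lane64[0];   /* keep the low lane ...      */
   res.lane64[1] = src.lane64[0];   /* ... and duplicate it above */
   return res;
}
#endif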
14045
sewardj82096922012-06-24 14:57:59 +000014046static Long dis_MOVDDUP_256 ( VexAbiInfo* vbi, Prefix pfx,
14047 Long delta )
14048{
14049 IRTemp addr = IRTemp_INVALID;
14050 Int alen = 0;
14051 HChar dis_buf[50];
14052 IRTemp d0 = newTemp(Ity_I64);
14053 IRTemp d1 = newTemp(Ity_I64);
14054 UChar modrm = getUChar(delta);
14055 UInt rG = gregOfRexRM(pfx,modrm);
14056 if (epartIsReg(modrm)) {
14057 UInt rE = eregOfRexRM(pfx,modrm);
14058 DIP("vmovddup %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
14059 delta += 1;
14060 assign ( d0, getYMMRegLane64(rE, 0) );
14061 assign ( d1, getYMMRegLane64(rE, 2) );
14062 } else {
14063 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14064 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
14065 assign( d1, loadLE(Ity_I64, binop(Iop_Add64,
14066 mkexpr(addr), mkU64(16))) );
14067 DIP("vmovddup %s,%s\n", dis_buf, nameYMMReg(rG));
14068 delta += alen;
14069 }
14070 putYMMRegLane64( rG, 0, mkexpr(d0) );
14071 putYMMRegLane64( rG, 1, mkexpr(d0) );
14072 putYMMRegLane64( rG, 2, mkexpr(d1) );
14073 putYMMRegLane64( rG, 3, mkexpr(d1) );
14074 return delta;
14075}
14076
14077
sewardj15ad1942012-06-20 10:21:05 +000014078static Long dis_MOVSxDUP_128 ( VexAbiInfo* vbi, Prefix pfx,
14079 Long delta, Bool isAvx, Bool isL )
14080{
14081 IRTemp addr = IRTemp_INVALID;
14082 Int alen = 0;
14083 HChar dis_buf[50];
14084 IRTemp sV = newTemp(Ity_V128);
14085 UChar modrm = getUChar(delta);
14086 UInt rG = gregOfRexRM(pfx,modrm);
14087 IRTemp s3, s2, s1, s0;
14088 s3 = s2 = s1 = s0 = IRTemp_INVALID;
14089 if (epartIsReg(modrm)) {
14090 UInt rE = eregOfRexRM(pfx,modrm);
14091 assign( sV, getXMMReg(rE) );
14092 DIP("%smovs%cdup %s,%s\n",
14093 isAvx ? "v" : "", isL ? 'l' : 'h', nameXMMReg(rE), nameXMMReg(rG));
14094 delta += 1;
14095 } else {
14096 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14097 if (!isAvx)
14098 gen_SEGV_if_not_16_aligned( addr );
14099 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14100 DIP("%smovs%cdup %s,%s\n",
14101 isAvx ? "v" : "", isL ? 'l' : 'h', dis_buf, nameXMMReg(rG));
14102 delta += alen;
14103 }
14104 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
14105 (isAvx ? putYMMRegLoAndZU : putXMMReg)
14106 ( rG, isL ? mkV128from32s( s2, s2, s0, s0 )
14107 : mkV128from32s( s3, s3, s1, s1 ) );
14108 return delta;
14109}
14110
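/* Illustrative sketch only: the 32-bit lane selection performed above.
   For MOVSLDUP (isL) the result, highest lane first, is (2:2:0:0); for
   MOVSHDUP it is (3:3:1:1).  Function name made up, <stdint.h> assumed,
   not used by the translator. */
#if 0
static void ref_movsxdup_128 ( const uint32_t src[4], uint32_t dst[4],
                               int isL )
{
   if (isL) {
      dst[0] = src[0]; dst[1] = src[0];   /* lanes 1:0 <- lane 0 */
      dst[2] = src[2]; dst[3] = src[2];   /* lanes 3:2 <- lane 2 */
   } else {
      dst[0] = src[1]; dst[1] = src[1];   /* lanes 1:0 <- lane 1 */
      dst[2] = src[3]; dst[3] = src[3];   /* lanes 3:2 <- lane 3 */
   }
}
#endif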
14111
14112static Long dis_MOVSxDUP_256 ( VexAbiInfo* vbi, Prefix pfx,
14113 Long delta, Bool isL )
14114{
14115 IRTemp addr = IRTemp_INVALID;
14116 Int alen = 0;
14117 HChar dis_buf[50];
14118 IRTemp sV = newTemp(Ity_V256);
sewardj15ad1942012-06-20 10:21:05 +000014119 UChar modrm = getUChar(delta);
14120 UInt rG = gregOfRexRM(pfx,modrm);
14121 IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
14122 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
14123 if (epartIsReg(modrm)) {
14124 UInt rE = eregOfRexRM(pfx,modrm);
14125 assign( sV, getYMMReg(rE) );
14126 DIP("vmovs%cdup %s,%s\n",
14127 isL ? 'l' : 'h', nameYMMReg(rE), nameYMMReg(rG));
14128 delta += 1;
14129 } else {
14130 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14131 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
14132 DIP("vmovs%cdup %s,%s\n",
14133 isL ? 'l' : 'h', dis_buf, nameYMMReg(rG));
14134 delta += alen;
14135 }
sewardj4f228902012-06-21 09:17:58 +000014136 breakupV256to32s( sV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
sewardj15ad1942012-06-20 10:21:05 +000014137 putYMMRegLane128( rG, 1, isL ? mkV128from32s( s6, s6, s4, s4 )
14138 : mkV128from32s( s7, s7, s5, s5 ) );
14139 putYMMRegLane128( rG, 0, isL ? mkV128from32s( s2, s2, s0, s0 )
14140 : mkV128from32s( s3, s3, s1, s1 ) );
14141 return delta;
14142}
14143
14144
sewardjadf357c2012-06-24 13:44:17 +000014145static IRTemp math_HADDPS_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
14146{
14147 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
14148 IRTemp leftV = newTemp(Ity_V128);
14149 IRTemp rightV = newTemp(Ity_V128);
14150 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
14151
14152 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
14153 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
14154
14155 assign( leftV, mkV128from32s( s2, s0, d2, d0 ) );
14156 assign( rightV, mkV128from32s( s3, s1, d3, d1 ) );
14157
14158 IRTemp res = newTemp(Ity_V128);
14159 assign( res, binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
14160 mkexpr(leftV), mkexpr(rightV) ) );
14161 return res;
14162}
14163
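/* Illustrative sketch only: the horizontal pairing math_HADDPS_128
   encodes.  With isAdd, the result lanes are, lowest first,
      d0+d1, d2+d3, s0+s1, s2+s3
   and the corresponding differences for HSUBPS.  MXCSR rounding is
   ignored here; the function name is made up and nothing below is used
   by the translator. */
#if 0
static void ref_haddps ( const float d[4], const float s[4],
                         float res[4], int isAdd )
{
   if (isAdd) {
      res[0] = d[0] + d[1];   res[1] = d[2] + d[3];
      res[2] = s[0] + s[1];   res[3] = s[2] + s[3];
   } else {
      res[0] = d[0] - d[1];   res[1] = d[2] - d[3];
      res[2] = s[0] - s[1];   res[3] = s[2] - s[3];
   }
}
#endif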
14164
14165static IRTemp math_HADDPD_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
14166{
14167 IRTemp s1, s0, d1, d0;
14168 IRTemp leftV = newTemp(Ity_V128);
14169 IRTemp rightV = newTemp(Ity_V128);
14170 s1 = s0 = d1 = d0 = IRTemp_INVALID;
14171
14172 breakupV128to64s( sV, &s1, &s0 );
14173 breakupV128to64s( dV, &d1, &d0 );
14174
14175 assign( leftV, binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
14176 assign( rightV, binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
14177
14178 IRTemp res = newTemp(Ity_V128);
14179 assign( res, binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
14180 mkexpr(leftV), mkexpr(rightV) ) );
14181 return res;
14182}
14183
14184
sewardj80611e32012-01-20 13:07:24 +000014185__attribute__((noinline))
14186static
14187Long dis_ESC_0F__SSE3 ( Bool* decode_OK,
14188 VexAbiInfo* vbi,
14189 Prefix pfx, Int sz, Long deltaIN )
14190{
14191 IRTemp addr = IRTemp_INVALID;
14192 UChar modrm = 0;
14193 Int alen = 0;
14194 HChar dis_buf[50];
14195
14196 *decode_OK = False;
14197
14198 Long delta = deltaIN;
14199 UChar opc = getUChar(delta);
14200 delta++;
14201 switch (opc) {
14202
14203 case 0x12:
14204 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
14205 duplicating some lanes (2:2:0:0). */
14206 if (haveF3no66noF2(pfx) && sz == 4) {
sewardj15ad1942012-06-20 10:21:05 +000014207 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
14208 True/*isL*/ );
sewardj80611e32012-01-20 13:07:24 +000014209 goto decode_success;
14210 }
14211 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
         duplicating some lanes (1:0:1:0). */
14213 if (haveF2no66noF3(pfx)
14214 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
sewardjc4530ae2012-05-21 10:18:49 +000014215 delta = dis_MOVDDUP_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000014216 goto decode_success;
14217 }
14218 break;
14219
14220 case 0x16:
14221 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
14222 duplicating some lanes (3:3:1:1). */
14223 if (haveF3no66noF2(pfx) && sz == 4) {
sewardj15ad1942012-06-20 10:21:05 +000014224 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
14225 False/*!isL*/ );
sewardj80611e32012-01-20 13:07:24 +000014226 goto decode_success;
14227 }
14228 break;
14229
14230 case 0x7C:
14231 case 0x7D:
14232 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
14233 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
14234 if (haveF2no66noF3(pfx) && sz == 4) {
sewardj80611e32012-01-20 13:07:24 +000014235 IRTemp eV = newTemp(Ity_V128);
14236 IRTemp gV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000014237 Bool isAdd = opc == 0x7C;
14238 HChar* str = isAdd ? "add" : "sub";
sewardjadf357c2012-06-24 13:44:17 +000014239 modrm = getUChar(delta);
14240 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000014241 if (epartIsReg(modrm)) {
sewardjadf357c2012-06-24 13:44:17 +000014242 UInt rE = eregOfRexRM(pfx,modrm);
14243 assign( eV, getXMMReg(rE) );
14244 DIP("h%sps %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014245 delta += 1;
14246 } else {
14247 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14248 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
sewardjadf357c2012-06-24 13:44:17 +000014249 DIP("h%sps %s,%s\n", str, dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014250 delta += alen;
14251 }
14252
sewardjadf357c2012-06-24 13:44:17 +000014253 assign( gV, getXMMReg(rG) );
14254 putXMMReg( rG, mkexpr( math_HADDPS_128 ( gV, eV, isAdd ) ) );
sewardj80611e32012-01-20 13:07:24 +000014255 goto decode_success;
14256 }
14257 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
14258 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
14259 if (have66noF2noF3(pfx) && sz == 2) {
sewardj80611e32012-01-20 13:07:24 +000014260 IRTemp eV = newTemp(Ity_V128);
14261 IRTemp gV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000014262 Bool isAdd = opc == 0x7C;
14263 HChar* str = isAdd ? "add" : "sub";
sewardjadf357c2012-06-24 13:44:17 +000014264 modrm = getUChar(delta);
14265 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000014266 if (epartIsReg(modrm)) {
sewardjadf357c2012-06-24 13:44:17 +000014267 UInt rE = eregOfRexRM(pfx,modrm);
14268 assign( eV, getXMMReg(rE) );
14269 DIP("h%spd %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014270 delta += 1;
14271 } else {
14272 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14273 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
sewardjadf357c2012-06-24 13:44:17 +000014274 DIP("h%spd %s,%s\n", str, dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014275 delta += alen;
14276 }
14277
sewardjadf357c2012-06-24 13:44:17 +000014278 assign( gV, getXMMReg(rG) );
14279 putXMMReg( rG, mkexpr( math_HADDPD_128 ( gV, eV, isAdd ) ) );
sewardj80611e32012-01-20 13:07:24 +000014280 goto decode_success;
14281 }
14282 break;
14283
14284 case 0xD0:
      /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
14286 if (have66noF2noF3(pfx) && sz == 2) {
14287 IRTemp eV = newTemp(Ity_V128);
14288 IRTemp gV = newTemp(Ity_V128);
sewardj89378162012-06-24 12:12:20 +000014289 modrm = getUChar(delta);
14290 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000014291 if (epartIsReg(modrm)) {
sewardj89378162012-06-24 12:12:20 +000014292 UInt rE = eregOfRexRM(pfx,modrm);
14293 assign( eV, getXMMReg(rE) );
14294 DIP("addsubpd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014295 delta += 1;
14296 } else {
14297 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14298 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
sewardj89378162012-06-24 12:12:20 +000014299 DIP("addsubpd %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014300 delta += alen;
14301 }
14302
sewardj89378162012-06-24 12:12:20 +000014303 assign( gV, getXMMReg(rG) );
14304 putXMMReg( rG, mkexpr( math_ADDSUBPD_128 ( gV, eV ) ) );
sewardj80611e32012-01-20 13:07:24 +000014305 goto decode_success;
14306 }
14307 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
14308 if (haveF2no66noF3(pfx) && sz == 4) {
sewardj80611e32012-01-20 13:07:24 +000014309 IRTemp eV = newTemp(Ity_V128);
14310 IRTemp gV = newTemp(Ity_V128);
sewardj89378162012-06-24 12:12:20 +000014311 modrm = getUChar(delta);
14312 UInt rG = gregOfRexRM(pfx,modrm);
14315 if (epartIsReg(modrm)) {
sewardj89378162012-06-24 12:12:20 +000014316 UInt rE = eregOfRexRM(pfx,modrm);
14317 assign( eV, getXMMReg(rE) );
14318 DIP("addsubps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014319 delta += 1;
14320 } else {
14321 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14322 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
sewardj89378162012-06-24 12:12:20 +000014323 DIP("addsubps %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014324 delta += alen;
14325 }
14326
sewardj89378162012-06-24 12:12:20 +000014327 assign( gV, getXMMReg(rG) );
14328 putXMMReg( rG, mkexpr( math_ADDSUBPS_128 ( gV, eV ) ) );
sewardj80611e32012-01-20 13:07:24 +000014329 goto decode_success;
14330 }
14331 break;
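      /* Illustrative sketch only: the lane pattern behind ADDSUBPS --
         even-numbered lanes are subtracted, odd-numbered lanes are
         added (ADDSUBPD does the same on its two 64-bit lanes).  MXCSR
         rounding is ignored; the function name is made up and nothing
         here is used by the translator. */
#if 0
      static void ref_addsubps ( const float g[4], const float e[4],
                                 float res[4] )
      {
         res[0] = g[0] - e[0];   /* lane 0: subtract */
         res[1] = g[1] + e[1];   /* lane 1: add      */
         res[2] = g[2] - e[2];   /* lane 2: subtract */
         res[3] = g[3] + e[3];   /* lane 3: add      */
      }
#endif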
14332
14333 case 0xF0:
14334 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
14335 if (haveF2no66noF3(pfx) && sz == 4) {
14336 modrm = getUChar(delta);
14337 if (epartIsReg(modrm)) {
14338 goto decode_failure;
14339 } else {
14340 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14341 putXMMReg( gregOfRexRM(pfx,modrm),
14342 loadLE(Ity_V128, mkexpr(addr)) );
14343 DIP("lddqu %s,%s\n", dis_buf,
14344 nameXMMReg(gregOfRexRM(pfx,modrm)));
14345 delta += alen;
14346 }
14347 goto decode_success;
14348 }
14349 break;
14350
14351 default:
14352 goto decode_failure;
14353
14354 }
14355
14356 decode_failure:
14357 *decode_OK = False;
14358 return deltaIN;
14359
14360 decode_success:
14361 *decode_OK = True;
14362 return delta;
14363}
14364
14365
14366/*------------------------------------------------------------*/
14367/*--- ---*/
14368/*--- Top-level SSSE3: dis_ESC_0F38__SupSSE3 ---*/
14369/*--- ---*/
14370/*------------------------------------------------------------*/
14371
sewardjc4530ae2012-05-21 10:18:49 +000014372static
14373IRTemp math_PSHUFB_XMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
14374{
14375 IRTemp sHi = newTemp(Ity_I64);
14376 IRTemp sLo = newTemp(Ity_I64);
14377 IRTemp dHi = newTemp(Ity_I64);
14378 IRTemp dLo = newTemp(Ity_I64);
14379 IRTemp rHi = newTemp(Ity_I64);
14380 IRTemp rLo = newTemp(Ity_I64);
14381 IRTemp sevens = newTemp(Ity_I64);
14382 IRTemp mask0x80hi = newTemp(Ity_I64);
14383 IRTemp mask0x80lo = newTemp(Ity_I64);
14384 IRTemp maskBit3hi = newTemp(Ity_I64);
14385 IRTemp maskBit3lo = newTemp(Ity_I64);
14386 IRTemp sAnd7hi = newTemp(Ity_I64);
14387 IRTemp sAnd7lo = newTemp(Ity_I64);
14388 IRTemp permdHi = newTemp(Ity_I64);
14389 IRTemp permdLo = newTemp(Ity_I64);
14390 IRTemp res = newTemp(Ity_V128);
14391
14392 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
14393 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
14394 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
14395 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
14396
14397 assign( sevens, mkU64(0x0707070707070707ULL) );
14398
14399 /* mask0x80hi = Not(SarN8x8(sHi,7))
14400 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
14401 sAnd7hi = And(sHi,sevens)
14402 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
14403 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
14404 rHi = And(permdHi,mask0x80hi)
14405 */
14406 assign(
14407 mask0x80hi,
14408 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));
14409
14410 assign(
14411 maskBit3hi,
14412 binop(Iop_SarN8x8,
14413 binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
14414 mkU8(7)));
14415
14416 assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));
14417
14418 assign(
14419 permdHi,
14420 binop(
14421 Iop_Or64,
14422 binop(Iop_And64,
14423 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
14424 mkexpr(maskBit3hi)),
14425 binop(Iop_And64,
14426 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
14427 unop(Iop_Not64,mkexpr(maskBit3hi))) ));
14428
14429 assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );
14430
14431 /* And the same for the lower half of the result. What fun. */
14432
14433 assign(
14434 mask0x80lo,
14435 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));
14436
14437 assign(
14438 maskBit3lo,
14439 binop(Iop_SarN8x8,
14440 binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
14441 mkU8(7)));
14442
14443 assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));
14444
14445 assign(
14446 permdLo,
14447 binop(
14448 Iop_Or64,
14449 binop(Iop_And64,
14450 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
14451 mkexpr(maskBit3lo)),
14452 binop(Iop_And64,
14453 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
14454 unop(Iop_Not64,mkexpr(maskBit3lo))) ));
14455
14456 assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );
14457
14458 assign(res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
14459 return res;
14460}
14461
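/* Illustrative sketch only: the byte-level semantics that
   math_PSHUFB_XMM builds out of 64-bit Perm8x8 pieces.  For each byte i
   of the result: if bit 7 of the control byte is set the result byte is
   zero, otherwise it is the data byte selected by the low four control
   bits.  Function name made up, <stdint.h> assumed, not used by the
   translator. */
#if 0
static void ref_pshufb_128 ( const uint8_t data[16], const uint8_t ctl[16],
                             uint8_t res[16] )
{
   int i;
   for (i = 0; i < 16; i++)
      res[i] = (ctl[i] & 0x80) ? 0 : data[ctl[i] & 0x0F];
}
#endif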
14462
sewardj8516a1f2012-06-24 14:26:30 +000014463static Long dis_PHADD_128 ( VexAbiInfo* vbi, Prefix pfx, Long delta,
14464 Bool isAvx, UChar opc )
14465{
14466 IRTemp addr = IRTemp_INVALID;
14467 Int alen = 0;
14468 HChar dis_buf[50];
14469 HChar* str = "???";
14470 IROp opV64 = Iop_INVALID;
14471 IROp opCatO = Iop_CatOddLanes16x4;
14472 IROp opCatE = Iop_CatEvenLanes16x4;
14473 IRTemp sV = newTemp(Ity_V128);
14474 IRTemp dV = newTemp(Ity_V128);
14475 IRTemp sHi = newTemp(Ity_I64);
14476 IRTemp sLo = newTemp(Ity_I64);
14477 IRTemp dHi = newTemp(Ity_I64);
14478 IRTemp dLo = newTemp(Ity_I64);
14479 UChar modrm = getUChar(delta);
14480 UInt rG = gregOfRexRM(pfx,modrm);
14481 UInt rV = isAvx ? getVexNvvvv(pfx) : rG;
14482
14483 switch (opc) {
14484 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
14485 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
14486 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
14487 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
14488 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
14489 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
14490 default: vassert(0);
14491 }
14492 if (opc == 0x02 || opc == 0x06) {
14493 opCatO = Iop_InterleaveHI32x2;
14494 opCatE = Iop_InterleaveLO32x2;
14495 }
14496
14497 assign( dV, getXMMReg(rV) );
14498
14499 if (epartIsReg(modrm)) {
14500 UInt rE = eregOfRexRM(pfx,modrm);
14501 assign( sV, getXMMReg(rE) );
14502 DIP("ph%s %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
14503 delta += 1;
14504 } else {
14505 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14506 if (!isAvx)
14507 gen_SEGV_if_not_16_aligned( addr );
14508 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14509 DIP("ph%s %s,%s\n", str, dis_buf, nameXMMReg(rG));
14510 delta += alen;
14511 }
14512
14513 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
14514 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
14515 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
14516 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
14517
14518 /* This isn't a particularly efficient way to compute the
14519 result, but at least it avoids a proliferation of IROps,
      hence avoids complicating all the backends. */
14521
14522 (isAvx ? putYMMRegLoAndZU : putXMMReg)
14523 ( rG,
14524 binop(Iop_64HLtoV128,
14525 binop(opV64,
14526 binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
14527 binop(opCatO,mkexpr(sHi),mkexpr(sLo)) ),
14528 binop(opV64,
14529 binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
14530 binop(opCatO,mkexpr(dHi),mkexpr(dLo)) ) ) );
14531 return delta;
14532}
14533
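/* Illustrative sketch only: for the PHADDW (opc 0x01) case handled by
   dis_PHADD_128, the result 16-bit lanes are, lowest first,
      d0+d1, d2+d3, d4+d5, d6+d7, s0+s1, s2+s3, s4+s5, s6+s7
   i.e. adjacent pairs of the destination, then of the source, with
   wrap-around (no saturation).  Function name made up, <stdint.h>
   assumed, not used by the translator. */
#if 0
static void ref_phaddw_128 ( const uint16_t d[8], const uint16_t s[8],
                             uint16_t res[8] )
{
   int i;
   for (i = 0; i < 4; i++) {
      res[i]     = (uint16_t)(d[2*i] + d[2*i + 1]);
      res[i + 4] = (uint16_t)(s[2*i] + s[2*i + 1]);
   }
}
#endif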
14534
14535static IRTemp math_PMADDUBSW_128 ( IRTemp dV, IRTemp sV )
14536{
14537 IRTemp sVoddsSX = newTemp(Ity_V128);
14538 IRTemp sVevensSX = newTemp(Ity_V128);
14539 IRTemp dVoddsZX = newTemp(Ity_V128);
14540 IRTemp dVevensZX = newTemp(Ity_V128);
14541 /* compute dV unsigned x sV signed */
14542 assign( sVoddsSX, binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
14543 assign( sVevensSX, binop(Iop_SarN16x8,
14544 binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
14545 mkU8(8)) );
14546 assign( dVoddsZX, binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
14547 assign( dVevensZX, binop(Iop_ShrN16x8,
14548 binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
14549 mkU8(8)) );
14550
14551 IRTemp res = newTemp(Ity_V128);
14552 assign( res, binop(Iop_QAdd16Sx8,
14553 binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
14554 binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
14555 )
14556 );
14557 return res;
14558}
14559
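/* Illustrative sketch only: the lane computation math_PMADDUBSW_128
   encodes.  Each result 16-bit lane is the signed-saturated sum of two
   products of an unsigned byte from the destination and a signed byte
   from the source.  Function name made up, <stdint.h> assumed, not used
   by the translator. */
#if 0
static void ref_pmaddubsw_128 ( const uint8_t d[16], const int8_t s[16],
                                int16_t res[8] )
{
   int i;
   for (i = 0; i < 8; i++) {
      int32_t t = (int32_t)d[2*i]     * (int32_t)s[2*i]
                + (int32_t)d[2*i + 1] * (int32_t)s[2*i + 1];
      if (t >  32767) t =  32767;    /* signed saturation */
      if (t < -32768) t = -32768;
      res[i] = (int16_t)t;
   }
}
#endif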
14560
sewardj80611e32012-01-20 13:07:24 +000014561__attribute__((noinline))
14562static
14563Long dis_ESC_0F38__SupSSE3 ( Bool* decode_OK,
14564 VexAbiInfo* vbi,
14565 Prefix pfx, Int sz, Long deltaIN )
14566{
14567 IRTemp addr = IRTemp_INVALID;
14568 UChar modrm = 0;
14569 Int alen = 0;
14570 HChar dis_buf[50];
14571
14572 *decode_OK = False;
14573
14574 Long delta = deltaIN;
14575 UChar opc = getUChar(delta);
14576 delta++;
14577 switch (opc) {
14578
14579 case 0x00:
14580 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
14581 if (have66noF2noF3(pfx)
14582 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
sewardjc4530ae2012-05-21 10:18:49 +000014583 IRTemp sV = newTemp(Ity_V128);
14584 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000014585
14586 modrm = getUChar(delta);
14587 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
14588
14589 if (epartIsReg(modrm)) {
14590 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
14591 delta += 1;
14592 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
14593 nameXMMReg(gregOfRexRM(pfx,modrm)));
14594 } else {
14595 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14596 gen_SEGV_if_not_16_aligned( addr );
14597 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14598 delta += alen;
14599 DIP("pshufb %s,%s\n", dis_buf,
14600 nameXMMReg(gregOfRexRM(pfx,modrm)));
14601 }
14602
sewardjc4530ae2012-05-21 10:18:49 +000014603 IRTemp res = math_PSHUFB_XMM( dV, sV );
14604 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(res));
sewardj80611e32012-01-20 13:07:24 +000014605 goto decode_success;
14606 }
14607 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
14608 if (haveNo66noF2noF3(pfx) && sz == 4) {
14609 IRTemp sV = newTemp(Ity_I64);
14610 IRTemp dV = newTemp(Ity_I64);
14611
14612 modrm = getUChar(delta);
14613 do_MMX_preamble();
14614 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
14615
14616 if (epartIsReg(modrm)) {
14617 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
14618 delta += 1;
14619 DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14620 nameMMXReg(gregLO3ofRM(modrm)));
14621 } else {
14622 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14623 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
14624 delta += alen;
14625 DIP("pshufb %s,%s\n", dis_buf,
14626 nameMMXReg(gregLO3ofRM(modrm)));
14627 }
14628
14629 putMMXReg(
14630 gregLO3ofRM(modrm),
14631 binop(
14632 Iop_And64,
14633 /* permute the lanes */
14634 binop(
14635 Iop_Perm8x8,
14636 mkexpr(dV),
14637 binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
14638 ),
14639 /* mask off lanes which have (index & 0x80) == 0x80 */
14640 unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
14641 )
14642 );
14643 goto decode_success;
14644 }
14645 break;
14646
14647 case 0x01:
14648 case 0x02:
14649 case 0x03:
14650 case 0x05:
14651 case 0x06:
14652 case 0x07:
14653 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
14654 G to G (xmm). */
14655 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
14656 G to G (xmm). */
14657 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
14658 xmm) and G to G (xmm). */
14659 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
14660 G to G (xmm). */
14661 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
14662 G to G (xmm). */
14663 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
14664 xmm) and G to G (xmm). */
14665 if (have66noF2noF3(pfx)
14666 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
sewardj8516a1f2012-06-24 14:26:30 +000014667 delta = dis_PHADD_128( vbi, pfx, delta, False/*isAvx*/, opc );
sewardj80611e32012-01-20 13:07:24 +000014668 goto decode_success;
14669 }
14670 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
14671 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
14672 to G (mmx). */
14673 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
14674 to G (mmx). */
14675 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
14676 mmx) and G to G (mmx). */
14677 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
14678 to G (mmx). */
14679 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
14680 to G (mmx). */
14681 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
14682 mmx) and G to G (mmx). */
14683 if (haveNo66noF2noF3(pfx) && sz == 4) {
14684 HChar* str = "???";
14685 IROp opV64 = Iop_INVALID;
14686 IROp opCatO = Iop_CatOddLanes16x4;
14687 IROp opCatE = Iop_CatEvenLanes16x4;
14688 IRTemp sV = newTemp(Ity_I64);
14689 IRTemp dV = newTemp(Ity_I64);
14690
14691 modrm = getUChar(delta);
14692
14693 switch (opc) {
14694 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
14695 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
14696 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
14697 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
14698 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
14699 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
14700 default: vassert(0);
14701 }
14702 if (opc == 0x02 || opc == 0x06) {
14703 opCatO = Iop_InterleaveHI32x2;
14704 opCatE = Iop_InterleaveLO32x2;
14705 }
14706
14707 do_MMX_preamble();
14708 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
14709
14710 if (epartIsReg(modrm)) {
14711 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
14712 delta += 1;
14713 DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
14714 nameMMXReg(gregLO3ofRM(modrm)));
14715 } else {
14716 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14717 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
14718 delta += alen;
14719 DIP("ph%s %s,%s\n", str, dis_buf,
14720 nameMMXReg(gregLO3ofRM(modrm)));
14721 }
14722
14723 putMMXReg(
14724 gregLO3ofRM(modrm),
14725 binop(opV64,
14726 binop(opCatE,mkexpr(sV),mkexpr(dV)),
14727 binop(opCatO,mkexpr(sV),mkexpr(dV))
14728 )
14729 );
14730 goto decode_success;
14731 }
14732 break;
14733
14734 case 0x04:
14735 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
14736 Unsigned Bytes (XMM) */
14737 if (have66noF2noF3(pfx)
14738 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
sewardj8516a1f2012-06-24 14:26:30 +000014739 IRTemp sV = newTemp(Ity_V128);
14740 IRTemp dV = newTemp(Ity_V128);
14741 modrm = getUChar(delta);
14742 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000014743
sewardj8516a1f2012-06-24 14:26:30 +000014744 assign( dV, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000014745
14746 if (epartIsReg(modrm)) {
sewardj8516a1f2012-06-24 14:26:30 +000014747 UInt rE = eregOfRexRM(pfx,modrm);
14748 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000014749 delta += 1;
sewardj8516a1f2012-06-24 14:26:30 +000014750 DIP("pmaddubsw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014751 } else {
14752 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14753 gen_SEGV_if_not_16_aligned( addr );
14754 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14755 delta += alen;
sewardj8516a1f2012-06-24 14:26:30 +000014756 DIP("pmaddubsw %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014757 }
14758
sewardj8516a1f2012-06-24 14:26:30 +000014759 putXMMReg( rG, mkexpr( math_PMADDUBSW_128( dV, sV ) ) );
sewardj80611e32012-01-20 13:07:24 +000014760 goto decode_success;
14761 }
14762 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
14763 Unsigned Bytes (MMX) */
14764 if (haveNo66noF2noF3(pfx) && sz == 4) {
14765 IRTemp sV = newTemp(Ity_I64);
14766 IRTemp dV = newTemp(Ity_I64);
14767 IRTemp sVoddsSX = newTemp(Ity_I64);
14768 IRTemp sVevensSX = newTemp(Ity_I64);
14769 IRTemp dVoddsZX = newTemp(Ity_I64);
14770 IRTemp dVevensZX = newTemp(Ity_I64);
14771
14772 modrm = getUChar(delta);
14773 do_MMX_preamble();
14774 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
14775
14776 if (epartIsReg(modrm)) {
14777 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
14778 delta += 1;
14779 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14780 nameMMXReg(gregLO3ofRM(modrm)));
14781 } else {
14782 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14783 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
14784 delta += alen;
14785 DIP("pmaddubsw %s,%s\n", dis_buf,
14786 nameMMXReg(gregLO3ofRM(modrm)));
14787 }
14788
14789 /* compute dV unsigned x sV signed */
14790 assign( sVoddsSX,
14791 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
14792 assign( sVevensSX,
14793 binop(Iop_SarN16x4,
14794 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
14795 mkU8(8)) );
14796 assign( dVoddsZX,
14797 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
14798 assign( dVevensZX,
14799 binop(Iop_ShrN16x4,
14800 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
14801 mkU8(8)) );
14802
14803 putMMXReg(
14804 gregLO3ofRM(modrm),
14805 binop(Iop_QAdd16Sx4,
14806 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
14807 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
14808 )
14809 );
14810 goto decode_success;
14811 }
14812 break;
14813
14814 case 0x08:
14815 case 0x09:
14816 case 0x0A:
14817 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
14818 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
14819 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
14820 if (have66noF2noF3(pfx)
14821 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
14822 IRTemp sV = newTemp(Ity_V128);
14823 IRTemp dV = newTemp(Ity_V128);
14824 IRTemp sHi = newTemp(Ity_I64);
14825 IRTemp sLo = newTemp(Ity_I64);
14826 IRTemp dHi = newTemp(Ity_I64);
14827 IRTemp dLo = newTemp(Ity_I64);
14828 HChar* str = "???";
14829 Int laneszB = 0;
14830
14831 switch (opc) {
14832 case 0x08: laneszB = 1; str = "b"; break;
14833 case 0x09: laneszB = 2; str = "w"; break;
14834 case 0x0A: laneszB = 4; str = "d"; break;
14835 default: vassert(0);
14836 }
14837
14838 modrm = getUChar(delta);
14839 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
14840
14841 if (epartIsReg(modrm)) {
14842 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
14843 delta += 1;
14844 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
14845 nameXMMReg(gregOfRexRM(pfx,modrm)));
14846 } else {
14847 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14848 gen_SEGV_if_not_16_aligned( addr );
14849 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14850 delta += alen;
14851 DIP("psign%s %s,%s\n", str, dis_buf,
14852 nameXMMReg(gregOfRexRM(pfx,modrm)));
14853 }
14854
14855 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
14856 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
14857 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
14858 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
14859
14860 putXMMReg(
14861 gregOfRexRM(pfx,modrm),
14862 binop(Iop_64HLtoV128,
14863 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
14864 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
14865 )
14866 );
14867 goto decode_success;
14868 }
14869 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
14870 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
14871 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
14872 if (haveNo66noF2noF3(pfx) && sz == 4) {
14873 IRTemp sV = newTemp(Ity_I64);
14874 IRTemp dV = newTemp(Ity_I64);
14875 HChar* str = "???";
14876 Int laneszB = 0;
14877
14878 switch (opc) {
14879 case 0x08: laneszB = 1; str = "b"; break;
14880 case 0x09: laneszB = 2; str = "w"; break;
14881 case 0x0A: laneszB = 4; str = "d"; break;
14882 default: vassert(0);
14883 }
14884
14885 modrm = getUChar(delta);
14886 do_MMX_preamble();
14887 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
14888
14889 if (epartIsReg(modrm)) {
14890 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
14891 delta += 1;
14892 DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
14893 nameMMXReg(gregLO3ofRM(modrm)));
14894 } else {
14895 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14896 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
14897 delta += alen;
14898 DIP("psign%s %s,%s\n", str, dis_buf,
14899 nameMMXReg(gregLO3ofRM(modrm)));
14900 }
14901
14902 putMMXReg(
14903 gregLO3ofRM(modrm),
14904 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
14905 );
14906 goto decode_success;
14907 }
14908 break;
14909
14910 case 0x0B:
14911 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
14912 Scale (XMM) */
14913 if (have66noF2noF3(pfx)
14914 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
14915 IRTemp sV = newTemp(Ity_V128);
14916 IRTemp dV = newTemp(Ity_V128);
14917 IRTemp sHi = newTemp(Ity_I64);
14918 IRTemp sLo = newTemp(Ity_I64);
14919 IRTemp dHi = newTemp(Ity_I64);
14920 IRTemp dLo = newTemp(Ity_I64);
14921
14922 modrm = getUChar(delta);
14923 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
14924
14925 if (epartIsReg(modrm)) {
14926 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
14927 delta += 1;
14928 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
14929 nameXMMReg(gregOfRexRM(pfx,modrm)));
14930 } else {
14931 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14932 gen_SEGV_if_not_16_aligned( addr );
14933 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14934 delta += alen;
14935 DIP("pmulhrsw %s,%s\n", dis_buf,
14936 nameXMMReg(gregOfRexRM(pfx,modrm)));
14937 }
14938
14939 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
14940 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
14941 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
14942 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
14943
14944 putXMMReg(
14945 gregOfRexRM(pfx,modrm),
14946 binop(Iop_64HLtoV128,
14947 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
14948 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
14949 )
14950 );
14951 goto decode_success;
14952 }
14953 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
14954 (MMX) */
14955 if (haveNo66noF2noF3(pfx) && sz == 4) {
14956 IRTemp sV = newTemp(Ity_I64);
14957 IRTemp dV = newTemp(Ity_I64);
14958
14959 modrm = getUChar(delta);
14960 do_MMX_preamble();
14961 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
14962
14963 if (epartIsReg(modrm)) {
14964 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
14965 delta += 1;
14966 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14967 nameMMXReg(gregLO3ofRM(modrm)));
14968 } else {
14969 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14970 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
14971 delta += alen;
14972 DIP("pmulhrsw %s,%s\n", dis_buf,
14973 nameMMXReg(gregLO3ofRM(modrm)));
14974 }
14975
14976 putMMXReg(
14977 gregLO3ofRM(modrm),
14978 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
14979 );
14980 goto decode_success;
14981 }
14982 break;
14983
14984 case 0x1C:
14985 case 0x1D:
14986 case 0x1E:
14987 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
14988 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
14989 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
14990 if (have66noF2noF3(pfx)
14991 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
sewardj97f72452012-05-23 05:56:53 +000014992 IRTemp sV = newTemp(Ity_V128);
14993 HChar* str = "???";
sewardj80611e32012-01-20 13:07:24 +000014994 Int laneszB = 0;
14995
14996 switch (opc) {
14997 case 0x1C: laneszB = 1; str = "b"; break;
14998 case 0x1D: laneszB = 2; str = "w"; break;
14999 case 0x1E: laneszB = 4; str = "d"; break;
15000 default: vassert(0);
15001 }
15002
15003 modrm = getUChar(delta);
sewardj80611e32012-01-20 13:07:24 +000015004 if (epartIsReg(modrm)) {
15005 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
15006 delta += 1;
15007 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
15008 nameXMMReg(gregOfRexRM(pfx,modrm)));
15009 } else {
15010 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15011 gen_SEGV_if_not_16_aligned( addr );
15012 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15013 delta += alen;
15014 DIP("pabs%s %s,%s\n", str, dis_buf,
15015 nameXMMReg(gregOfRexRM(pfx,modrm)));
15016 }
15017
sewardj97f72452012-05-23 05:56:53 +000015018 putXMMReg( gregOfRexRM(pfx,modrm),
15019 mkexpr(math_PABS_XMM(sV, laneszB)) );
sewardj80611e32012-01-20 13:07:24 +000015020 goto decode_success;
15021 }
15022 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
15023 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
15024 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
15025 if (haveNo66noF2noF3(pfx) && sz == 4) {
15026 IRTemp sV = newTemp(Ity_I64);
15027 HChar* str = "???";
15028 Int laneszB = 0;
15029
15030 switch (opc) {
15031 case 0x1C: laneszB = 1; str = "b"; break;
15032 case 0x1D: laneszB = 2; str = "w"; break;
15033 case 0x1E: laneszB = 4; str = "d"; break;
15034 default: vassert(0);
15035 }
15036
15037 modrm = getUChar(delta);
15038 do_MMX_preamble();
15039
15040 if (epartIsReg(modrm)) {
15041 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15042 delta += 1;
15043 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
15044 nameMMXReg(gregLO3ofRM(modrm)));
15045 } else {
15046 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15047 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15048 delta += alen;
15049 DIP("pabs%s %s,%s\n", str, dis_buf,
15050 nameMMXReg(gregLO3ofRM(modrm)));
15051 }
15052
sewardj97f72452012-05-23 05:56:53 +000015053 putMMXReg( gregLO3ofRM(modrm),
15054 mkexpr(math_PABS_MMX( sV, laneszB )) );
sewardj80611e32012-01-20 13:07:24 +000015055 goto decode_success;
15056 }
15057 break;
15058
15059 default:
15060 break;
15061
15062 }
15063
15064 //decode_failure:
15065 *decode_OK = False;
15066 return deltaIN;
15067
15068 decode_success:
15069 *decode_OK = True;
15070 return delta;
15071}
15072
15073
15074/*------------------------------------------------------------*/
15075/*--- ---*/
15076/*--- Top-level SSSE3: dis_ESC_0F3A__SupSSE3 ---*/
15077/*--- ---*/
15078/*------------------------------------------------------------*/
15079
15080__attribute__((noinline))
15081static
15082Long dis_ESC_0F3A__SupSSE3 ( Bool* decode_OK,
15083 VexAbiInfo* vbi,
15084 Prefix pfx, Int sz, Long deltaIN )
15085{
15086 Long d64 = 0;
15087 IRTemp addr = IRTemp_INVALID;
15088 UChar modrm = 0;
15089 Int alen = 0;
15090 HChar dis_buf[50];
15091
15092 *decode_OK = False;
15093
15094 Long delta = deltaIN;
15095 UChar opc = getUChar(delta);
15096 delta++;
15097 switch (opc) {
15098
15099 case 0x0F:
15100 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
15101 if (have66noF2noF3(pfx)
15102 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15103 IRTemp sV = newTemp(Ity_V128);
15104 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000015105
15106 modrm = getUChar(delta);
15107 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
15108
15109 if (epartIsReg(modrm)) {
15110 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
15111 d64 = (Long)getUChar(delta+1);
15112 delta += 1+1;
15113 DIP("palignr $%d,%s,%s\n", (Int)d64,
15114 nameXMMReg(eregOfRexRM(pfx,modrm)),
15115 nameXMMReg(gregOfRexRM(pfx,modrm)));
15116 } else {
15117 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
15118 gen_SEGV_if_not_16_aligned( addr );
15119 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15120 d64 = (Long)getUChar(delta+alen);
15121 delta += alen+1;
15122 DIP("palignr $%d,%s,%s\n", (Int)d64,
15123 dis_buf,
15124 nameXMMReg(gregOfRexRM(pfx,modrm)));
15125 }
15126
sewardj151cd3e2012-06-18 13:56:55 +000015127 IRTemp res = math_PALIGNR_XMM( sV, dV, d64 );
15128 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000015129 goto decode_success;
15130 }
15131 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
15132 if (haveNo66noF2noF3(pfx) && sz == 4) {
15133 IRTemp sV = newTemp(Ity_I64);
15134 IRTemp dV = newTemp(Ity_I64);
15135 IRTemp res = newTemp(Ity_I64);
15136
15137 modrm = getUChar(delta);
15138 do_MMX_preamble();
15139 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15140
15141 if (epartIsReg(modrm)) {
15142 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15143 d64 = (Long)getUChar(delta+1);
15144 delta += 1+1;
15145 DIP("palignr $%d,%s,%s\n", (Int)d64,
15146 nameMMXReg(eregLO3ofRM(modrm)),
15147 nameMMXReg(gregLO3ofRM(modrm)));
15148 } else {
15149 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
15150 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15151 d64 = (Long)getUChar(delta+alen);
15152 delta += alen+1;
15153             DIP("palignr $%d,%s,%s\n", (Int)d64,
15154 dis_buf,
15155 nameMMXReg(gregLO3ofRM(modrm)));
15156 }
15157
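         /* In effect (roughly speaking): concatenate dV:sV as a 16-byte
            value, shift it right by d64 bytes, and keep the low 8 bytes.
            For example, for d64 == 3 the result is
            (sV >>u 24) | (dV << 40): its low five bytes are the top five
            bytes of sV and its top three bytes are the low three bytes
            of dV. */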
15158 if (d64 == 0) {
15159 assign( res, mkexpr(sV) );
15160 }
15161 else if (d64 >= 1 && d64 <= 7) {
15162 assign(res,
15163 binop(Iop_Or64,
15164 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)),
15165 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64))
15166 )));
15167 }
15168 else if (d64 == 8) {
15169 assign( res, mkexpr(dV) );
15170 }
15171 else if (d64 >= 9 && d64 <= 15) {
15172 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) );
15173 }
15174 else if (d64 >= 16 && d64 <= 255) {
15175 assign( res, mkU64(0) );
15176 }
15177 else
15178 vassert(0);
15179
15180 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
15181 goto decode_success;
15182 }
15183 break;
15184
15185 default:
15186 break;
15187
15188 }
15189
15190 //decode_failure:
15191 *decode_OK = False;
15192 return deltaIN;
15193
15194 decode_success:
15195 *decode_OK = True;
15196 return delta;
15197}
15198
15199
15200/*------------------------------------------------------------*/
15201/*--- ---*/
15202/*--- Top-level SSE4: dis_ESC_0F__SSE4 ---*/
15203/*--- ---*/
15204/*------------------------------------------------------------*/
15205
15206__attribute__((noinline))
15207static
15208Long dis_ESC_0F__SSE4 ( Bool* decode_OK,
15209 VexArchInfo* archinfo,
15210 VexAbiInfo* vbi,
15211 Prefix pfx, Int sz, Long deltaIN )
15212{
15213 IRTemp addr = IRTemp_INVALID;
15214 IRType ty = Ity_INVALID;
15215 UChar modrm = 0;
15216 Int alen = 0;
15217 HChar dis_buf[50];
15218
15219 *decode_OK = False;
15220
15221 Long delta = deltaIN;
15222 UChar opc = getUChar(delta);
15223 delta++;
15224 switch (opc) {
15225
15226 case 0xB8:
15227 /* F3 0F B8 = POPCNT{W,L,Q}
15228 Count the number of 1 bits in a register
15229 */
15230 if (haveF3noF2(pfx) /* so both 66 and REX.W are possibilities */
15231 && (sz == 2 || sz == 4 || sz == 8)) {
15232 /*IRType*/ ty = szToITy(sz);
15233 IRTemp src = newTemp(ty);
15234 modrm = getUChar(delta);
15235 if (epartIsReg(modrm)) {
15236 assign(src, getIRegE(sz, pfx, modrm));
15237 delta += 1;
15238 DIP("popcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
15239 nameIRegG(sz, pfx, modrm));
15240 } else {
15241 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
15242 assign(src, loadLE(ty, mkexpr(addr)));
15243 delta += alen;
15244 DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf,
15245 nameIRegG(sz, pfx, modrm));
15246 }
15247
15248 IRTemp result = gen_POPCOUNT(ty, src);
15249 putIRegG(sz, pfx, modrm, mkexpr(result));
15250
15251 // Update flags. This is pretty lame .. perhaps can do better
15252 // if this turns out to be performance critical.
15253 // O S A C P are cleared. Z is set if SRC == 0.
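         // (With CC_OP set to AMD64G_CC_OP_COPY, DEP1 is read back as the
         // rflags bits themselves, so the single shifted CmpEQ64 below
         // yields Z = (SRC == 0) and leaves O/S/A/C/P at zero.)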
15254 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
15255 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
15256 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
15257 stmt( IRStmt_Put( OFFB_CC_DEP1,
15258 binop(Iop_Shl64,
15259 unop(Iop_1Uto64,
15260 binop(Iop_CmpEQ64,
15261 widenUto64(mkexpr(src)),
15262 mkU64(0))),
15263 mkU8(AMD64G_CC_SHIFT_Z))));
15264
15265 goto decode_success;
15266 }
15267 break;
15268
15269 case 0xBD:
15270      /* F3 0F BD -- LZCNT (count leading zeroes). An AMD extension,
15271 which we can only decode if we're sure this is an AMD cpu
15272 that supports LZCNT, since otherwise it's BSR, which behaves
15273 differently. Bizarrely, my Sandy Bridge also accepts these
15274 instructions but produces different results. */
15275 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
15276 && (sz == 2 || sz == 4 || sz == 8)
15277 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) {
15278 /*IRType*/ ty = szToITy(sz);
15279 IRTemp src = newTemp(ty);
15280 modrm = getUChar(delta);
15281 if (epartIsReg(modrm)) {
15282 assign(src, getIRegE(sz, pfx, modrm));
15283 delta += 1;
15284 DIP("lzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
15285 nameIRegG(sz, pfx, modrm));
15286 } else {
15287 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
15288 assign(src, loadLE(ty, mkexpr(addr)));
15289 delta += alen;
15290 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
15291 nameIRegG(sz, pfx, modrm));
15292 }
15293
15294 IRTemp res = gen_LZCNT(ty, src);
15295 putIRegG(sz, pfx, modrm, mkexpr(res));
15296
15297 // Update flags. This is pretty lame .. perhaps can do better
15298 // if this turns out to be performance critical.
15299 // O S A P are cleared. Z is set if RESULT == 0.
15300 // C is set if SRC is zero.
15301 IRTemp src64 = newTemp(Ity_I64);
15302 IRTemp res64 = newTemp(Ity_I64);
15303 assign(src64, widenUto64(mkexpr(src)));
15304 assign(res64, widenUto64(mkexpr(res)));
15305
15306 IRTemp oszacp = newTemp(Ity_I64);
15307 assign(
15308 oszacp,
15309 binop(Iop_Or64,
15310 binop(Iop_Shl64,
15311 unop(Iop_1Uto64,
15312 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
15313 mkU8(AMD64G_CC_SHIFT_Z)),
15314 binop(Iop_Shl64,
15315 unop(Iop_1Uto64,
15316 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
15317 mkU8(AMD64G_CC_SHIFT_C))
15318 )
15319 );
15320
15321 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
15322 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
15323 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
15324 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
15325
15326 goto decode_success;
15327 }
15328 break;
15329
15330 default:
15331 break;
15332
15333 }
15334
15335 //decode_failure:
15336 *decode_OK = False;
15337 return deltaIN;
15338
15339 decode_success:
15340 *decode_OK = True;
15341 return delta;
15342}
15343
15344
15345/*------------------------------------------------------------*/
15346/*--- ---*/
15347/*--- Top-level SSE4: dis_ESC_0F38__SSE4 ---*/
15348/*--- ---*/
15349/*------------------------------------------------------------*/
15350
sewardje8a7eb72012-06-12 14:59:17 +000015351static IRTemp math_PBLENDVB_128 ( IRTemp vecE, IRTemp vecG,
15352 IRTemp vec0/*controlling mask*/,
15353 UInt gran, IROp opSAR )
sewardjc4530ae2012-05-21 10:18:49 +000015354{
15355 /* The tricky bit is to convert vec0 into a suitable mask, by
15356 copying the most significant bit of each lane into all positions
15357 in the lane. */
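   /* For example, with gran == 4 (so opSAR is Iop_SarN32x4 and sh is 31):
      a control lane 0x80000000 becomes 0xFFFFFFFF and 0x7FFFFFFF becomes
      0x00000000, so the AND/OR below takes each lane from vecE where the
      control lane's MSB is 1 and from vecG where it is 0. */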
15358 IRTemp sh = newTemp(Ity_I8);
15359 assign(sh, mkU8(8 * gran - 1));
15360
15361 IRTemp mask = newTemp(Ity_V128);
15362 assign(mask, binop(opSAR, mkexpr(vec0), mkexpr(sh)));
15363
15364 IRTemp notmask = newTemp(Ity_V128);
15365 assign(notmask, unop(Iop_NotV128, mkexpr(mask)));
15366
15367 IRTemp res = newTemp(Ity_V128);
15368 assign(res, binop(Iop_OrV128,
15369 binop(Iop_AndV128, mkexpr(vecE), mkexpr(mask)),
15370 binop(Iop_AndV128, mkexpr(vecG), mkexpr(notmask))));
15371 return res;
15372}
15373
sewardj4c0a7ac2012-06-21 09:08:19 +000015374static IRTemp math_PBLENDVB_256 ( IRTemp vecE, IRTemp vecG,
15375 IRTemp vec0/*controlling mask*/,
15376 UInt gran, IROp opSAR128 )
15377{
15378 /* The tricky bit is to convert vec0 into a suitable mask, by
15379 copying the most significant bit of each lane into all positions
15380 in the lane. */
15381 IRTemp sh = newTemp(Ity_I8);
15382 assign(sh, mkU8(8 * gran - 1));
15383
15384 IRTemp vec0Hi = IRTemp_INVALID;
15385 IRTemp vec0Lo = IRTemp_INVALID;
15386 breakupV256toV128s( vec0, &vec0Hi, &vec0Lo );
15387
15388 IRTemp mask = newTemp(Ity_V256);
15389 assign(mask, binop(Iop_V128HLtoV256,
15390 binop(opSAR128, mkexpr(vec0Hi), mkexpr(sh)),
15391 binop(opSAR128, mkexpr(vec0Lo), mkexpr(sh))));
15392
15393 IRTemp notmask = newTemp(Ity_V256);
15394 assign(notmask, unop(Iop_NotV256, mkexpr(mask)));
15395
15396 IRTemp res = newTemp(Ity_V256);
15397 assign(res, binop(Iop_OrV256,
15398 binop(Iop_AndV256, mkexpr(vecE), mkexpr(mask)),
15399 binop(Iop_AndV256, mkexpr(vecG), mkexpr(notmask))));
15400 return res;
15401}
15402
15403static Long dis_VBLENDV_128 ( VexAbiInfo* vbi, Prefix pfx, Long delta,
15404 const HChar *name, UInt gran, IROp opSAR )
15405{
15406 IRTemp addr = IRTemp_INVALID;
15407 Int alen = 0;
15408 HChar dis_buf[50];
15409 UChar modrm = getUChar(delta);
15410 UInt rG = gregOfRexRM(pfx, modrm);
15411 UInt rV = getVexNvvvv(pfx);
15412 UInt rIS4 = 0xFF; /* invalid */
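   /* The fourth (mask) register number travels in bits 7:4 of the
      trailing immediate byte (the AVX "is4" encoding); the 256-bit
      variant below uses the same scheme. */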
15413 IRTemp vecE = newTemp(Ity_V128);
15414 IRTemp vecV = newTemp(Ity_V128);
15415 IRTemp vecIS4 = newTemp(Ity_V128);
15416 if (epartIsReg(modrm)) {
15417 delta++;
15418 UInt rE = eregOfRexRM(pfx, modrm);
15419 assign(vecE, getXMMReg(rE));
15420 UChar ib = getUChar(delta);
15421 rIS4 = (ib >> 4) & 0xF;
15422 DIP("%s %s,%s,%s,%s\n",
15423 name, nameXMMReg(rIS4), nameXMMReg(rE),
15424 nameXMMReg(rV), nameXMMReg(rG));
15425 } else {
15426 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
15427 delta += alen;
15428 assign(vecE, loadLE(Ity_V128, mkexpr(addr)));
15429 UChar ib = getUChar(delta);
15430 rIS4 = (ib >> 4) & 0xF;
15431 DIP("%s %s,%s,%s,%s\n",
15432 name, nameXMMReg(rIS4), dis_buf, nameXMMReg(rV), nameXMMReg(rG));
15433 }
15434 delta++;
15435 assign(vecV, getXMMReg(rV));
15436 assign(vecIS4, getXMMReg(rIS4));
15437 IRTemp res = math_PBLENDVB_128( vecE, vecV, vecIS4, gran, opSAR );
15438 putYMMRegLoAndZU( rG, mkexpr(res) );
15439 return delta;
15440}
15441
15442static Long dis_VBLENDV_256 ( VexAbiInfo* vbi, Prefix pfx, Long delta,
15443 const HChar *name, UInt gran, IROp opSAR128 )
15444{
15445 IRTemp addr = IRTemp_INVALID;
15446 Int alen = 0;
15447 HChar dis_buf[50];
15448 UChar modrm = getUChar(delta);
15449 UInt rG = gregOfRexRM(pfx, modrm);
15450 UInt rV = getVexNvvvv(pfx);
15451 UInt rIS4 = 0xFF; /* invalid */
15452 IRTemp vecE = newTemp(Ity_V256);
15453 IRTemp vecV = newTemp(Ity_V256);
15454 IRTemp vecIS4 = newTemp(Ity_V256);
15455 if (epartIsReg(modrm)) {
15456 delta++;
15457 UInt rE = eregOfRexRM(pfx, modrm);
15458 assign(vecE, getYMMReg(rE));
15459 UChar ib = getUChar(delta);
15460 rIS4 = (ib >> 4) & 0xF;
15461 DIP("%s %s,%s,%s,%s\n",
15462 name, nameYMMReg(rIS4), nameYMMReg(rE),
15463 nameYMMReg(rV), nameYMMReg(rG));
15464 } else {
15465 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
15466 delta += alen;
15467 assign(vecE, loadLE(Ity_V256, mkexpr(addr)));
15468 UChar ib = getUChar(delta);
15469 rIS4 = (ib >> 4) & 0xF;
15470 DIP("%s %s,%s,%s,%s\n",
15471 name, nameYMMReg(rIS4), dis_buf, nameYMMReg(rV), nameYMMReg(rG));
15472 }
15473 delta++;
15474 assign(vecV, getYMMReg(rV));
15475 assign(vecIS4, getYMMReg(rIS4));
15476 IRTemp res = math_PBLENDVB_256( vecE, vecV, vecIS4, gran, opSAR128 );
15477 putYMMReg( rG, mkexpr(res) );
15478 return delta;
15479}
15480
sewardjed1884d2012-06-21 08:53:48 +000015481static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign )
15482{
15483 /* Set Z=1 iff (vecE & vecG) == 0
15484 Set C=1 iff (vecE & not vecG) == 0
15485 */
15486
15487 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
15488
15489 /* andV resp. andnV, reduced to 64-bit values, by or-ing the top
15490 and bottom 64-bits together. It relies on this trick:
15491
15492 InterleaveLO64x2([a,b],[c,d]) == [b,d] hence
15493
15494 InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly
15495 InterleaveHI64x2([a,b],[a,b]) == [a,a]
15496
15497 and so the OR of the above 2 exprs produces
15498 [a OR b, a OR b], from which we simply take the lower half.
15499 */
15500 IRTemp and64 = newTemp(Ity_I64);
15501 IRTemp andn64 = newTemp(Ity_I64);
15502
15503 assign(and64,
15504 unop(Iop_V128to64,
15505 binop(Iop_OrV128,
15506 binop(Iop_InterleaveLO64x2,
15507 mkexpr(andV), mkexpr(andV)),
15508 binop(Iop_InterleaveHI64x2,
15509 mkexpr(andV), mkexpr(andV)))));
15510
15511 assign(andn64,
15512 unop(Iop_V128to64,
15513 binop(Iop_OrV128,
15514 binop(Iop_InterleaveLO64x2,
15515 mkexpr(andnV), mkexpr(andnV)),
15516 binop(Iop_InterleaveHI64x2,
15517 mkexpr(andnV), mkexpr(andnV)))));
15518
15519 IRTemp z64 = newTemp(Ity_I64);
15520 IRTemp c64 = newTemp(Ity_I64);
15521 if (sign == 64) {
15522 /* When only interested in the most significant bit, just shift
15523 arithmetically right and negate. */
15524 assign(z64,
15525 unop(Iop_Not64,
15526 binop(Iop_Sar64, mkexpr(and64), mkU8(63))));
15527
15528 assign(c64,
15529 unop(Iop_Not64,
15530 binop(Iop_Sar64, mkexpr(andn64), mkU8(63))));
15531 } else {
15532 if (sign == 32) {
15533 /* When interested in bit 31 and bit 63, mask those bits and
15534 fallthrough into the PTEST handling. */
15535 IRTemp t0 = newTemp(Ity_I64);
15536 IRTemp t1 = newTemp(Ity_I64);
15537 IRTemp t2 = newTemp(Ity_I64);
15538 assign(t0, mkU64(0x8000000080000000ULL));
15539 assign(t1, binop(Iop_And64, mkexpr(and64), mkexpr(t0)));
15540 assign(t2, binop(Iop_And64, mkexpr(andn64), mkexpr(t0)));
15541 and64 = t1;
15542 andn64 = t2;
15543 }
15544 /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
15545 slice out the Z and C bits conveniently. We use the standard
15546 trick all-zeroes -> all-zeroes, anything-else -> all-ones
15547 done by "(x | -x) >>s (word-size - 1)".
15548 */
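      /* Worked instance: for x == 0, (0 | -0) >>s 63 is 0 and the Not64
         gives all-ones (flag set); for any nonzero x, x | -x has bit 63
         set, so the shift gives all-ones and the Not64 gives zero
         (flag clear). */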
15549 assign(z64,
15550 unop(Iop_Not64,
15551 binop(Iop_Sar64,
15552 binop(Iop_Or64,
15553 binop(Iop_Sub64, mkU64(0), mkexpr(and64)),
15554 mkexpr(and64)), mkU8(63))));
15555
15556 assign(c64,
15557 unop(Iop_Not64,
15558 binop(Iop_Sar64,
15559 binop(Iop_Or64,
15560 binop(Iop_Sub64, mkU64(0), mkexpr(andn64)),
15561 mkexpr(andn64)), mkU8(63))));
15562 }
15563
15564 /* And finally, slice out the Z and C flags and set the flags
15565 thunk to COPY for them. OSAP are set to zero. */
15566 IRTemp newOSZACP = newTemp(Ity_I64);
15567 assign(newOSZACP,
15568 binop(Iop_Or64,
15569 binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)),
15570 binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C))));
15571
15572 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP)));
15573 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
15574 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
15575 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
15576}
15577
15578
15579/* Handles 128 bit versions of PTEST, VTESTPS or VTESTPD.
15580 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
15581static Long dis_xTESTy_128 ( VexAbiInfo* vbi, Prefix pfx,
15582 Long delta, Bool isAvx, Int sign )
15583{
15584 IRTemp addr = IRTemp_INVALID;
15585 Int alen = 0;
15586 HChar dis_buf[50];
15587 UChar modrm = getUChar(delta);
15588 UInt rG = gregOfRexRM(pfx, modrm);
15589 IRTemp vecE = newTemp(Ity_V128);
15590 IRTemp vecG = newTemp(Ity_V128);
15591
15592 if ( epartIsReg(modrm) ) {
15593 UInt rE = eregOfRexRM(pfx, modrm);
15594 assign(vecE, getXMMReg(rE));
15595 delta += 1;
15596 DIP( "%s%stest%s %s,%s\n",
15597 isAvx ? "v" : "", sign == 0 ? "p" : "",
15598 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
15599 nameXMMReg(rE), nameXMMReg(rG) );
15600 } else {
15601 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
15602 if (!isAvx)
15603 gen_SEGV_if_not_16_aligned( addr );
15604 assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
15605 delta += alen;
15606 DIP( "%s%stest%s %s,%s\n",
15607 isAvx ? "v" : "", sign == 0 ? "p" : "",
15608 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
15609 dis_buf, nameXMMReg(rG) );
15610 }
15611
15612 assign(vecG, getXMMReg(rG));
15613
15614 /* Set Z=1 iff (vecE & vecG) == 0
15615 Set C=1 iff (vecE & not vecG) == 0
15616 */
15617
15618 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
15619 IRTemp andV = newTemp(Ity_V128);
15620 IRTemp andnV = newTemp(Ity_V128);
15621 assign(andV, binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
15622 assign(andnV, binop(Iop_AndV128,
15623 mkexpr(vecE),
15624 binop(Iop_XorV128, mkexpr(vecG),
15625 mkV128(0xFFFF))));
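   /* (mkV128 takes a 16-bit mask with one bit per byte, so 0xFFFF is the
      all-ones V128 constant and the Xor above simply computes ~vecG.) */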
15626
15627 finish_xTESTy ( andV, andnV, sign );
15628 return delta;
15629}
15630
15631
15632/* Handles 256 bit versions of PTEST, VTESTPS or VTESTPD.
15633 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
15634static Long dis_xTESTy_256 ( VexAbiInfo* vbi, Prefix pfx,
15635 Long delta, Int sign )
15636{
15637 IRTemp addr = IRTemp_INVALID;
15638 Int alen = 0;
15639 HChar dis_buf[50];
15640 UChar modrm = getUChar(delta);
15641 UInt rG = gregOfRexRM(pfx, modrm);
15642 IRTemp vecE = newTemp(Ity_V256);
15643 IRTemp vecG = newTemp(Ity_V256);
15644
15645 if ( epartIsReg(modrm) ) {
15646 UInt rE = eregOfRexRM(pfx, modrm);
15647 assign(vecE, getYMMReg(rE));
15648 delta += 1;
15649 DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
15650 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
15651 nameYMMReg(rE), nameYMMReg(rG) );
15652 } else {
15653 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
15654 assign(vecE, loadLE( Ity_V256, mkexpr(addr) ));
15655 delta += alen;
15656 DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
15657 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
15658 dis_buf, nameYMMReg(rG) );
15659 }
15660
15661 assign(vecG, getYMMReg(rG));
15662
15663 /* Set Z=1 iff (vecE & vecG) == 0
15664 Set C=1 iff (vecE & not vecG) == 0
15665 */
15666
15667 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
15668 IRTemp andV = newTemp(Ity_V256);
15669 IRTemp andnV = newTemp(Ity_V256);
15670 assign(andV, binop(Iop_AndV256, mkexpr(vecE), mkexpr(vecG)));
15671 assign(andnV, binop(Iop_AndV256,
15672 mkexpr(vecE), unop(Iop_NotV256, mkexpr(vecG))));
15673
15674 IRTemp andVhi = IRTemp_INVALID;
15675 IRTemp andVlo = IRTemp_INVALID;
15676 IRTemp andnVhi = IRTemp_INVALID;
15677 IRTemp andnVlo = IRTemp_INVALID;
15678 breakupV256toV128s( andV, &andVhi, &andVlo );
15679 breakupV256toV128s( andnV, &andnVhi, &andnVlo );
15680
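   /* OR-ing the two 128-bit halves is safe here: finish_xTESTy only asks
      whether the selected bits are all zero, and a bit position is zero
      across the whole 256-bit value iff it is zero in the OR of the
      halves. */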
15681 IRTemp andV128 = newTemp(Ity_V128);
15682 IRTemp andnV128 = newTemp(Ity_V128);
15683 assign( andV128, binop( Iop_OrV128, mkexpr(andVhi), mkexpr(andVlo) ) );
15684 assign( andnV128, binop( Iop_OrV128, mkexpr(andnVhi), mkexpr(andnVlo) ) );
15685
15686 finish_xTESTy ( andV128, andnV128, sign );
15687 return delta;
15688}
15689
sewardjc4530ae2012-05-21 10:18:49 +000015690
sewardj6fcd43e2012-06-14 08:51:35 +000015691/* Handles 128 bit versions of PMOVZXBW and PMOVSXBW. */
15692static Long dis_PMOVxXBW_128 ( VexAbiInfo* vbi, Prefix pfx,
15693 Long delta, Bool isAvx, Bool xIsZ )
sewardjc4530ae2012-05-21 10:18:49 +000015694{
15695 IRTemp addr = IRTemp_INVALID;
15696 Int alen = 0;
15697 HChar dis_buf[50];
15698 IRTemp srcVec = newTemp(Ity_V128);
15699 UChar modrm = getUChar(delta);
sewardje8a7eb72012-06-12 14:59:17 +000015700   HChar* mbV = isAvx ? "v" : "";
sewardj6fcd43e2012-06-14 08:51:35 +000015701 UChar how = xIsZ ? 'z' : 's';
15702 UInt rG = gregOfRexRM(pfx, modrm);
sewardjc4530ae2012-05-21 10:18:49 +000015703 if ( epartIsReg(modrm) ) {
sewardj6fcd43e2012-06-14 08:51:35 +000015704 UInt rE = eregOfRexRM(pfx, modrm);
15705 assign( srcVec, getXMMReg(rE) );
sewardjc4530ae2012-05-21 10:18:49 +000015706 delta += 1;
sewardj6fcd43e2012-06-14 08:51:35 +000015707 DIP( "%spmov%cxbw %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
sewardjc4530ae2012-05-21 10:18:49 +000015708 } else {
15709 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
15710 assign( srcVec,
15711 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
15712 delta += alen;
sewardj6fcd43e2012-06-14 08:51:35 +000015713 DIP( "%spmov%cxbw %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
sewardjc4530ae2012-05-21 10:18:49 +000015714 }
15715
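   /* The interleave with zero gives zero-extended 16-bit lanes (e.g. byte
      0x80 -> 0x0080); in the sign-extending case the Shl/Sar-by-8 pair
      then replicates bit 7 into the top byte
      (0x0080 -> 0x8000 -> 0xFF80). */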
15716 IRExpr* res
sewardj6fcd43e2012-06-14 08:51:35 +000015717 = xIsZ /* do math for either zero or sign extend */
15718 ? binop( Iop_InterleaveLO8x16,
15719 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
15720 : binop( Iop_SarN16x8,
15721 binop( Iop_ShlN16x8,
15722 binop( Iop_InterleaveLO8x16,
15723 IRExpr_Const( IRConst_V128(0) ),
15724 mkexpr(srcVec) ),
15725 mkU8(8) ),
15726 mkU8(8) );
sewardjc4530ae2012-05-21 10:18:49 +000015727
sewardj6fcd43e2012-06-14 08:51:35 +000015728 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
sewardjc4530ae2012-05-21 10:18:49 +000015729
15730 return delta;
15731}
15732
15733
sewardj6fcd43e2012-06-14 08:51:35 +000015734static Long dis_PMOVxXWD_128 ( VexAbiInfo* vbi, Prefix pfx,
15735 Long delta, Bool isAvx, Bool xIsZ )
sewardjc4530ae2012-05-21 10:18:49 +000015736{
15737 IRTemp addr = IRTemp_INVALID;
15738 Int alen = 0;
15739 HChar dis_buf[50];
15740 IRTemp srcVec = newTemp(Ity_V128);
15741 UChar modrm = getUChar(delta);
sewardje8a7eb72012-06-12 14:59:17 +000015742   HChar* mbV = isAvx ? "v" : "";
sewardj8516a1f2012-06-24 14:26:30 +000015743 UChar how = xIsZ ? 'z' : 's';
15744 UInt rG = gregOfRexRM(pfx, modrm);
sewardjc4530ae2012-05-21 10:18:49 +000015745
15746 if ( epartIsReg(modrm) ) {
sewardj8516a1f2012-06-24 14:26:30 +000015747 UInt rE = eregOfRexRM(pfx, modrm);
15748 assign( srcVec, getXMMReg(rE) );
sewardjc4530ae2012-05-21 10:18:49 +000015749 delta += 1;
sewardj8516a1f2012-06-24 14:26:30 +000015750 DIP( "%spmov%cxwd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
sewardjc4530ae2012-05-21 10:18:49 +000015751 } else {
15752 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
15753 assign( srcVec,
15754 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
15755 delta += alen;
sewardj8516a1f2012-06-24 14:26:30 +000015756 DIP( "%spmov%cxwd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
sewardjc4530ae2012-05-21 10:18:49 +000015757 }
15758
15759 IRExpr* res
15760 = binop( Iop_InterleaveLO16x8,
15761 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) );
sewardj6fcd43e2012-06-14 08:51:35 +000015762 if (!xIsZ)
15763 res = binop(Iop_SarN32x4,
15764 binop(Iop_ShlN32x4, res, mkU8(16)), mkU8(16));
sewardjc4530ae2012-05-21 10:18:49 +000015765
sewardje8a7eb72012-06-12 14:59:17 +000015766 (isAvx ? putYMMRegLoAndZU : putXMMReg)
sewardjc4530ae2012-05-21 10:18:49 +000015767 ( gregOfRexRM(pfx, modrm), res );
15768
15769 return delta;
15770}
15771
15772
sewardj8516a1f2012-06-24 14:26:30 +000015773static Long dis_PMOVSXWQ_128 ( VexAbiInfo* vbi, Prefix pfx,
15774 Long delta, Bool isAvx )
15775{
15776 IRTemp addr = IRTemp_INVALID;
15777 Int alen = 0;
15778 HChar dis_buf[50];
15779 IRTemp srcBytes = newTemp(Ity_I32);
15780 UChar modrm = getUChar(delta);
15781    HChar* mbV = isAvx ? "v" : "";
15782 UInt rG = gregOfRexRM(pfx, modrm);
15783
15784 if ( epartIsReg( modrm ) ) {
15785 UInt rE = eregOfRexRM(pfx, modrm);
15786 assign( srcBytes, getXMMRegLane32( rE, 0 ) );
15787 delta += 1;
15788 DIP( "%spmovsxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
15789 } else {
15790 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
15791 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
15792 delta += alen;
15793 DIP( "%spmovsxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
15794 }
15795
15796 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15797 ( rG, binop( Iop_64HLtoV128,
15798 unop( Iop_16Sto64,
15799 unop( Iop_32HIto16, mkexpr(srcBytes) ) ),
15800 unop( Iop_16Sto64,
15801 unop( Iop_32to16, mkexpr(srcBytes) ) ) ) );
15802 return delta;
15803}
15804
15805
15806static Long dis_PMOVZXWQ_128 ( VexAbiInfo* vbi, Prefix pfx,
15807 Long delta, Bool isAvx )
15808{
15809 IRTemp addr = IRTemp_INVALID;
15810 Int alen = 0;
15811 HChar dis_buf[50];
15812 IRTemp srcVec = newTemp(Ity_V128);
15813 UChar modrm = getUChar(delta);
15814    HChar* mbV = isAvx ? "v" : "";
15815 UInt rG = gregOfRexRM(pfx, modrm);
15816
15817 if ( epartIsReg( modrm ) ) {
15818 UInt rE = eregOfRexRM(pfx, modrm);
15819 assign( srcVec, getXMMReg(rE) );
15820 delta += 1;
15821 DIP( "%spmovzxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
15822 } else {
15823 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
15824 assign( srcVec,
15825 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
15826 delta += alen;
15827 DIP( "%spmovzxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
15828 }
15829
15830 IRTemp zeroVec = newTemp( Ity_V128 );
15831 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
15832
15833 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15834 ( rG, binop( Iop_InterleaveLO16x8,
15835 mkexpr(zeroVec),
15836 binop( Iop_InterleaveLO16x8,
15837 mkexpr(zeroVec), mkexpr(srcVec) ) ) );
15838 return delta;
15839}
15840
15841
sewardj6fcd43e2012-06-14 08:51:35 +000015842/* Handles 128 bit versions of PMOVZXDQ and PMOVSXDQ. */
15843static Long dis_PMOVxXDQ_128 ( VexAbiInfo* vbi, Prefix pfx,
15844 Long delta, Bool isAvx, Bool xIsZ )
15845{
15846 IRTemp addr = IRTemp_INVALID;
15847 Int alen = 0;
15848 HChar dis_buf[50];
15849 IRTemp srcI64 = newTemp(Ity_I64);
15850 IRTemp srcVec = newTemp(Ity_V128);
15851 UChar modrm = getUChar(delta);
15852    HChar* mbV = isAvx ? "v" : "";
15853 UChar how = xIsZ ? 'z' : 's';
15854 UInt rG = gregOfRexRM(pfx, modrm);
15855 /* Compute both srcI64 -- the value to expand -- and srcVec -- same
15856 thing in a V128, with arbitrary junk in the top 64 bits. Use
15857 one or both of them and let iropt clean up afterwards (as
15858 usual). */
15859 if ( epartIsReg(modrm) ) {
15860 UInt rE = eregOfRexRM(pfx, modrm);
15861 assign( srcVec, getXMMReg(rE) );
15862 assign( srcI64, unop(Iop_V128to64, mkexpr(srcVec)) );
15863 delta += 1;
15864 DIP( "%spmov%cxdq %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
15865 } else {
15866 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
15867 assign( srcI64, loadLE(Ity_I64, mkexpr(addr)) );
15868 assign( srcVec, unop( Iop_64UtoV128, mkexpr(srcI64)) );
15869 delta += alen;
15870 DIP( "%spmov%cxdq %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
15871 }
15872
15873 IRExpr* res
15874 = xIsZ /* do math for either zero or sign extend */
15875 ? binop( Iop_InterleaveLO32x4,
15876 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
15877 : binop( Iop_64HLtoV128,
15878 unop( Iop_32Sto64,
15879 unop( Iop_64HIto32, mkexpr(srcI64) ) ),
15880 unop( Iop_32Sto64,
15881 unop( Iop_64to32, mkexpr(srcI64) ) ) );
15882
15883 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
15884
15885 return delta;
15886}
15887
15888
sewardj4ed05e02012-06-18 15:01:30 +000015889/* Handles 128 bit versions of PMOVZXBD and PMOVSXBD. */
15890static Long dis_PMOVxXBD_128 ( VexAbiInfo* vbi, Prefix pfx,
15891 Long delta, Bool isAvx, Bool xIsZ )
15892{
15893 IRTemp addr = IRTemp_INVALID;
15894 Int alen = 0;
15895 HChar dis_buf[50];
15896 IRTemp srcVec = newTemp(Ity_V128);
15897 UChar modrm = getUChar(delta);
15898    HChar* mbV = isAvx ? "v" : "";
15899 UChar how = xIsZ ? 'z' : 's';
15900 UInt rG = gregOfRexRM(pfx, modrm);
15901 if ( epartIsReg(modrm) ) {
15902 UInt rE = eregOfRexRM(pfx, modrm);
15903 assign( srcVec, getXMMReg(rE) );
15904 delta += 1;
15905 DIP( "%spmov%cxbd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
15906 } else {
15907 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
15908 assign( srcVec,
15909 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
15910 delta += alen;
15911 DIP( "%spmov%cxbd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
15912 }
15913
15914 IRTemp zeroVec = newTemp(Ity_V128);
15915 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
15916
15917 IRExpr* res
15918 = binop(Iop_InterleaveLO8x16,
15919 mkexpr(zeroVec),
15920 binop(Iop_InterleaveLO8x16,
15921 mkexpr(zeroVec), mkexpr(srcVec)));
15922 if (!xIsZ)
15923 res = binop(Iop_SarN32x4,
15924 binop(Iop_ShlN32x4, res, mkU8(24)), mkU8(24));
15925
15926 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
15927
15928 return delta;
15929}
15930
15931
sewardj8516a1f2012-06-24 14:26:30 +000015932/* Handles 128 bit versions of PMOVSXBQ. */
15933static Long dis_PMOVSXBQ_128 ( VexAbiInfo* vbi, Prefix pfx,
15934 Long delta, Bool isAvx )
15935{
15936 IRTemp addr = IRTemp_INVALID;
15937 Int alen = 0;
15938 HChar dis_buf[50];
15939 IRTemp srcBytes = newTemp(Ity_I16);
15940 UChar modrm = getUChar(delta);
15941    HChar* mbV = isAvx ? "v" : "";
15942 UInt rG = gregOfRexRM(pfx, modrm);
15943 if ( epartIsReg(modrm) ) {
15944 UInt rE = eregOfRexRM(pfx, modrm);
15945 assign( srcBytes, getXMMRegLane16( rE, 0 ) );
15946 delta += 1;
15947 DIP( "%spmovsxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
15948 } else {
15949 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
15950 assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) );
15951 delta += alen;
15952 DIP( "%spmovsxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
15953 }
15954
15955 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15956 ( rG, binop( Iop_64HLtoV128,
15957 unop( Iop_8Sto64,
15958 unop( Iop_16HIto8, mkexpr(srcBytes) ) ),
15959 unop( Iop_8Sto64,
15960 unop( Iop_16to8, mkexpr(srcBytes) ) ) ) );
15961 return delta;
15962}
15963
15964
15965/* Handles 128 bit versions of PMOVZXBQ. */
15966static Long dis_PMOVZXBQ_128 ( VexAbiInfo* vbi, Prefix pfx,
15967 Long delta, Bool isAvx )
15968{
15969 IRTemp addr = IRTemp_INVALID;
15970 Int alen = 0;
15971 HChar dis_buf[50];
15972 IRTemp srcVec = newTemp(Ity_V128);
15973 UChar modrm = getUChar(delta);
15974    HChar* mbV = isAvx ? "v" : "";
15975 UInt rG = gregOfRexRM(pfx, modrm);
15976 if ( epartIsReg(modrm) ) {
15977 UInt rE = eregOfRexRM(pfx, modrm);
15978 assign( srcVec, getXMMReg(rE) );
15979 delta += 1;
15980 DIP( "%spmovzxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
15981 } else {
15982 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
15983 assign( srcVec,
15984 unop( Iop_32UtoV128,
15985 unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) ))));
15986 delta += alen;
15987 DIP( "%spmovzxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
15988 }
15989
15990 IRTemp zeroVec = newTemp(Ity_V128);
15991 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
15992
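   /* Each interleave-with-zero places a zero byte above every data byte,
      doubling the lane width, so the three nested InterleaveLO8x16s below
      take the two low source bytes to zero-extended 64-bit lanes. */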
15993 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15994 ( rG, binop( Iop_InterleaveLO8x16,
15995 mkexpr(zeroVec),
15996 binop( Iop_InterleaveLO8x16,
15997 mkexpr(zeroVec),
15998 binop( Iop_InterleaveLO8x16,
15999 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
16000 return delta;
16001}
16002
16003
sewardje8a7eb72012-06-12 14:59:17 +000016004static Long dis_PHMINPOSUW_128 ( VexAbiInfo* vbi, Prefix pfx,
16005 Long delta, Bool isAvx )
16006{
16007 IRTemp addr = IRTemp_INVALID;
16008 Int alen = 0;
16009 HChar dis_buf[50];
16010 UChar modrm = getUChar(delta);
16011    HChar* mbV = isAvx ? "v" : "";
16012 IRTemp sV = newTemp(Ity_V128);
16013 IRTemp sHi = newTemp(Ity_I64);
16014 IRTemp sLo = newTemp(Ity_I64);
16015 IRTemp dLo = newTemp(Ity_I64);
16016 UInt rG = gregOfRexRM(pfx,modrm);
16017 if (epartIsReg(modrm)) {
16018 UInt rE = eregOfRexRM(pfx,modrm);
16019 assign( sV, getXMMReg(rE) );
16020 delta += 1;
16021 DIP("%sphminposuw %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
16022 } else {
16023 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16024 gen_SEGV_if_not_16_aligned(addr);
16025 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16026 delta += alen;
16027 DIP("%sphminposuw %s,%s\n", mbV, dis_buf, nameXMMReg(rG));
16028 }
16029 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
16030 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
16031 assign( dLo, mkIRExprCCall(
16032 Ity_I64, 0/*regparms*/,
16033 "amd64g_calculate_sse_phminposuw",
16034 &amd64g_calculate_sse_phminposuw,
16035 mkIRExprVec_2( mkexpr(sLo), mkexpr(sHi) )
16036 ));
16037 (isAvx ? putYMMRegLoAndZU : putXMMReg)
16038 (rG, unop(Iop_64UtoV128, mkexpr(dLo)));
16039 return delta;
16040}
16041
16042
sewardj1407a362012-06-24 15:11:38 +000016043static Long dis_AESx ( VexAbiInfo* vbi, Prefix pfx,
16044 Long delta, Bool isAvx, UChar opc )
16045{
16046 IRTemp addr = IRTemp_INVALID;
16047 Int alen = 0;
16048 HChar dis_buf[50];
16049 UChar modrm = getUChar(delta);
16050 UInt rG = gregOfRexRM(pfx, modrm);
16051 UInt regNoL = 0;
16052 UInt regNoR = (isAvx && opc != 0xDB) ? getVexNvvvv(pfx) : rG;
16053
16054 /* This is a nasty kludge. We need to pass 2 x V128 to the
16055 helper. Since we can't do that, use a dirty
16056 helper to compute the results directly from the XMM regs in
16057 the guest state. That means for the memory case, we need to
16058 move the left operand into a pseudo-register (XMM16, let's
16059 call it). */
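   /* The dirty helper identifies its V128 operands by their guest-state
      offsets (gstOffL/gstOffR/gstOffD below), which is why a memory
      operand has to be parked in the scratch YMM16 slot first. */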
16060 if (epartIsReg(modrm)) {
16061 regNoL = eregOfRexRM(pfx, modrm);
16062 delta += 1;
16063 } else {
16064 regNoL = 16; /* use XMM16 as an intermediary */
16065 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16066 /* alignment check needed ???? */
16067 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
16068 delta += alen;
16069 }
16070
16071 void* fn = &amd64g_dirtyhelper_AES;
16072 HChar* nm = "amd64g_dirtyhelper_AES";
16073
16074 /* Round up the arguments. Note that this is a kludge -- the
16075 use of mkU64 rather than mkIRExpr_HWord implies the
16076 assumption that the host's word size is 64-bit. */
16077 UInt gstOffD = ymmGuestRegOffset(rG);
16078 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
16079 UInt gstOffR = ymmGuestRegOffset(regNoR);
16080 IRExpr* opc4 = mkU64(opc);
16081 IRExpr* gstOffDe = mkU64(gstOffD);
16082 IRExpr* gstOffLe = mkU64(gstOffL);
16083 IRExpr* gstOffRe = mkU64(gstOffR);
16084 IRExpr** args
16085 = mkIRExprVec_4( opc4, gstOffDe, gstOffLe, gstOffRe );
16086
16087 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
16088 /* It's not really a dirty call, but we can't use the clean
16089 helper mechanism here for the very lame reason that we can't
16090 pass 2 x V128s by value to a helper, nor get one back. Hence
16091 this roundabout scheme. */
16092 d->needsBBP = True;
16093 d->nFxState = 2;
16094 vex_bzero(&d->fxState, sizeof(d->fxState));
16095      /* AES{ENC,ENCLAST,DEC,DECLAST} read both registers, and write
16096 the second for !isAvx or the third for isAvx.
16097 AESIMC (0xDB) reads the first register, and writes the second. */
16098 d->fxState[0].fx = Ifx_Read;
16099 d->fxState[0].offset = gstOffL;
16100 d->fxState[0].size = sizeof(U128);
16101 d->fxState[1].offset = gstOffR;
16102 d->fxState[1].size = sizeof(U128);
16103 if (opc == 0xDB)
16104 d->fxState[1].fx = Ifx_Write;
16105 else if (!isAvx || rG == regNoR)
16106 d->fxState[1].fx = Ifx_Modify;
16107 else {
16108 d->fxState[1].fx = Ifx_Read;
16109 d->nFxState++;
16110 d->fxState[2].fx = Ifx_Write;
16111 d->fxState[2].offset = gstOffD;
16112 d->fxState[2].size = sizeof(U128);
16113 }
16114
16115 stmt( IRStmt_Dirty(d) );
16116 {
16117 HChar* opsuf;
16118 switch (opc) {
16119 case 0xDC: opsuf = "enc"; break;
16120         case 0xDD: opsuf = "enclast"; break;
16121 case 0xDE: opsuf = "dec"; break;
16122 case 0xDF: opsuf = "declast"; break;
16123 case 0xDB: opsuf = "imc"; break;
16124 default: vassert(0);
16125 }
16126 DIP("%saes%s %s,%s%s%s\n", isAvx ? "v" : "", opsuf,
16127 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
16128 nameXMMReg(regNoR),
16129 (isAvx && opc != 0xDB) ? "," : "",
16130 (isAvx && opc != 0xDB) ? nameXMMReg(rG) : "");
16131 }
16132 if (isAvx)
16133 putYMMRegLane128( rG, 1, mkV128(0) );
16134 return delta;
16135}
16136
16137static Long dis_AESKEYGENASSIST ( VexAbiInfo* vbi, Prefix pfx,
16138 Long delta, Bool isAvx )
16139{
16140 IRTemp addr = IRTemp_INVALID;
16141 Int alen = 0;
16142 HChar dis_buf[50];
16143 UChar modrm = getUChar(delta);
16144 UInt regNoL = 0;
16145 UInt regNoR = gregOfRexRM(pfx, modrm);
16146 UChar imm = 0;
16147
16148 /* This is a nasty kludge. See AESENC et al. instructions. */
16149 modrm = getUChar(delta);
16150 if (epartIsReg(modrm)) {
16151 regNoL = eregOfRexRM(pfx, modrm);
16152 imm = getUChar(delta+1);
16153 delta += 1+1;
16154 } else {
16155 regNoL = 16; /* use XMM16 as an intermediary */
16156 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
16157 /* alignment check ???? . */
16158 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
16159 imm = getUChar(delta+alen);
16160 delta += alen+1;
16161 }
16162
16163 /* Who ya gonna call? Presumably not Ghostbusters. */
16164 void* fn = &amd64g_dirtyhelper_AESKEYGENASSIST;
16165 HChar* nm = "amd64g_dirtyhelper_AESKEYGENASSIST";
16166
16167 /* Round up the arguments. Note that this is a kludge -- the
16168 use of mkU64 rather than mkIRExpr_HWord implies the
16169 assumption that the host's word size is 64-bit. */
16170 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
16171 UInt gstOffR = ymmGuestRegOffset(regNoR);
16172
16173 IRExpr* imme = mkU64(imm & 0xFF);
16174 IRExpr* gstOffLe = mkU64(gstOffL);
16175 IRExpr* gstOffRe = mkU64(gstOffR);
16176 IRExpr** args
16177 = mkIRExprVec_3( imme, gstOffLe, gstOffRe );
16178
16179 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
16180 /* It's not really a dirty call, but we can't use the clean
16181 helper mechanism here for the very lame reason that we can't
16182 pass 2 x V128s by value to a helper, nor get one back. Hence
16183 this roundabout scheme. */
16184 d->needsBBP = True;
16185 d->nFxState = 2;
16186 vex_bzero(&d->fxState, sizeof(d->fxState));
16187 d->fxState[0].fx = Ifx_Read;
16188 d->fxState[0].offset = gstOffL;
16189 d->fxState[0].size = sizeof(U128);
16190 d->fxState[1].fx = Ifx_Write;
16191 d->fxState[1].offset = gstOffR;
16192 d->fxState[1].size = sizeof(U128);
16193 stmt( IRStmt_Dirty(d) );
16194
16195 DIP("%saeskeygenassist $%x,%s,%s\n", isAvx ? "v" : "", (UInt)imm,
16196 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
16197 nameXMMReg(regNoR));
16198 if (isAvx)
16199 putYMMRegLane128( regNoR, 1, mkV128(0) );
16200 return delta;
16201}
16202
16203
sewardj80611e32012-01-20 13:07:24 +000016204__attribute__((noinline))
16205static
16206Long dis_ESC_0F38__SSE4 ( Bool* decode_OK,
16207 VexAbiInfo* vbi,
16208 Prefix pfx, Int sz, Long deltaIN )
16209{
16210 IRTemp addr = IRTemp_INVALID;
16211 UChar modrm = 0;
16212 Int alen = 0;
16213 HChar dis_buf[50];
16214
16215 *decode_OK = False;
16216
16217 Long delta = deltaIN;
16218 UChar opc = getUChar(delta);
16219 delta++;
16220 switch (opc) {
16221
16222 case 0x10:
16223 case 0x14:
16224 case 0x15:
16225 /* 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128 (byte gran)
16226 66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128 (float gran)
16227 66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128 (double gran)
16228 Blend at various granularities, with XMM0 (implicit operand)
16229 providing the controlling mask.
16230 */
16231 if (have66noF2noF3(pfx) && sz == 2) {
16232 modrm = getUChar(delta);
16233
16234 HChar* nm = NULL;
16235 UInt gran = 0;
16236 IROp opSAR = Iop_INVALID;
16237 switch (opc) {
16238 case 0x10:
16239 nm = "pblendvb"; gran = 1; opSAR = Iop_SarN8x16;
16240 break;
16241 case 0x14:
16242 nm = "blendvps"; gran = 4; opSAR = Iop_SarN32x4;
16243 break;
16244 case 0x15:
16245 nm = "blendvpd"; gran = 8; opSAR = Iop_SarN64x2;
16246 break;
16247 }
16248 vassert(nm);
16249
16250 IRTemp vecE = newTemp(Ity_V128);
16251 IRTemp vecG = newTemp(Ity_V128);
16252 IRTemp vec0 = newTemp(Ity_V128);
16253
16254 if ( epartIsReg(modrm) ) {
16255 assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm)));
16256 delta += 1;
16257 DIP( "%s %s,%s\n", nm,
16258 nameXMMReg( eregOfRexRM(pfx, modrm) ),
16259 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
16260 } else {
16261 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16262 gen_SEGV_if_not_16_aligned( addr );
16263 assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
16264 delta += alen;
16265 DIP( "%s %s,%s\n", nm,
16266 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
16267 }
16268
16269 assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm)));
16270 assign(vec0, getXMMReg(0));
16271
sewardje8a7eb72012-06-12 14:59:17 +000016272 IRTemp res = math_PBLENDVB_128( vecE, vecG, vec0, gran, opSAR );
sewardjc4530ae2012-05-21 10:18:49 +000016273 putXMMReg(gregOfRexRM(pfx, modrm), mkexpr(res));
sewardj80611e32012-01-20 13:07:24 +000016274
16275 goto decode_success;
16276 }
16277 break;
16278
16279 case 0x17:
16280 /* 66 0F 38 17 /r = PTEST xmm1, xmm2/m128
16281 Logical compare (set ZF and CF from AND/ANDN of the operands) */
16282 if (have66noF2noF3(pfx)
16283 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
sewardjed1884d2012-06-21 08:53:48 +000016284 delta = dis_xTESTy_128( vbi, pfx, delta, False/*!isAvx*/, 0 );
sewardj80611e32012-01-20 13:07:24 +000016285 goto decode_success;
16286 }
16287 break;
16288
16289 case 0x20:
16290 /* 66 0F 38 20 /r = PMOVSXBW xmm1, xmm2/m64
16291 Packed Move with Sign Extend from Byte to Word (XMM) */
sewardj6fcd43e2012-06-14 08:51:35 +000016292 if (have66noF2noF3(pfx) && sz == 2) {
16293 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
16294 False/*!isAvx*/, False/*!xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000016295 goto decode_success;
16296 }
16297 break;
16298
16299 case 0x21:
16300 /* 66 0F 38 21 /r = PMOVSXBD xmm1, xmm2/m32
16301 Packed Move with Sign Extend from Byte to DWord (XMM) */
16302 if (have66noF2noF3(pfx) && sz == 2) {
sewardj4ed05e02012-06-18 15:01:30 +000016303 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
16304 False/*!isAvx*/, False/*!xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000016305 goto decode_success;
16306 }
16307 break;
16308
16309 case 0x22:
16310 /* 66 0F 38 22 /r = PMOVSXBQ xmm1, xmm2/m16
16311 Packed Move with Sign Extend from Byte to QWord (XMM) */
16312 if (have66noF2noF3(pfx) && sz == 2) {
sewardj8516a1f2012-06-24 14:26:30 +000016313 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000016314 goto decode_success;
16315 }
16316 break;
16317
16318 case 0x23:
16319 /* 66 0F 38 23 /r = PMOVSXWD xmm1, xmm2/m64
16320 Packed Move with Sign Extend from Word to DWord (XMM) */
16321 if (have66noF2noF3(pfx) && sz == 2) {
sewardj6fcd43e2012-06-14 08:51:35 +000016322 delta = dis_PMOVxXWD_128(vbi, pfx, delta,
16323 False/*!isAvx*/, False/*!xIsZ*/);
sewardj80611e32012-01-20 13:07:24 +000016324 goto decode_success;
16325 }
16326 break;
16327
16328 case 0x24:
16329 /* 66 0F 38 24 /r = PMOVSXWQ xmm1, xmm2/m32
16330 Packed Move with Sign Extend from Word to QWord (XMM) */
16331 if (have66noF2noF3(pfx) && sz == 2) {
sewardj8516a1f2012-06-24 14:26:30 +000016332 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000016333 goto decode_success;
16334 }
16335 break;
16336
16337 case 0x25:
16338 /* 66 0F 38 25 /r = PMOVSXDQ xmm1, xmm2/m64
16339 Packed Move with Sign Extend from Double Word to Quad Word (XMM) */
16340 if (have66noF2noF3(pfx) && sz == 2) {
sewardj6fcd43e2012-06-14 08:51:35 +000016341 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
16342 False/*!isAvx*/, False/*!xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000016343 goto decode_success;
16344 }
16345 break;
16346
16347 case 0x28:
sewardj89378162012-06-24 12:12:20 +000016348      /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-bit lanes
sewardj80611e32012-01-20 13:07:24 +000016349         0 x 0 to form the lower 64-bit half and of lanes 2 x 2 to form
16350         the upper 64-bit half */
16351 /* This is a really poor translation -- could be improved if
sewardj89378162012-06-24 12:12:20 +000016352 performance critical. It's a copy-paste of PMULUDQ, too. */
sewardj80611e32012-01-20 13:07:24 +000016353 if (have66noF2noF3(pfx) && sz == 2) {
sewardj89378162012-06-24 12:12:20 +000016354 IRTemp sV = newTemp(Ity_V128);
16355 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000016356 modrm = getUChar(delta);
sewardj89378162012-06-24 12:12:20 +000016357 UInt rG = gregOfRexRM(pfx,modrm);
16358 assign( dV, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000016359 if (epartIsReg(modrm)) {
sewardj89378162012-06-24 12:12:20 +000016360 UInt rE = eregOfRexRM(pfx,modrm);
16361 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000016362 delta += 1;
sewardj89378162012-06-24 12:12:20 +000016363 DIP("pmuldq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000016364 } else {
16365 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16366 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16367 delta += alen;
sewardj89378162012-06-24 12:12:20 +000016368 DIP("pmuldq %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000016369 }
16370
sewardj89378162012-06-24 12:12:20 +000016371 putXMMReg( rG, mkexpr(math_PMULDQ_128( dV, sV )) );
sewardj80611e32012-01-20 13:07:24 +000016372 goto decode_success;
16373 }
16374 break;
16375
16376 case 0x29:
16377 /* 66 0F 38 29 = PCMPEQQ
16378 64x2 equality comparison */
16379 if (have66noF2noF3(pfx) && sz == 2) {
16380 /* FIXME: this needs an alignment check */
16381 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
16382 "pcmpeqq", Iop_CmpEQ64x2, False );
16383 goto decode_success;
16384 }
16385 break;
16386
16387 case 0x2B:
16388 /* 66 0f 38 2B /r = PACKUSDW xmm1, xmm2/m128
16389 2x 32x4 S->U saturating narrow from xmm2/m128 to xmm1 */
16390 if (have66noF2noF3(pfx) && sz == 2) {
16391
16392 modrm = getUChar(delta);
16393
16394 IRTemp argL = newTemp(Ity_V128);
16395 IRTemp argR = newTemp(Ity_V128);
16396
16397 if ( epartIsReg(modrm) ) {
16398 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
16399 delta += 1;
16400 DIP( "packusdw %s,%s\n",
16401 nameXMMReg( eregOfRexRM(pfx, modrm) ),
16402 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
16403 } else {
16404 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16405 gen_SEGV_if_not_16_aligned( addr );
16406 assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
16407 delta += alen;
16408 DIP( "packusdw %s,%s\n",
16409 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
16410 }
16411
16412 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
16413
16414 putXMMReg( gregOfRexRM(pfx, modrm),
16415 binop( Iop_QNarrowBin32Sto16Ux8,
16416 mkexpr(argL), mkexpr(argR)) );
16417
16418 goto decode_success;
16419 }
16420 break;
16421
16422 case 0x30:
16423 /* 66 0F 38 30 /r = PMOVZXBW xmm1, xmm2/m64
16424 Packed Move with Zero Extend from Byte to Word (XMM) */
16425 if (have66noF2noF3(pfx) && sz == 2) {
sewardj6fcd43e2012-06-14 08:51:35 +000016426 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
16427 False/*!isAvx*/, True/*xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000016428 goto decode_success;
16429 }
16430 break;
16431
16432 case 0x31:
16433 /* 66 0F 38 31 /r = PMOVZXBD xmm1, xmm2/m32
16434 Packed Move with Zero Extend from Byte to DWord (XMM) */
16435 if (have66noF2noF3(pfx) && sz == 2) {
sewardj4ed05e02012-06-18 15:01:30 +000016436 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
16437 False/*!isAvx*/, True/*xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000016438 goto decode_success;
16439 }
16440 break;
16441
16442 case 0x32:
16443 /* 66 0F 38 32 /r = PMOVZXBQ xmm1, xmm2/m16
16444 Packed Move with Zero Extend from Byte to QWord (XMM) */
16445 if (have66noF2noF3(pfx) && sz == 2) {
sewardj8516a1f2012-06-24 14:26:30 +000016446 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000016447 goto decode_success;
16448 }
16449 break;
16450
16451 case 0x33:
16452 /* 66 0F 38 33 /r = PMOVZXWD xmm1, xmm2/m64
16453 Packed Move with Zero Extend from Word to DWord (XMM) */
16454 if (have66noF2noF3(pfx) && sz == 2) {
sewardj6fcd43e2012-06-14 08:51:35 +000016455 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
16456 False/*!isAvx*/, True/*xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000016457 goto decode_success;
16458 }
16459 break;
16460
16461 case 0x34:
16462 /* 66 0F 38 34 /r = PMOVZXWQ xmm1, xmm2/m32
16463 Packed Move with Zero Extend from Word to QWord (XMM) */
16464 if (have66noF2noF3(pfx) && sz == 2) {
sewardj8516a1f2012-06-24 14:26:30 +000016465 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000016466 goto decode_success;
16467 }
16468 break;
16469
16470 case 0x35:
16471 /* 66 0F 38 35 /r = PMOVZXDQ xmm1, xmm2/m64
16472 Packed Move with Zero Extend from DWord to QWord (XMM) */
16473 if (have66noF2noF3(pfx) && sz == 2) {
sewardj6fcd43e2012-06-14 08:51:35 +000016474 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
16475 False/*!isAvx*/, True/*xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000016476 goto decode_success;
16477 }
16478 break;
16479
16480 case 0x37:
16481 /* 66 0F 38 37 = PCMPGTQ
16482 64x2 comparison (signed, presumably; the Intel docs don't say :-)
16483 */
16484 if (have66noF2noF3(pfx) && sz == 2) {
16485 /* FIXME: this needs an alignment check */
16486 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
16487 "pcmpgtq", Iop_CmpGT64Sx2, False );
16488 goto decode_success;
16489 }
16490 break;
16491
16492 case 0x38:
16493 case 0x3C:
16494 /* 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128 8Sx16 (signed) min
16495 66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128 8Sx16 (signed) max
16496 */
16497 if (have66noF2noF3(pfx) && sz == 2) {
16498 /* FIXME: this needs an alignment check */
16499 Bool isMAX = opc == 0x3C;
16500 delta = dis_SSEint_E_to_G(
16501 vbi, pfx, delta,
16502 isMAX ? "pmaxsb" : "pminsb",
16503 isMAX ? Iop_Max8Sx16 : Iop_Min8Sx16,
16504 False
16505 );
16506 goto decode_success;
16507 }
16508 break;
16509
16510 case 0x39:
16511 case 0x3D:
16512 /* 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128
16513 Minimum of Packed Signed Double Word Integers (XMM)
16514 66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128
16515 Maximum of Packed Signed Double Word Integers (XMM)
16516 */
16517 if (have66noF2noF3(pfx) && sz == 2) {
16518 /* FIXME: this needs an alignment check */
16519 Bool isMAX = opc == 0x3D;
16520 delta = dis_SSEint_E_to_G(
16521 vbi, pfx, delta,
16522 isMAX ? "pmaxsd" : "pminsd",
16523 isMAX ? Iop_Max32Sx4 : Iop_Min32Sx4,
16524 False
16525 );
16526 goto decode_success;
16527 }
16528 break;
16529
16530 case 0x3A:
16531 case 0x3E:
16532 /* 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128
16533 Minimum of Packed Unsigned Word Integers (XMM)
16534 66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128
16535 Maximum of Packed Unsigned Word Integers (XMM)
16536 */
16537 if (have66noF2noF3(pfx) && sz == 2) {
16538 /* FIXME: this needs an alignment check */
16539 Bool isMAX = opc == 0x3E;
16540 delta = dis_SSEint_E_to_G(
16541 vbi, pfx, delta,
16542 isMAX ? "pmaxuw" : "pminuw",
16543 isMAX ? Iop_Max16Ux8 : Iop_Min16Ux8,
16544 False
16545 );
16546 goto decode_success;
16547 }
16548 break;
16549
16550 case 0x3B:
16551 case 0x3F:
16552 /* 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128
16553 Minimum of Packed Unsigned Doubleword Integers (XMM)
16554 66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128
16555 Maximum of Packed Unsigned Doubleword Integers (XMM)
16556 */
16557 if (have66noF2noF3(pfx) && sz == 2) {
16558 /* FIXME: this needs an alignment check */
16559 Bool isMAX = opc == 0x3F;
16560 delta = dis_SSEint_E_to_G(
16561 vbi, pfx, delta,
16562 isMAX ? "pmaxud" : "pminud",
16563 isMAX ? Iop_Max32Ux4 : Iop_Min32Ux4,
16564 False
16565 );
16566 goto decode_success;
16567 }
16568 break;
16569
16570 case 0x40:
16571 /* 66 0F 38 40 /r = PMULLD xmm1, xmm2/m128
16572 32x4 integer multiply from xmm2/m128 to xmm1 */
16573 if (have66noF2noF3(pfx) && sz == 2) {
16574
16575 modrm = getUChar(delta);
16576
16577 IRTemp argL = newTemp(Ity_V128);
16578 IRTemp argR = newTemp(Ity_V128);
16579
16580 if ( epartIsReg(modrm) ) {
16581 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
16582 delta += 1;
16583 DIP( "pmulld %s,%s\n",
16584 nameXMMReg( eregOfRexRM(pfx, modrm) ),
16585 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
16586 } else {
16587 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16588 gen_SEGV_if_not_16_aligned( addr );
16589 assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
16590 delta += alen;
16591 DIP( "pmulld %s,%s\n",
16592 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
16593 }
16594
16595 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
16596
16597 putXMMReg( gregOfRexRM(pfx, modrm),
16598 binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) );
16599
16600 goto decode_success;
16601 }
16602 break;
16603
sewardj8cb931e2012-02-16 22:02:14 +000016604 case 0x41:
16605 /* 66 0F 38 41 /r = PHMINPOSUW xmm1, xmm2/m128
16606 Packed Horizontal Word Minimum from xmm2/m128 to xmm1 */
16607 if (have66noF2noF3(pfx) && sz == 2) {
sewardje8a7eb72012-06-12 14:59:17 +000016608 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj8cb931e2012-02-16 22:02:14 +000016609 goto decode_success;
16610 }
16611 break;
16612
philippeff4d6be2012-02-14 21:34:56 +000016613 case 0xDC:
16614 case 0xDD:
16615 case 0xDE:
16616 case 0xDF:
16617 case 0xDB:
16618 /* 66 0F 38 DC /r = AESENC xmm1, xmm2/m128
16619 DD /r = AESENCLAST xmm1, xmm2/m128
16620 DE /r = AESDEC xmm1, xmm2/m128
16621 DF /r = AESDECLAST xmm1, xmm2/m128
16622
16623 DB /r = AESIMC xmm1, xmm2/m128 */
16624 if (have66noF2noF3(pfx) && sz == 2) {
sewardj1407a362012-06-24 15:11:38 +000016625 delta = dis_AESx( vbi, pfx, delta, False/*!isAvx*/, opc );
philippeff4d6be2012-02-14 21:34:56 +000016626 goto decode_success;
16627 }
16628 break;
16629
sewardj80611e32012-01-20 13:07:24 +000016630 case 0xF0:
16631 case 0xF1:
16632 /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok)
16633 F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32
16634 The decoding on this is a bit unusual.
16635 */
16636 if (haveF2noF3(pfx)
16637 && (opc == 0xF1 || (opc == 0xF0 && !have66(pfx)))) {
16638 modrm = getUChar(delta);
16639
16640 if (opc == 0xF0)
16641 sz = 1;
16642 else
16643 vassert(sz == 2 || sz == 4 || sz == 8);
16644
16645 IRType tyE = szToITy(sz);
16646 IRTemp valE = newTemp(tyE);
16647
16648 if (epartIsReg(modrm)) {
16649 assign(valE, getIRegE(sz, pfx, modrm));
16650 delta += 1;
16651             DIP("crc32%c %s,%s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16652 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
16653 } else {
16654 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16655 assign(valE, loadLE(tyE, mkexpr(addr)));
16656 delta += alen;
16657             DIP("crc32%c %s,%s\n", nameISize(sz), dis_buf,
16658 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
16659 }
16660
16661 /* Somewhat funny getting/putting of the crc32 value, in order
16662 to ensure that it turns into 64-bit gets and puts. However,
16663 mask off the upper 32 bits so as to not get memcheck false
16664 +ves around the helper call. */
16665 IRTemp valG0 = newTemp(Ity_I64);
16666 assign(valG0, binop(Iop_And64, getIRegG(8, pfx, modrm),
16667 mkU64(0xFFFFFFFF)));
16668
16669 HChar* nm = NULL;
16670 void* fn = NULL;
16671 switch (sz) {
16672 case 1: nm = "amd64g_calc_crc32b";
16673 fn = &amd64g_calc_crc32b; break;
16674 case 2: nm = "amd64g_calc_crc32w";
16675 fn = &amd64g_calc_crc32w; break;
16676 case 4: nm = "amd64g_calc_crc32l";
16677 fn = &amd64g_calc_crc32l; break;
16678 case 8: nm = "amd64g_calc_crc32q";
16679 fn = &amd64g_calc_crc32q; break;
16680 }
16681 vassert(nm && fn);
16682 IRTemp valG1 = newTemp(Ity_I64);
16683 assign(valG1,
16684 mkIRExprCCall(Ity_I64, 0/*regparm*/, nm, fn,
16685 mkIRExprVec_2(mkexpr(valG0),
16686 widenUto64(mkexpr(valE)))));
16687
16688 putIRegG(4, pfx, modrm, unop(Iop_64to32, mkexpr(valG1)));
16689 goto decode_success;
16690 }
16691 break;
16692
16693 default:
16694 break;
16695
16696 }
16697
16698 //decode_failure:
16699 *decode_OK = False;
16700 return deltaIN;
16701
16702 decode_success:
16703 *decode_OK = True;
16704 return delta;
16705}
16706
16707
16708/*------------------------------------------------------------*/
16709/*--- ---*/
16710/*--- Top-level SSE4: dis_ESC_0F3A__SSE4 ---*/
16711/*--- ---*/
16712/*------------------------------------------------------------*/
16713
sewardj82096922012-06-24 14:57:59 +000016714static Long dis_PEXTRW ( VexAbiInfo* vbi, Prefix pfx,
16715 Long delta, Bool isAvx )
16716{
16717 IRTemp addr = IRTemp_INVALID;
16718 IRTemp t0 = IRTemp_INVALID;
16719 IRTemp t1 = IRTemp_INVALID;
16720 IRTemp t2 = IRTemp_INVALID;
16721 IRTemp t3 = IRTemp_INVALID;
16722 UChar modrm = getUChar(delta);
16723 Int alen = 0;
16724 HChar dis_buf[50];
16725 UInt rG = gregOfRexRM(pfx,modrm);
16726 Int imm8_20;
16727 IRTemp xmm_vec = newTemp(Ity_V128);
16728 IRTemp d16 = newTemp(Ity_I16);
16729 HChar* mbV = isAvx ? "v" : "";
16730
16731 vassert(0==getRexW(pfx)); /* ensured by caller */
16732 assign( xmm_vec, getXMMReg(rG) );
16733 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
16734
16735 if ( epartIsReg( modrm ) ) {
16736 imm8_20 = (Int)(getUChar(delta+1) & 7);
16737 } else {
16738 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
16739 imm8_20 = (Int)(getUChar(delta+alen) & 7);
16740 }
16741
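   /* imm8_20 is imm8 & 7, i.e. the selected 16-bit lane; even values take
      the low half and odd values the high half of the corresponding
      32-bit chunk produced by breakupV128to32s above. */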
16742 switch (imm8_20) {
16743 case 0: assign(d16, unop(Iop_32to16, mkexpr(t0))); break;
16744 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(t0))); break;
16745 case 2: assign(d16, unop(Iop_32to16, mkexpr(t1))); break;
16746 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(t1))); break;
16747 case 4: assign(d16, unop(Iop_32to16, mkexpr(t2))); break;
16748 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(t2))); break;
16749 case 6: assign(d16, unop(Iop_32to16, mkexpr(t3))); break;
16750 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(t3))); break;
16751 default: vassert(0);
16752 }
16753
16754 if ( epartIsReg( modrm ) ) {
16755 UInt rE = eregOfRexRM(pfx,modrm);
16756 putIReg32( rE, unop(Iop_16Uto32, mkexpr(d16)) );
16757 delta += 1+1;
16758 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20,
16759 nameXMMReg( rG ), nameIReg32( rE ) );
16760 } else {
16761 storeLE( mkexpr(addr), mkexpr(d16) );
16762 delta += alen+1;
16763 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20, nameXMMReg( rG ), dis_buf );
16764 }
16765 return delta;
16766}
16767
16768
sewardjc4530ae2012-05-21 10:18:49 +000016769static Long dis_PEXTRD ( VexAbiInfo* vbi, Prefix pfx,
16770 Long delta, Bool isAvx )
16771{
16772 IRTemp addr = IRTemp_INVALID;
16773 IRTemp t0 = IRTemp_INVALID;
16774 IRTemp t1 = IRTemp_INVALID;
16775 IRTemp t2 = IRTemp_INVALID;
16776 IRTemp t3 = IRTemp_INVALID;
16777 UChar modrm = 0;
16778 Int alen = 0;
16779 HChar dis_buf[50];
16780
16781 Int imm8_10;
16782 IRTemp xmm_vec = newTemp(Ity_V128);
16783 IRTemp src_dword = newTemp(Ity_I32);
16784 HChar* mbV = isAvx ? "v" : "";
16785
16786 vassert(0==getRexW(pfx)); /* ensured by caller */
16787 modrm = getUChar(delta);
16788 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
sewardj4b1cc832012-06-13 11:10:20 +000016789 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
sewardjc4530ae2012-05-21 10:18:49 +000016790
16791 if ( epartIsReg( modrm ) ) {
16792 imm8_10 = (Int)(getUChar(delta+1) & 3);
16793 } else {
16794 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
16795 imm8_10 = (Int)(getUChar(delta+alen) & 3);
16796 }
16797
16798 switch ( imm8_10 ) {
16799 case 0: assign( src_dword, mkexpr(t0) ); break;
16800 case 1: assign( src_dword, mkexpr(t1) ); break;
16801 case 2: assign( src_dword, mkexpr(t2) ); break;
16802 case 3: assign( src_dword, mkexpr(t3) ); break;
16803 default: vassert(0);
16804 }
16805
16806 if ( epartIsReg( modrm ) ) {
16807 putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) );
16808 delta += 1+1;
16809 DIP( "%spextrd $%d, %s,%s\n", mbV, imm8_10,
16810 nameXMMReg( gregOfRexRM(pfx, modrm) ),
16811 nameIReg32( eregOfRexRM(pfx, modrm) ) );
16812 } else {
16813 storeLE( mkexpr(addr), mkexpr(src_dword) );
16814 delta += alen+1;
16815 DIP( "%spextrd $%d, %s,%s\n", mbV,
16816 imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
16817 }
16818 return delta;
16819}
16820
16821
sewardj56c30312012-06-12 08:45:39 +000016822static Long dis_PEXTRQ ( VexAbiInfo* vbi, Prefix pfx,
16823 Long delta, Bool isAvx )
16824{
16825 IRTemp addr = IRTemp_INVALID;
16826 UChar modrm = 0;
16827 Int alen = 0;
16828 HChar dis_buf[50];
16829
16830 Int imm8_0;
16831 IRTemp xmm_vec = newTemp(Ity_V128);
16832 IRTemp src_qword = newTemp(Ity_I64);
16833 HChar* mbV = isAvx ? "v" : "";
16834
16835 vassert(1==getRexW(pfx)); /* ensured by caller */
16836 modrm = getUChar(delta);
16837 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
16838
16839 if ( epartIsReg( modrm ) ) {
16840 imm8_0 = (Int)(getUChar(delta+1) & 1);
16841 } else {
16842 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
16843 imm8_0 = (Int)(getUChar(delta+alen) & 1);
16844 }
16845
16846 switch ( imm8_0 ) {
16847 case 0: assign( src_qword, unop(Iop_V128to64, mkexpr(xmm_vec)) );
16848 break;
16849 case 1: assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) );
16850 break;
16851 default: vassert(0);
16852 }
16853
16854 if ( epartIsReg( modrm ) ) {
16855 putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) );
16856 delta += 1+1;
16857 DIP( "%spextrq $%d, %s,%s\n", mbV, imm8_0,
16858 nameXMMReg( gregOfRexRM(pfx, modrm) ),
16859 nameIReg64( eregOfRexRM(pfx, modrm) ) );
16860 } else {
16861 storeLE( mkexpr(addr), mkexpr(src_qword) );
16862 delta += alen+1;
16863 DIP( "%spextrq $%d, %s,%s\n", mbV,
16864 imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
16865 }
16866 return delta;
16867}
16868
16869
sewardjac75d7b2012-05-23 12:42:39 +000016870/* This can fail, in which case it returns the original (unchanged)
16871 delta. */
16872static Long dis_PCMPxSTRx ( VexAbiInfo* vbi, Prefix pfx,
16873 Long delta, Bool isAvx, UChar opc )
16874{
16875 Long delta0 = delta;
16876 UInt isISTRx = opc & 2;
16877 UInt isxSTRM = (opc & 1) ^ 1;
16878 UInt regNoL = 0;
16879 UInt regNoR = 0;
16880 UChar imm = 0;
16881 IRTemp addr = IRTemp_INVALID;
16882 Int alen = 0;
16883 HChar dis_buf[50];
16884
16885 /* This is a nasty kludge. We need to pass 2 x V128 to the helper
16886 (which is clean). Since we can't do that, use a dirty helper to
16887 compute the results directly from the XMM regs in the guest
16888 state. That means for the memory case, we need to move the left
16889 operand into a pseudo-register (XMM16, let's call it). */
16890 UChar modrm = getUChar(delta);
16891 if (epartIsReg(modrm)) {
16892 regNoL = eregOfRexRM(pfx, modrm);
16893 regNoR = gregOfRexRM(pfx, modrm);
16894 imm = getUChar(delta+1);
16895 delta += 1+1;
16896 } else {
16897 regNoL = 16; /* use XMM16 as an intermediary */
16898 regNoR = gregOfRexRM(pfx, modrm);
sewardjd343e622012-05-24 06:17:14 +000016899 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardjac75d7b2012-05-23 12:42:39 +000016900 /* No alignment check; I guess that makes sense, given that
16901 these insns are for dealing with C style strings. */
16902 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
16903 imm = getUChar(delta+alen);
16904 delta += alen+1;
16905 }
16906
16907 /* Now we know the XMM reg numbers for the operands, and the
16908 immediate byte. Is it one we can actually handle? Throw out any
16909 cases for which the helper function has not been verified. */
16910 switch (imm) {
16911 case 0x00:
16912 case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x12:
16913 case 0x1A: case 0x38: case 0x3A: case 0x44: case 0x4A:
16914 break;
16915 case 0x01: // the 16-bit character versions of the above
16916 case 0x03: case 0x09: case 0x0B: case 0x0D: case 0x13:
16917 case 0x1B: case 0x39: case 0x3B: case 0x45: case 0x4B:
16918 break;
16919 default:
16920 return delta0; /*FAIL*/
16921 }
16922
16923 /* Who ya gonna call? Presumably not Ghostbusters. */
16924 void* fn = &amd64g_dirtyhelper_PCMPxSTRx;
16925 HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx";
16926
16927 /* Round up the arguments. Note that this is a kludge -- the use
16928 of mkU64 rather than mkIRExpr_HWord implies the assumption that
16929 the host's word size is 64-bit. */
16930 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
16931 UInt gstOffR = ymmGuestRegOffset(regNoR);
16932
16933 IRExpr* opc4_and_imm = mkU64((opc << 8) | (imm & 0xFF));
16934 IRExpr* gstOffLe = mkU64(gstOffL);
16935 IRExpr* gstOffRe = mkU64(gstOffR);
16936 IRExpr* edxIN = isISTRx ? mkU64(0) : getIRegRDX(8);
16937 IRExpr* eaxIN = isISTRx ? mkU64(0) : getIRegRAX(8);
16938 IRExpr** args
16939 = mkIRExprVec_5( opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN );
16940
16941 IRTemp resT = newTemp(Ity_I64);
16942 IRDirty* d = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args );
16943 /* It's not really a dirty call, but we can't use the clean helper
16944 mechanism here for the very lame reason that we can't pass 2 x
16945 V128s by value to a helper, nor get one back. Hence this
16946 roundabout scheme. */
16947 d->needsBBP = True;
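   /* needsBBP: hand the helper the guest state pointer, so that it
      can read the two XMM operands (and write XMM0, for the xSTRM
      variants) directly from/to the guest state. */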
16948 d->nFxState = 2;
sewardjc9069f22012-06-01 16:09:50 +000016949 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardjac75d7b2012-05-23 12:42:39 +000016950 d->fxState[0].fx = Ifx_Read;
16951 d->fxState[0].offset = gstOffL;
16952 d->fxState[0].size = sizeof(U128);
16953 d->fxState[1].fx = Ifx_Read;
16954 d->fxState[1].offset = gstOffR;
16955 d->fxState[1].size = sizeof(U128);
16956 if (isxSTRM) {
16957 /* Declare that the helper writes XMM0. */
16958 d->nFxState = 3;
16959 d->fxState[2].fx = Ifx_Write;
16960 d->fxState[2].offset = ymmGuestRegOffset(0);
16961 d->fxState[2].size = sizeof(U128);
16962 }
16963
16964 stmt( IRStmt_Dirty(d) );
16965
16966 /* Now resT[15:0] holds the new OSZACP values, so the condition
16967 codes must be updated. And for a xSTRI case, resT[31:16] holds
16968 the new ECX value, so stash that too. */
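   /* Writing the zero-extended 16-bit value to all of RCX has the
      same effect as the architecturally specified 32-bit write to
      ECX, which in 64-bit mode clears the upper half of RCX anyway. */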
16969 if (!isxSTRM) {
16970 putIReg64(R_RCX, binop(Iop_And64,
16971 binop(Iop_Shr64, mkexpr(resT), mkU8(16)),
16972 mkU64(0xFFFF)));
16973 }
16974
16975 /* Zap the upper half of the dest reg as per AVX conventions. */
16976 if (isxSTRM && isAvx)
16977 putYMMRegLane128(/*YMM*/0, 1, mkV128(0));
16978
16979 stmt( IRStmt_Put(
16980 OFFB_CC_DEP1,
16981 binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF))
16982 ));
16983 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16984 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16985 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16986
16987 if (regNoL == 16) {
16988 DIP("%spcmp%cstr%c $%x,%s,%s\n",
16989 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
16990 (UInt)imm, dis_buf, nameXMMReg(regNoR));
16991 } else {
16992 DIP("%spcmp%cstr%c $%x,%s,%s\n",
16993 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
16994 (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR));
16995 }
16996
16997 return delta;
16998}
16999
17000
sewardj21459cb2012-06-18 14:05:52 +000017001static IRTemp math_PINSRB_128 ( IRTemp v128, IRTemp u8, UInt imm8 )
17002{
sewardj4ed05e02012-06-18 15:01:30 +000017003 vassert(imm8 >= 0 && imm8 <= 15);
17004
sewardj21459cb2012-06-18 14:05:52 +000017005 // Create a V128 value which has the selected byte in the
17006 // specified lane, and zeroes everywhere else.
sewardj4ed05e02012-06-18 15:01:30 +000017007 IRTemp tmp128 = newTemp(Ity_V128);
sewardj21459cb2012-06-18 14:05:52 +000017008 IRTemp halfshift = newTemp(Ity_I64);
17009 assign(halfshift, binop(Iop_Shl64,
sewardj4ed05e02012-06-18 15:01:30 +000017010 unop(Iop_8Uto64, mkexpr(u8)),
17011 mkU8(8 * (imm8 & 7))));
sewardj21459cb2012-06-18 14:05:52 +000017012 if (imm8 < 8) {
17013 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
17014 } else {
17015 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
17016 }
17017
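   // mkV128 expands its 16-bit immediate one bit per byte lane
   // (bit set => 0xFF), so ~(1 << imm8) keeps every byte of v128
   // except the one being inserted into.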
17018 UShort mask = ~(1 << imm8);
sewardj4ed05e02012-06-18 15:01:30 +000017019 IRTemp res = newTemp(Ity_V128);
17020 assign( res, binop(Iop_OrV128,
17021 mkexpr(tmp128),
17022 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
sewardj21459cb2012-06-18 14:05:52 +000017023 return res;
17024}
17025
17026
sewardj6faf7cc2012-05-25 15:53:01 +000017027static IRTemp math_PINSRD_128 ( IRTemp v128, IRTemp u32, UInt imm8 )
17028{
17029 IRTemp z32 = newTemp(Ity_I32);
17030 assign(z32, mkU32(0));
17031
17032 /* Surround u32 with zeroes as per imm, giving us something we can
17033 OR into a suitably masked-out v128. */
17034 IRTemp withZs = newTemp(Ity_V128);
17035 UShort mask = 0;
17036 switch (imm8) {
17037 case 3: mask = 0x0FFF;
sewardj4b1cc832012-06-13 11:10:20 +000017038 assign(withZs, mkV128from32s(u32, z32, z32, z32));
sewardj6faf7cc2012-05-25 15:53:01 +000017039 break;
17040 case 2: mask = 0xF0FF;
sewardj4b1cc832012-06-13 11:10:20 +000017041 assign(withZs, mkV128from32s(z32, u32, z32, z32));
sewardj6faf7cc2012-05-25 15:53:01 +000017042 break;
17043 case 1: mask = 0xFF0F;
sewardj4b1cc832012-06-13 11:10:20 +000017044 assign(withZs, mkV128from32s(z32, z32, u32, z32));
sewardj6faf7cc2012-05-25 15:53:01 +000017045 break;
17046 case 0: mask = 0xFFF0;
sewardj4b1cc832012-06-13 11:10:20 +000017047 assign(withZs, mkV128from32s(z32, z32, z32, u32));
sewardj6faf7cc2012-05-25 15:53:01 +000017048 break;
17049 default: vassert(0);
17050 }
17051
17052 IRTemp res = newTemp(Ity_V128);
17053 assign(res, binop( Iop_OrV128,
17054 mkexpr(withZs),
17055 binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
17056 return res;
17057}
17058
17059
sewardj98d02cc2012-06-02 11:55:25 +000017060static IRTemp math_PINSRQ_128 ( IRTemp v128, IRTemp u64, UInt imm8 )
17061{
17062 /* Surround u64 with zeroes as per imm, giving us something we can
17063 OR into a suitably masked-out v128. */
17064 IRTemp withZs = newTemp(Ity_V128);
17065 UShort mask = 0;
17066 if (imm8 == 0) {
17067 mask = 0xFF00;
17068 assign(withZs, binop(Iop_64HLtoV128, mkU64(0), mkexpr(u64)));
17069 } else {
17070 vassert(imm8 == 1);
17071 mask = 0x00FF;
17072 assign( withZs, binop(Iop_64HLtoV128, mkexpr(u64), mkU64(0)));
17073 }
17074
17075 IRTemp res = newTemp(Ity_V128);
17076 assign( res, binop( Iop_OrV128,
17077 mkexpr(withZs),
17078 binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
17079 return res;
17080}
17081
17082
sewardjcfca8cd2012-05-27 08:25:42 +000017083static IRTemp math_INSERTPS ( IRTemp dstV, IRTemp toInsertD, UInt imm8 )
17084{
17085 const IRTemp inval = IRTemp_INVALID;
17086 IRTemp dstDs[4] = { inval, inval, inval, inval };
sewardj4b1cc832012-06-13 11:10:20 +000017087 breakupV128to32s( dstV, &dstDs[3], &dstDs[2], &dstDs[1], &dstDs[0] );
sewardjcfca8cd2012-05-27 08:25:42 +000017088
17089 vassert(imm8 <= 255);
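   /* imm8 layout: bits 7:6 = count_s (source lane; dealt with by the
      caller when the source is a register), bits 5:4 = count_d
      (destination lane), bits 3:0 = zmask. */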
17090 dstDs[(imm8 >> 4) & 3] = toInsertD; /* "imm8_count_d" */
17091
17092 UInt imm8_zmask = (imm8 & 15);
17093 IRTemp zero_32 = newTemp(Ity_I32);
17094 assign( zero_32, mkU32(0) );
17095 IRTemp resV = newTemp(Ity_V128);
sewardj4b1cc832012-06-13 11:10:20 +000017096 assign( resV, mkV128from32s(
sewardjcfca8cd2012-05-27 08:25:42 +000017097 ((imm8_zmask & 8) == 8) ? zero_32 : dstDs[3],
17098 ((imm8_zmask & 4) == 4) ? zero_32 : dstDs[2],
17099 ((imm8_zmask & 2) == 2) ? zero_32 : dstDs[1],
17100 ((imm8_zmask & 1) == 1) ? zero_32 : dstDs[0]) );
17101 return resV;
17102}
17103
17104
sewardje8a7eb72012-06-12 14:59:17 +000017105static Long dis_PEXTRB_128_GtoE ( VexAbiInfo* vbi, Prefix pfx,
17106 Long delta, Bool isAvx )
17107{
17108 IRTemp addr = IRTemp_INVALID;
17109 Int alen = 0;
17110 HChar dis_buf[50];
17111 IRTemp xmm_vec = newTemp(Ity_V128);
17112 IRTemp sel_lane = newTemp(Ity_I32);
17113 IRTemp shr_lane = newTemp(Ity_I32);
17114 HChar* mbV = isAvx ? "v" : "";
17115 UChar modrm = getUChar(delta);
17116 IRTemp t3, t2, t1, t0;
17117 Int imm8;
17118 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
17119 t3 = t2 = t1 = t0 = IRTemp_INVALID;
sewardj4b1cc832012-06-13 11:10:20 +000017120 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
sewardje8a7eb72012-06-12 14:59:17 +000017121
17122 if ( epartIsReg( modrm ) ) {
17123 imm8 = (Int)getUChar(delta+1);
17124 } else {
17125 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17126 imm8 = (Int)getUChar(delta+alen);
17127 }
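   /* imm8 bits 3:2 select the 32-bit lane; bits 1:0 select the byte
      within it. */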
17128 switch ( (imm8 >> 2) & 3 ) {
17129 case 0: assign( sel_lane, mkexpr(t0) ); break;
17130 case 1: assign( sel_lane, mkexpr(t1) ); break;
17131 case 2: assign( sel_lane, mkexpr(t2) ); break;
17132 case 3: assign( sel_lane, mkexpr(t3) ); break;
17133 default: vassert(0);
17134 }
17135 assign( shr_lane,
17136 binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) );
17137
17138 if ( epartIsReg( modrm ) ) {
17139 putIReg64( eregOfRexRM(pfx,modrm),
17140 unop( Iop_32Uto64,
17141 binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) );
17142 delta += 1+1;
17143 DIP( "%spextrb $%d, %s,%s\n", mbV, imm8,
17144 nameXMMReg( gregOfRexRM(pfx, modrm) ),
17145 nameIReg64( eregOfRexRM(pfx, modrm) ) );
17146 } else {
17147 storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) );
17148 delta += alen+1;
17149 DIP( "%spextrb $%d,%s,%s\n", mbV,
17150 imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
17151 }
17152
17153 return delta;
17154}
17155
17156
sewardj4ed05e02012-06-18 15:01:30 +000017157static IRTemp math_DPPD_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
17158{
17159 vassert(imm8 < 256);
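   /* imm8 bits 5:4 say which of the two products contribute to the
      sum; bits 1:0 say which result lanes receive the sum (the others
      are zeroed). imm8_perms maps each 2-bit field to a
      byte-granularity mkV128 mask. */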
17160 UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF };
17161 IRTemp and_vec = newTemp(Ity_V128);
17162 IRTemp sum_vec = newTemp(Ity_V128);
17163 assign( and_vec, binop( Iop_AndV128,
17164 binop( Iop_Mul64Fx2,
17165 mkexpr(dst_vec), mkexpr(src_vec) ),
17166 mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) );
17167
17168 assign( sum_vec, binop( Iop_Add64F0x2,
17169 binop( Iop_InterleaveHI64x2,
17170 mkexpr(and_vec), mkexpr(and_vec) ),
17171 binop( Iop_InterleaveLO64x2,
17172 mkexpr(and_vec), mkexpr(and_vec) ) ) );
17173 IRTemp res = newTemp(Ity_V128);
17174 assign(res, binop( Iop_AndV128,
17175 binop( Iop_InterleaveLO64x2,
17176 mkexpr(sum_vec), mkexpr(sum_vec) ),
17177 mkV128( imm8_perms[ (imm8 & 3) ] ) ) );
17178 return res;
17179}
17180
17181
sewardjadf357c2012-06-24 13:44:17 +000017182static IRTemp math_DPPS_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
17183{
17184 vassert(imm8 < 256);
17185 IRTemp tmp_prod_vec = newTemp(Ity_V128);
17186 IRTemp prod_vec = newTemp(Ity_V128);
17187 IRTemp sum_vec = newTemp(Ity_V128);
17188 IRTemp v3, v2, v1, v0;
17189 v3 = v2 = v1 = v0 = IRTemp_INVALID;
17190 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
17191 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
17192 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
17193 0xFFFF };
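   /* imm8 bits 7:4 mask the products entering the dot product;
      bits 3:0 select which lanes of the result receive the sum. */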
17194
17195 assign( tmp_prod_vec,
17196 binop( Iop_AndV128,
17197 binop( Iop_Mul32Fx4, mkexpr(dst_vec),
17198 mkexpr(src_vec) ),
17199 mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) );
17200 breakupV128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 );
17201 assign( prod_vec, mkV128from32s( v3, v1, v2, v0 ) );
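   /* Note the v1/v2 swap: it makes the first interleave-and-add stage
      produce (p3+p2) and (p1+p0), which appears to be the grouping the
      Intel pseudocode specifies, so the FP rounding behaviour should
      match. */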
17202
17203 assign( sum_vec, binop( Iop_Add32Fx4,
17204 binop( Iop_InterleaveHI32x4,
17205 mkexpr(prod_vec), mkexpr(prod_vec) ),
17206 binop( Iop_InterleaveLO32x4,
17207 mkexpr(prod_vec), mkexpr(prod_vec) ) ) );
17208
17209 IRTemp res = newTemp(Ity_V128);
17210 assign( res, binop( Iop_AndV128,
17211 binop( Iop_Add32Fx4,
17212 binop( Iop_InterleaveHI32x4,
17213 mkexpr(sum_vec), mkexpr(sum_vec) ),
17214 binop( Iop_InterleaveLO32x4,
17215 mkexpr(sum_vec), mkexpr(sum_vec) ) ),
17216 mkV128( imm8_perms[ (imm8 & 15) ] ) ) );
17217 return res;
17218}
17219
17220
sewardj8516a1f2012-06-24 14:26:30 +000017221static IRTemp math_MPSADBW_128 ( IRTemp dst_vec, IRTemp src_vec, UInt imm8 )
17222{
17223 /* Mask out bits of the operands we don't need. This isn't
17224 strictly necessary, but it does ensure Memcheck doesn't
17225 give us any false uninitialised value errors as a
17226 result. */
17227 UShort src_mask[4] = { 0x000F, 0x00F0, 0x0F00, 0xF000 };
17228 UShort dst_mask[2] = { 0x07FF, 0x7FF0 };
17229
17230 IRTemp src_maskV = newTemp(Ity_V128);
17231 IRTemp dst_maskV = newTemp(Ity_V128);
17232 assign(src_maskV, mkV128( src_mask[ imm8 & 3 ] ));
17233 assign(dst_maskV, mkV128( dst_mask[ (imm8 >> 2) & 1 ] ));
17234
17235 IRTemp src_masked = newTemp(Ity_V128);
17236 IRTemp dst_masked = newTemp(Ity_V128);
17237 assign(src_masked, binop(Iop_AndV128, mkexpr(src_vec), mkexpr(src_maskV)));
17238 assign(dst_masked, binop(Iop_AndV128, mkexpr(dst_vec), mkexpr(dst_maskV)));
17239
17240 /* Generate 4 64 bit values that we can hand to a clean helper */
17241 IRTemp sHi = newTemp(Ity_I64);
17242 IRTemp sLo = newTemp(Ity_I64);
17243 assign( sHi, unop(Iop_V128HIto64, mkexpr(src_masked)) );
17244 assign( sLo, unop(Iop_V128to64, mkexpr(src_masked)) );
17245
17246 IRTemp dHi = newTemp(Ity_I64);
17247 IRTemp dLo = newTemp(Ity_I64);
17248 assign( dHi, unop(Iop_V128HIto64, mkexpr(dst_masked)) );
17249 assign( dLo, unop(Iop_V128to64, mkexpr(dst_masked)) );
17250
17251 /* Compute halves of the result separately */
17252 IRTemp resHi = newTemp(Ity_I64);
17253 IRTemp resLo = newTemp(Ity_I64);
17254
17255 IRExpr** argsHi
17256 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
17257 mkU64( 0x80 | (imm8 & 7) ));
17258 IRExpr** argsLo
17259 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
17260 mkU64( 0x00 | (imm8 & 7) ));
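   /* The 0x80 bit in the last argument presumably asks the helper for
      the high 64 bits of the result and 0x00 for the low 64 bits --
      see how resHi/resLo are glued back together below. */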
17261
17262 assign(resHi, mkIRExprCCall( Ity_I64, 0/*regparm*/,
17263 "amd64g_calc_mpsadbw",
17264 &amd64g_calc_mpsadbw, argsHi ));
17265 assign(resLo, mkIRExprCCall( Ity_I64, 0/*regparm*/,
17266 "amd64g_calc_mpsadbw",
17267 &amd64g_calc_mpsadbw, argsLo ));
17268
17269 IRTemp res = newTemp(Ity_V128);
17270 assign(res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo)));
17271 return res;
17272}
17273
sewardjadf357c2012-06-24 13:44:17 +000017274static Long dis_EXTRACTPS ( VexAbiInfo* vbi, Prefix pfx,
17275 Long delta, Bool isAvx )
17276{
17277 IRTemp addr = IRTemp_INVALID;
17278 Int alen = 0;
17279 HChar dis_buf[50];
17280 UChar modrm = getUChar(delta);
17281 Int imm8_10;
17282 IRTemp xmm_vec = newTemp(Ity_V128);
17283 IRTemp src_dword = newTemp(Ity_I32);
17284 UInt rG = gregOfRexRM(pfx,modrm);
17285 IRTemp t3, t2, t1, t0;
17286 t3 = t2 = t1 = t0 = IRTemp_INVALID;
17287
17288 assign( xmm_vec, getXMMReg( rG ) );
17289 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
17290
17291 if ( epartIsReg( modrm ) ) {
17292 imm8_10 = (Int)(getUChar(delta+1) & 3);
17293 } else {
17294 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17295 imm8_10 = (Int)(getUChar(delta+alen) & 3);
17296 }
17297
17298 switch ( imm8_10 ) {
17299 case 0: assign( src_dword, mkexpr(t0) ); break;
17300 case 1: assign( src_dword, mkexpr(t1) ); break;
17301 case 2: assign( src_dword, mkexpr(t2) ); break;
17302 case 3: assign( src_dword, mkexpr(t3) ); break;
17303 default: vassert(0);
17304 }
17305
17306 if ( epartIsReg( modrm ) ) {
17307 UInt rE = eregOfRexRM(pfx,modrm);
17308 putIReg32( rE, mkexpr(src_dword) );
17309 delta += 1+1;
17310 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
17311 nameXMMReg( rG ), nameIReg32( rE ) );
17312 } else {
17313 storeLE( mkexpr(addr), mkexpr(src_dword) );
17314 delta += alen+1;
17315 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
17316 nameXMMReg( rG ), dis_buf );
17317 }
17318
17319 return delta;
17320}
17321
17322
sewardj1407a362012-06-24 15:11:38 +000017323static IRTemp math_PCLMULQDQ( IRTemp dV, IRTemp sV, UInt imm8 )
17324{
17325 IRTemp t0 = newTemp(Ity_I64);
17326 IRTemp t1 = newTemp(Ity_I64);
17327 assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64,
17328 mkexpr(dV)));
17329 assign(t1, unop((imm8&16) ? Iop_V128HIto64 : Iop_V128to64,
17330 mkexpr(sV)));
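   /* Per the PCLMULQDQ encoding, imm8 bit 0 selects the low or high
      quadword of dV and bit 4 selects that of sV. The helper is then
      called twice, to compute the low (arg 0) and high (arg 1) 64 bits
      of the 128-bit carry-less product. */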
17331
17332 IRTemp t2 = newTemp(Ity_I64);
17333 IRTemp t3 = newTemp(Ity_I64);
17334
17335 IRExpr** args;
17336
17337 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0));
17338 assign(t2, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
17339 &amd64g_calculate_pclmul, args));
17340 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1));
17341 assign(t3, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
17342 &amd64g_calculate_pclmul, args));
17343
17344 IRTemp res = newTemp(Ity_V128);
17345 assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)));
17346 return res;
17347}
17348
17349
sewardj80611e32012-01-20 13:07:24 +000017350__attribute__((noinline))
17351static
17352Long dis_ESC_0F3A__SSE4 ( Bool* decode_OK,
17353 VexAbiInfo* vbi,
17354 Prefix pfx, Int sz, Long deltaIN )
17355{
17356 IRTemp addr = IRTemp_INVALID;
sewardj80611e32012-01-20 13:07:24 +000017357 UChar modrm = 0;
17358 Int alen = 0;
17359 HChar dis_buf[50];
17360
17361 *decode_OK = False;
17362
17363 Long delta = deltaIN;
17364 UChar opc = getUChar(delta);
17365 delta++;
17366 switch (opc) {
17367
17368 case 0x08:
17369 /* 66 0F 3A 08 /r ib = ROUNDPS imm8, xmm2/m128, xmm1 */
17370 if (have66noF2noF3(pfx) && sz == 2) {
17371
17372 IRTemp src0 = newTemp(Ity_F32);
17373 IRTemp src1 = newTemp(Ity_F32);
17374 IRTemp src2 = newTemp(Ity_F32);
17375 IRTemp src3 = newTemp(Ity_F32);
17376 IRTemp res0 = newTemp(Ity_F32);
17377 IRTemp res1 = newTemp(Ity_F32);
17378 IRTemp res2 = newTemp(Ity_F32);
17379 IRTemp res3 = newTemp(Ity_F32);
17380 IRTemp rm = newTemp(Ity_I32);
17381 Int imm = 0;
17382
17383 modrm = getUChar(delta);
17384
17385 if (epartIsReg(modrm)) {
17386 assign( src0,
17387 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
17388 assign( src1,
17389 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 1 ) );
17390 assign( src2,
17391 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 2 ) );
17392 assign( src3,
17393 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 3 ) );
17394 imm = getUChar(delta+1);
17395 if (imm & ~15) goto decode_failure;
17396 delta += 1+1;
17397 DIP( "roundps $%d,%s,%s\n",
17398 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
17399 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17400 } else {
sewardjd343e622012-05-24 06:17:14 +000017401 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj80611e32012-01-20 13:07:24 +000017402 gen_SEGV_if_not_16_aligned(addr);
17403 assign( src0, loadLE(Ity_F32,
17404 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
17405 assign( src1, loadLE(Ity_F32,
17406 binop(Iop_Add64, mkexpr(addr), mkU64(4) )));
17407 assign( src2, loadLE(Ity_F32,
17408 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
17409 assign( src3, loadLE(Ity_F32,
17410 binop(Iop_Add64, mkexpr(addr), mkU64(12) )));
17411 imm = getUChar(delta+alen);
17412 if (imm & ~15) goto decode_failure;
17413 delta += alen+1;
17414 DIP( "roundps $%d,%s,%s\n",
17415 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17416 }
17417
17418 /* (imm & 3) contains an Intel-encoded rounding mode. Because
17419 that encoding is the same as the encoding for IRRoundingMode,
17420 we can use that value directly in the IR as a rounding
17421 mode. */
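      /* That shared encoding is: 0 = to nearest (even), 1 = towards
         -infinity, 2 = towards +infinity, 3 = towards zero. If bit 2
         of imm is set, the MXCSR rounding mode is used instead. */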
17422 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
17423
17424 assign(res0, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src0)) );
17425 assign(res1, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src1)) );
17426 assign(res2, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src2)) );
17427 assign(res3, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src3)) );
17428
17429 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
17430 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
17431 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 2, mkexpr(res2) );
17432 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 3, mkexpr(res3) );
17433
17434 goto decode_success;
17435 }
17436 break;
17437
17438 case 0x09:
17439 /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */
17440 if (have66noF2noF3(pfx) && sz == 2) {
17441
17442 IRTemp src0 = newTemp(Ity_F64);
17443 IRTemp src1 = newTemp(Ity_F64);
17444 IRTemp res0 = newTemp(Ity_F64);
17445 IRTemp res1 = newTemp(Ity_F64);
17446 IRTemp rm = newTemp(Ity_I32);
17447 Int imm = 0;
17448
17449 modrm = getUChar(delta);
17450
17451 if (epartIsReg(modrm)) {
17452 assign( src0,
17453 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) );
17454 assign( src1,
17455 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 1 ) );
17456 imm = getUChar(delta+1);
17457 if (imm & ~15) goto decode_failure;
17458 delta += 1+1;
17459 DIP( "roundpd $%d,%s,%s\n",
17460 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
17461 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17462 } else {
sewardjd343e622012-05-24 06:17:14 +000017463 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj80611e32012-01-20 13:07:24 +000017464 gen_SEGV_if_not_16_aligned(addr);
17465 assign( src0, loadLE(Ity_F64,
17466 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
17467 assign( src1, loadLE(Ity_F64,
17468 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
17469 imm = getUChar(delta+alen);
17470 if (imm & ~15) goto decode_failure;
17471 delta += alen+1;
17472 DIP( "roundpd $%d,%s,%s\n",
17473 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17474 }
17475
17476 /* (imm & 3) contains an Intel-encoded rounding mode. Because
17477 that encoding is the same as the encoding for IRRoundingMode,
17478 we can use that value directly in the IR as a rounding
17479 mode. */
17480 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
17481
17482 assign(res0, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src0)) );
17483 assign(res1, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src1)) );
17484
17485 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
17486 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
17487
17488 goto decode_success;
17489 }
17490 break;
17491
17492 case 0x0A:
17493 case 0x0B:
17494 /* 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
17495 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
17496 */
17497 if (have66noF2noF3(pfx) && sz == 2) {
17498
17499 Bool isD = opc == 0x0B;
17500 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
17501 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
17502 Int imm = 0;
17503
17504 modrm = getUChar(delta);
17505
17506 if (epartIsReg(modrm)) {
17507 assign( src,
17508 isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 )
17509 : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
17510 imm = getUChar(delta+1);
17511 if (imm & ~15) goto decode_failure;
17512 delta += 1+1;
17513 DIP( "rounds%c $%d,%s,%s\n",
17514 isD ? 'd' : 's',
17515 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
17516 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17517 } else {
sewardjd343e622012-05-24 06:17:14 +000017518 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj80611e32012-01-20 13:07:24 +000017519 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
17520 imm = getUChar(delta+alen);
17521 if (imm & ~15) goto decode_failure;
17522 delta += alen+1;
17523 DIP( "rounds%c $%d,%s,%s\n",
17524 isD ? 'd' : 's',
17525 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17526 }
17527
17528 /* (imm & 3) contains an Intel-encoded rounding mode. Because
17529 that encoding is the same as the encoding for IRRoundingMode,
17530 we can use that value directly in the IR as a rounding
17531 mode. */
17532 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
17533 (imm & 4) ? get_sse_roundingmode()
17534 : mkU32(imm & 3),
17535 mkexpr(src)) );
17536
17537 if (isD)
17538 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
17539 else
17540 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
17541
17542 goto decode_success;
17543 }
17544 break;
17545
17546 case 0x0C:
17547 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8
17548 Blend Packed Single Precision Floating-Point Values (XMM) */
17549 if (have66noF2noF3(pfx) && sz == 2) {
17550
17551 Int imm8;
17552 IRTemp dst_vec = newTemp(Ity_V128);
17553 IRTemp src_vec = newTemp(Ity_V128);
17554
17555 modrm = getUChar(delta);
17556
17557 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
17558
17559 if ( epartIsReg( modrm ) ) {
17560 imm8 = (Int)getUChar(delta+1);
17561 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
17562 delta += 1+1;
17563 DIP( "blendps $%d, %s,%s\n", imm8,
17564 nameXMMReg( eregOfRexRM(pfx, modrm) ),
17565 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17566 } else {
17567 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
17568 1/* imm8 is 1 byte after the amode */ );
17569 gen_SEGV_if_not_16_aligned( addr );
17570 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
17571 imm8 = (Int)getUChar(delta+alen);
17572 delta += alen+1;
17573 DIP( "blendpd $%d, %s,%s\n",
17574 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17575 }
17576
sewardj80611e32012-01-20 13:07:24 +000017577 putXMMReg( gregOfRexRM(pfx, modrm),
sewardj21459cb2012-06-18 14:05:52 +000017578 mkexpr( math_BLENDPS_128( src_vec, dst_vec, imm8) ) );
sewardj80611e32012-01-20 13:07:24 +000017579 goto decode_success;
17580 }
17581 break;
17582
17583 case 0x0D:
17584 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8
17585 Blend Packed Double Precision Floating-Point Values (XMM) */
17586 if (have66noF2noF3(pfx) && sz == 2) {
17587
17588 Int imm8;
sewardj80611e32012-01-20 13:07:24 +000017589 IRTemp dst_vec = newTemp(Ity_V128);
17590 IRTemp src_vec = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000017591
17592 modrm = getUChar(delta);
17593 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
17594
17595 if ( epartIsReg( modrm ) ) {
17596 imm8 = (Int)getUChar(delta+1);
17597 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
17598 delta += 1+1;
17599 DIP( "blendpd $%d, %s,%s\n", imm8,
17600 nameXMMReg( eregOfRexRM(pfx, modrm) ),
17601 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17602 } else {
17603 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
17604 1/* imm8 is 1 byte after the amode */ );
17605 gen_SEGV_if_not_16_aligned( addr );
17606 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
17607 imm8 = (Int)getUChar(delta+alen);
17608 delta += alen+1;
17609 DIP( "blendpd $%d, %s,%s\n",
17610 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17611 }
17612
sewardj80611e32012-01-20 13:07:24 +000017613 putXMMReg( gregOfRexRM(pfx, modrm),
sewardj21459cb2012-06-18 14:05:52 +000017614 mkexpr( math_BLENDPD_128( src_vec, dst_vec, imm8) ) );
sewardj80611e32012-01-20 13:07:24 +000017615 goto decode_success;
17616 }
17617 break;
17618
17619 case 0x0E:
17620 /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8
17621 Blend Packed Words (XMM) */
17622 if (have66noF2noF3(pfx) && sz == 2) {
17623
17624 Int imm8;
17625 IRTemp dst_vec = newTemp(Ity_V128);
17626 IRTemp src_vec = newTemp(Ity_V128);
17627
17628 modrm = getUChar(delta);
17629
17630 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
17631
17632 if ( epartIsReg( modrm ) ) {
17633 imm8 = (Int)getUChar(delta+1);
17634 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
17635 delta += 1+1;
17636 DIP( "pblendw $%d, %s,%s\n", imm8,
17637 nameXMMReg( eregOfRexRM(pfx, modrm) ),
17638 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17639 } else {
17640 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
17641 1/* imm8 is 1 byte after the amode */ );
17642 gen_SEGV_if_not_16_aligned( addr );
17643 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
17644 imm8 = (Int)getUChar(delta+alen);
17645 delta += alen+1;
17646 DIP( "pblendw $%d, %s,%s\n",
17647 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17648 }
17649
sewardj80611e32012-01-20 13:07:24 +000017650 putXMMReg( gregOfRexRM(pfx, modrm),
sewardj21459cb2012-06-18 14:05:52 +000017651 mkexpr( math_PBLENDW_128( src_vec, dst_vec, imm8) ) );
sewardj80611e32012-01-20 13:07:24 +000017652 goto decode_success;
17653 }
17654 break;
17655
17656 case 0x14:
17657 /* 66 0F 3A 14 /r ib = PEXTRB r/m8, xmm, imm8
17658 Extract Byte from xmm, store in mem or zero-extend + store in gen.reg.
17659 (XMM) */
17660 if (have66noF2noF3(pfx) && sz == 2) {
sewardje8a7eb72012-06-12 14:59:17 +000017661 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000017662 goto decode_success;
17663 }
17664 break;
17665
17666 case 0x15:
17667 /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8
17668 Extract Word from xmm, store in mem or zero-extend + store in gen.reg.
17669 (XMM) */
17670 if (have66noF2noF3(pfx) && sz == 2) {
sewardj82096922012-06-24 14:57:59 +000017671 delta = dis_PEXTRW( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000017672 goto decode_success;
17673 }
17674 break;
17675
17676 case 0x16:
17677 /* 66 no-REX.W 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8
17678 Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM)
17679 Note that this insn has the same opcodes as PEXTRQ, but
17680 here the REX.W bit is _not_ present */
17681 if (have66noF2noF3(pfx)
17682 && sz == 2 /* REX.W is _not_ present */) {
sewardjc4530ae2012-05-21 10:18:49 +000017683 delta = dis_PEXTRD( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000017684 goto decode_success;
17685 }
17686 /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8
17687 Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM)
17688 Note that this insn has the same opcodes as PEXTRD, but
17689 here the REX.W bit is present */
17690 if (have66noF2noF3(pfx)
17691 && sz == 8 /* REX.W is present */) {
sewardj56c30312012-06-12 08:45:39 +000017692 delta = dis_PEXTRQ( vbi, pfx, delta, False/*!isAvx*/);
sewardj80611e32012-01-20 13:07:24 +000017693 goto decode_success;
17694 }
17695 break;
17696
17697 case 0x17:
17698 /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract
17699 float from xmm reg and store in gen.reg or mem. This is
17700 identical to PEXTRD, except that REX.W appears to be ignored.
17701 */
17702 if (have66noF2noF3(pfx)
17703 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
sewardjadf357c2012-06-24 13:44:17 +000017704 delta = dis_EXTRACTPS( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000017705 goto decode_success;
17706 }
17707 break;
17708
17709 case 0x20:
17710 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8
17711 Extract byte from r32/m8 and insert into xmm1 */
17712 if (have66noF2noF3(pfx) && sz == 2) {
sewardj80611e32012-01-20 13:07:24 +000017713 Int imm8;
sewardj4ed05e02012-06-18 15:01:30 +000017714 IRTemp new8 = newTemp(Ity_I8);
sewardj80611e32012-01-20 13:07:24 +000017715 modrm = getUChar(delta);
sewardj21459cb2012-06-18 14:05:52 +000017716 UInt rG = gregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000017717 if ( epartIsReg( modrm ) ) {
sewardj4ed05e02012-06-18 15:01:30 +000017718 UInt rE = eregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000017719 imm8 = (Int)(getUChar(delta+1) & 0xF);
sewardj4ed05e02012-06-18 15:01:30 +000017720 assign( new8, unop(Iop_32to8, getIReg32(rE)) );
sewardj80611e32012-01-20 13:07:24 +000017721 delta += 1+1;
17722 DIP( "pinsrb $%d,%s,%s\n", imm8,
sewardj4ed05e02012-06-18 15:01:30 +000017723 nameIReg32(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000017724 } else {
17725 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17726 imm8 = (Int)(getUChar(delta+alen) & 0xF);
sewardj4ed05e02012-06-18 15:01:30 +000017727 assign( new8, loadLE( Ity_I8, mkexpr(addr) ) );
sewardj80611e32012-01-20 13:07:24 +000017728 delta += alen+1;
17729 DIP( "pinsrb $%d,%s,%s\n",
sewardj4ed05e02012-06-18 15:01:30 +000017730 imm8, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000017731 }
sewardj21459cb2012-06-18 14:05:52 +000017732 IRTemp src_vec = newTemp(Ity_V128);
sewardj4ed05e02012-06-18 15:01:30 +000017733 assign(src_vec, getXMMReg( gregOfRexRM(pfx, modrm) ));
17734 IRTemp res = math_PINSRB_128( src_vec, new8, imm8 );
17735 putXMMReg( rG, mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000017736 goto decode_success;
17737 }
17738 break;
17739
17740 case 0x21:
sewardjcfca8cd2012-05-27 08:25:42 +000017741 /* 66 0F 3A 21 /r ib = INSERTPS imm8, xmm2/m32, xmm1
sewardj80611e32012-01-20 13:07:24 +000017742 Insert Packed Single Precision Floating-Point Value (XMM) */
17743 if (have66noF2noF3(pfx) && sz == 2) {
sewardjcfca8cd2012-05-27 08:25:42 +000017744 UInt imm8;
17745 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */
17746 const IRTemp inval = IRTemp_INVALID;
sewardj80611e32012-01-20 13:07:24 +000017747
17748 modrm = getUChar(delta);
sewardjcfca8cd2012-05-27 08:25:42 +000017749 UInt rG = gregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000017750
17751 if ( epartIsReg( modrm ) ) {
sewardjcfca8cd2012-05-27 08:25:42 +000017752 UInt rE = eregOfRexRM(pfx, modrm);
17753 IRTemp vE = newTemp(Ity_V128);
17754 assign( vE, getXMMReg(rE) );
17755 IRTemp dsE[4] = { inval, inval, inval, inval };
sewardj4b1cc832012-06-13 11:10:20 +000017756 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] );
sewardjcfca8cd2012-05-27 08:25:42 +000017757 imm8 = getUChar(delta+1);
17758 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */
sewardj80611e32012-01-20 13:07:24 +000017759 delta += 1+1;
sewardjcfca8cd2012-05-27 08:25:42 +000017760 DIP( "insertps $%u, %s,%s\n",
17761 imm8, nameXMMReg(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000017762 } else {
sewardjcfca8cd2012-05-27 08:25:42 +000017763 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17764 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) );
17765 imm8 = getUChar(delta+alen);
sewardj80611e32012-01-20 13:07:24 +000017766 delta += alen+1;
sewardjcfca8cd2012-05-27 08:25:42 +000017767 DIP( "insertps $%u, %s,%s\n",
17768 imm8, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000017769 }
17770
sewardjcfca8cd2012-05-27 08:25:42 +000017771 IRTemp vG = newTemp(Ity_V128);
17772 assign( vG, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000017773
sewardjcfca8cd2012-05-27 08:25:42 +000017774 putXMMReg( rG, mkexpr(math_INSERTPS( vG, d2ins, imm8 )) );
sewardj80611e32012-01-20 13:07:24 +000017775 goto decode_success;
17776 }
17777 break;
17778
17779 case 0x22:
17780 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8
17781 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */
17782 if (have66noF2noF3(pfx)
17783 && sz == 2 /* REX.W is NOT present */) {
sewardj6faf7cc2012-05-25 15:53:01 +000017784 Int imm8_10;
17785 IRTemp src_u32 = newTemp(Ity_I32);
sewardj80611e32012-01-20 13:07:24 +000017786 modrm = getUChar(delta);
sewardj6faf7cc2012-05-25 15:53:01 +000017787 UInt rG = gregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000017788
17789 if ( epartIsReg( modrm ) ) {
sewardj6faf7cc2012-05-25 15:53:01 +000017790 UInt rE = eregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000017791 imm8_10 = (Int)(getUChar(delta+1) & 3);
sewardj6faf7cc2012-05-25 15:53:01 +000017792 assign( src_u32, getIReg32( rE ) );
sewardj80611e32012-01-20 13:07:24 +000017793 delta += 1+1;
sewardj6faf7cc2012-05-25 15:53:01 +000017794 DIP( "pinsrd $%d, %s,%s\n",
17795 imm8_10, nameIReg32(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000017796 } else {
17797 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17798 imm8_10 = (Int)(getUChar(delta+alen) & 3);
sewardj6faf7cc2012-05-25 15:53:01 +000017799 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) );
sewardj80611e32012-01-20 13:07:24 +000017800 delta += alen+1;
17801 DIP( "pinsrd $%d, %s,%s\n",
sewardj6faf7cc2012-05-25 15:53:01 +000017802 imm8_10, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000017803 }
17804
sewardj6faf7cc2012-05-25 15:53:01 +000017805 IRTemp src_vec = newTemp(Ity_V128);
17806 assign(src_vec, getXMMReg( rG ));
17807 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 );
17808 putXMMReg( rG, mkexpr(res_vec) );
sewardj80611e32012-01-20 13:07:24 +000017809 goto decode_success;
17810 }
17811 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8
17812 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */
17813 if (have66noF2noF3(pfx)
17814 && sz == 8 /* REX.W is present */) {
sewardj80611e32012-01-20 13:07:24 +000017815 Int imm8_0;
sewardj98d02cc2012-06-02 11:55:25 +000017816 IRTemp src_u64 = newTemp(Ity_I64);
sewardj80611e32012-01-20 13:07:24 +000017817 modrm = getUChar(delta);
sewardj98d02cc2012-06-02 11:55:25 +000017818 UInt rG = gregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000017819
17820 if ( epartIsReg( modrm ) ) {
sewardj98d02cc2012-06-02 11:55:25 +000017821 UInt rE = eregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000017822 imm8_0 = (Int)(getUChar(delta+1) & 1);
sewardj98d02cc2012-06-02 11:55:25 +000017823 assign( src_u64, getIReg64( rE ) );
sewardj80611e32012-01-20 13:07:24 +000017824 delta += 1+1;
sewardj98d02cc2012-06-02 11:55:25 +000017825 DIP( "pinsrq $%d, %s,%s\n",
17826 imm8_0, nameIReg64(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000017827 } else {
17828 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17829 imm8_0 = (Int)(getUChar(delta+alen) & 1);
sewardj98d02cc2012-06-02 11:55:25 +000017830 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) );
sewardj80611e32012-01-20 13:07:24 +000017831 delta += alen+1;
17832 DIP( "pinsrq $%d, %s,%s\n",
sewardj98d02cc2012-06-02 11:55:25 +000017833 imm8_0, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000017834 }
17835
sewardj98d02cc2012-06-02 11:55:25 +000017836 IRTemp src_vec = newTemp(Ity_V128);
17837 assign(src_vec, getXMMReg( rG ));
17838 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 );
17839 putXMMReg( rG, mkexpr(res_vec) );
sewardj80611e32012-01-20 13:07:24 +000017840 goto decode_success;
17841 }
17842 break;
17843
17844 case 0x40:
17845 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8
17846 Dot Product of Packed Single Precision Floating-Point Values (XMM) */
17847 if (have66noF2noF3(pfx) && sz == 2) {
sewardj80611e32012-01-20 13:07:24 +000017848 modrm = getUChar(delta);
sewardjadf357c2012-06-24 13:44:17 +000017849 Int imm8;
17850 IRTemp src_vec = newTemp(Ity_V128);
17851 IRTemp dst_vec = newTemp(Ity_V128);
17852 UInt rG = gregOfRexRM(pfx, modrm);
17853 assign( dst_vec, getXMMReg( rG ) );
sewardj80611e32012-01-20 13:07:24 +000017854 if ( epartIsReg( modrm ) ) {
sewardjadf357c2012-06-24 13:44:17 +000017855 UInt rE = eregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000017856 imm8 = (Int)getUChar(delta+1);
sewardjadf357c2012-06-24 13:44:17 +000017857 assign( src_vec, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000017858 delta += 1+1;
sewardjadf357c2012-06-24 13:44:17 +000017859 DIP( "dpps $%d, %s,%s\n",
17860 imm8, nameXMMReg(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000017861 } else {
17862 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
17863 1/* imm8 is 1 byte after the amode */ );
17864 gen_SEGV_if_not_16_aligned( addr );
sewardjadf357c2012-06-24 13:44:17 +000017865 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
sewardj80611e32012-01-20 13:07:24 +000017866 imm8 = (Int)getUChar(delta+alen);
17867 delta += alen+1;
17868 DIP( "dpps $%d, %s,%s\n",
sewardjadf357c2012-06-24 13:44:17 +000017869 imm8, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000017870 }
sewardjadf357c2012-06-24 13:44:17 +000017871 IRTemp res = math_DPPS_128( src_vec, dst_vec, imm8 );
17872 putXMMReg( rG, mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000017873 goto decode_success;
17874 }
17875 break;
17876
17877 case 0x41:
17878 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8
17879 Dot Product of Packed Double Precision Floating-Point Values (XMM) */
17880 if (have66noF2noF3(pfx) && sz == 2) {
sewardj4ed05e02012-06-18 15:01:30 +000017881 modrm = getUChar(delta);
17882 Int imm8;
sewardj80611e32012-01-20 13:07:24 +000017883 IRTemp src_vec = newTemp(Ity_V128);
17884 IRTemp dst_vec = newTemp(Ity_V128);
sewardj4ed05e02012-06-18 15:01:30 +000017885 UInt rG = gregOfRexRM(pfx, modrm);
17886 assign( dst_vec, getXMMReg( rG ) );
sewardj80611e32012-01-20 13:07:24 +000017887 if ( epartIsReg( modrm ) ) {
sewardj4ed05e02012-06-18 15:01:30 +000017888 UInt rE = eregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000017889 imm8 = (Int)getUChar(delta+1);
sewardj4ed05e02012-06-18 15:01:30 +000017890 assign( src_vec, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000017891 delta += 1+1;
sewardj4ed05e02012-06-18 15:01:30 +000017892 DIP( "dppd $%d, %s,%s\n",
17893 imm8, nameXMMReg(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000017894 } else {
17895 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
17896 1/* imm8 is 1 byte after the amode */ );
17897 gen_SEGV_if_not_16_aligned( addr );
17898 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
17899 imm8 = (Int)getUChar(delta+alen);
17900 delta += alen+1;
17901 DIP( "dppd $%d, %s,%s\n",
sewardj4ed05e02012-06-18 15:01:30 +000017902 imm8, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000017903 }
sewardj4ed05e02012-06-18 15:01:30 +000017904 IRTemp res = math_DPPD_128( src_vec, dst_vec, imm8 );
17905 putXMMReg( rG, mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000017906 goto decode_success;
17907 }
17908 break;
17909
sewardj4d5bce22012-02-21 11:02:44 +000017910 case 0x42:
17911 /* 66 0F 3A 42 /r ib = MPSADBW xmm1, xmm2/m128, imm8
17912 Multiple Packed Sums of Absolute Differences (XMM) */
17913 if (have66noF2noF3(pfx) && sz == 2) {
sewardj4d5bce22012-02-21 11:02:44 +000017914 Int imm8;
17915 IRTemp src_vec = newTemp(Ity_V128);
17916 IRTemp dst_vec = newTemp(Ity_V128);
sewardj8516a1f2012-06-24 14:26:30 +000017917 modrm = getUChar(delta);
17918 UInt rG = gregOfRexRM(pfx, modrm);
sewardj4d5bce22012-02-21 11:02:44 +000017919
sewardj8516a1f2012-06-24 14:26:30 +000017920 assign( dst_vec, getXMMReg(rG) );
sewardj4d5bce22012-02-21 11:02:44 +000017921
17922 if ( epartIsReg( modrm ) ) {
sewardj8516a1f2012-06-24 14:26:30 +000017923 UInt rE = eregOfRexRM(pfx, modrm);
17924
sewardj4d5bce22012-02-21 11:02:44 +000017925 imm8 = (Int)getUChar(delta+1);
sewardj8516a1f2012-06-24 14:26:30 +000017926 assign( src_vec, getXMMReg(rE) );
sewardj4d5bce22012-02-21 11:02:44 +000017927 delta += 1+1;
17928 DIP( "mpsadbw $%d, %s,%s\n", imm8,
sewardj8516a1f2012-06-24 14:26:30 +000017929 nameXMMReg(rE), nameXMMReg(rG) );
sewardj4d5bce22012-02-21 11:02:44 +000017930 } else {
17931 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
17932 1/* imm8 is 1 byte after the amode */ );
17933 gen_SEGV_if_not_16_aligned( addr );
17934 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
17935 imm8 = (Int)getUChar(delta+alen);
17936 delta += alen+1;
sewardj8516a1f2012-06-24 14:26:30 +000017937 DIP( "mpsadbw $%d, %s,%s\n", imm8, dis_buf, nameXMMReg(rG) );
sewardj4d5bce22012-02-21 11:02:44 +000017938 }
17939
sewardj8516a1f2012-06-24 14:26:30 +000017940 putXMMReg( rG, mkexpr( math_MPSADBW_128(dst_vec, src_vec, imm8) ) );
sewardj4d5bce22012-02-21 11:02:44 +000017941 goto decode_success;
17942 }
17943 break;
17944
sewardj80611e32012-01-20 13:07:24 +000017945 case 0x44:
17946 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
17947 * Carry-less multiplication of selected XMM quadwords into XMM
17948 * registers (a.k.a multiplication of polynomials over GF(2))
17949 */
17950 if (have66noF2noF3(pfx) && sz == 2) {
17951
17952 Int imm8;
17953 IRTemp svec = newTemp(Ity_V128);
17954 IRTemp dvec = newTemp(Ity_V128);
sewardj1407a362012-06-24 15:11:38 +000017955 modrm = getUChar(delta);
17956 UInt rG = gregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000017957
sewardj1407a362012-06-24 15:11:38 +000017958 assign( dvec, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000017959
17960 if ( epartIsReg( modrm ) ) {
sewardj1407a362012-06-24 15:11:38 +000017961 UInt rE = eregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000017962 imm8 = (Int)getUChar(delta+1);
sewardj1407a362012-06-24 15:11:38 +000017963 assign( svec, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000017964 delta += 1+1;
17965 DIP( "pclmulqdq $%d, %s,%s\n", imm8,
sewardj1407a362012-06-24 15:11:38 +000017966 nameXMMReg(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000017967 } else {
17968 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
17969 1/* imm8 is 1 byte after the amode */ );
17970 gen_SEGV_if_not_16_aligned( addr );
17971 assign( svec, loadLE( Ity_V128, mkexpr(addr) ) );
17972 imm8 = (Int)getUChar(delta+alen);
17973 delta += alen+1;
17974 DIP( "pclmulqdq $%d, %s,%s\n",
sewardj1407a362012-06-24 15:11:38 +000017975 imm8, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000017976 }
17977
sewardj1407a362012-06-24 15:11:38 +000017978 putXMMReg( rG, mkexpr( math_PCLMULQDQ(dvec, svec, imm8) ) );
sewardj80611e32012-01-20 13:07:24 +000017979 goto decode_success;
17980 }
17981 break;
17982
17983 case 0x60:
17984 case 0x61:
17985 case 0x62:
17986 case 0x63:
17987 /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1
17988 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1
17989 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1
17990 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1
17991 (selected special cases that actually occur in glibc,
17992 not by any means a complete implementation.)
17993 */
17994 if (have66noF2noF3(pfx) && sz == 2) {
sewardjac75d7b2012-05-23 12:42:39 +000017995 Long delta0 = delta;
17996 delta = dis_PCMPxSTRx( vbi, pfx, delta, False/*!isAvx*/, opc );
17997 if (delta > delta0) goto decode_success;
17998 /* else fall through; dis_PCMPxSTRx failed to decode it */
sewardj80611e32012-01-20 13:07:24 +000017999 }
18000 break;
18001
sewardjac75d7b2012-05-23 12:42:39 +000018002 case 0xDF:
philippeff4d6be2012-02-14 21:34:56 +000018003 /* 66 0F 3A DF /r ib = AESKEYGENASSIST imm8, xmm2/m128, xmm1 */
18004 if (have66noF2noF3(pfx) && sz == 2) {
sewardj1407a362012-06-24 15:11:38 +000018005 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, False/*!isAvx*/ );
philippeff4d6be2012-02-14 21:34:56 +000018006 goto decode_success;
18007 }
18008 break;
18009
sewardj80611e32012-01-20 13:07:24 +000018010 default:
18011 break;
18012
18013 }
18014
18015 decode_failure:
18016 *decode_OK = False;
18017 return deltaIN;
18018
18019 decode_success:
18020 *decode_OK = True;
18021 return delta;
18022}
18023
18024
18025/*------------------------------------------------------------*/
18026/*--- ---*/
18027/*--- Top-level post-escape decoders: dis_ESC_NONE ---*/
18028/*--- ---*/
18029/*------------------------------------------------------------*/
18030
18031__attribute__((noinline))
18032static
18033Long dis_ESC_NONE (
18034 /*MB_OUT*/DisResult* dres,
18035 /*MB_OUT*/Bool* expect_CAS,
18036 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
18037 Bool resteerCisOk,
18038 void* callback_opaque,
18039 VexArchInfo* archinfo,
18040 VexAbiInfo* vbi,
18041 Prefix pfx, Int sz, Long deltaIN
18042 )
18043{
18044 Long d64 = 0;
18045 UChar abyte = 0;
18046 IRTemp addr = IRTemp_INVALID;
18047 IRTemp t1 = IRTemp_INVALID;
18048 IRTemp t2 = IRTemp_INVALID;
18049 IRTemp t3 = IRTemp_INVALID;
18050 IRTemp t4 = IRTemp_INVALID;
18051 IRTemp t5 = IRTemp_INVALID;
18052 IRType ty = Ity_INVALID;
18053 UChar modrm = 0;
18054 Int am_sz = 0;
18055 Int d_sz = 0;
18056 Int alen = 0;
18057 HChar dis_buf[50];
18058
18059 Long delta = deltaIN;
18060 UChar opc = getUChar(delta);
18061 delta++;
18062 switch (opc) {
18063
18064 case 0x00: /* ADD Gb,Eb */
18065 if (haveF2orF3(pfx)) goto decode_failure;
18066 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" );
18067 return delta;
18068 case 0x01: /* ADD Gv,Ev */
18069 if (haveF2orF3(pfx)) goto decode_failure;
18070 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" );
18071 return delta;
18072
18073 case 0x02: /* ADD Eb,Gb */
18074 if (haveF2orF3(pfx)) goto decode_failure;
18075 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" );
18076 return delta;
18077 case 0x03: /* ADD Ev,Gv */
18078 if (haveF2orF3(pfx)) goto decode_failure;
18079 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" );
18080 return delta;
18081
18082 case 0x04: /* ADD Ib, AL */
18083 if (haveF2orF3(pfx)) goto decode_failure;
18084 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
18085 return delta;
18086 case 0x05: /* ADD Iv, eAX */
18087 if (haveF2orF3(pfx)) goto decode_failure;
18088 delta = dis_op_imm_A(sz, False, Iop_Add8, True, delta, "add" );
18089 return delta;
18090
18091 case 0x08: /* OR Gb,Eb */
18092 if (haveF2orF3(pfx)) goto decode_failure;
18093 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" );
18094 return delta;
18095 case 0x09: /* OR Gv,Ev */
18096 if (haveF2orF3(pfx)) goto decode_failure;
18097 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" );
18098 return delta;
18099
18100 case 0x0A: /* OR Eb,Gb */
18101 if (haveF2orF3(pfx)) goto decode_failure;
18102 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" );
18103 return delta;
18104 case 0x0B: /* OR Ev,Gv */
18105 if (haveF2orF3(pfx)) goto decode_failure;
18106 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" );
18107 return delta;
18108
18109 case 0x0C: /* OR Ib, AL */
18110 if (haveF2orF3(pfx)) goto decode_failure;
18111 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
18112 return delta;
18113 case 0x0D: /* OR Iv, eAX */
18114 if (haveF2orF3(pfx)) goto decode_failure;
18115 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
18116 return delta;
18117
18118 case 0x10: /* ADC Gb,Eb */
18119 if (haveF2orF3(pfx)) goto decode_failure;
18120 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" );
18121 return delta;
18122 case 0x11: /* ADC Gv,Ev */
18123 if (haveF2orF3(pfx)) goto decode_failure;
18124 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" );
18125 return delta;
18126
18127 case 0x12: /* ADC Eb,Gb */
18128 if (haveF2orF3(pfx)) goto decode_failure;
18129 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" );
18130 return delta;
18131 case 0x13: /* ADC Ev,Gv */
18132 if (haveF2orF3(pfx)) goto decode_failure;
18133 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" );
18134 return delta;
18135
18136 case 0x14: /* ADC Ib, AL */
18137 if (haveF2orF3(pfx)) goto decode_failure;
18138 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
18139 return delta;
18140 case 0x15: /* ADC Iv, eAX */
18141 if (haveF2orF3(pfx)) goto decode_failure;
18142 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
18143 return delta;
18144
18145 case 0x18: /* SBB Gb,Eb */
18146 if (haveF2orF3(pfx)) goto decode_failure;
18147 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" );
18148 return delta;
18149 case 0x19: /* SBB Gv,Ev */
18150 if (haveF2orF3(pfx)) goto decode_failure;
18151 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" );
18152 return delta;
18153
18154 case 0x1A: /* SBB Eb,Gb */
18155 if (haveF2orF3(pfx)) goto decode_failure;
18156 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" );
18157 return delta;
18158 case 0x1B: /* SBB Ev,Gv */
18159 if (haveF2orF3(pfx)) goto decode_failure;
18160 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" );
18161 return delta;
18162
18163 case 0x1C: /* SBB Ib, AL */
18164 if (haveF2orF3(pfx)) goto decode_failure;
18165 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
18166 return delta;
18167 case 0x1D: /* SBB Iv, eAX */
18168 if (haveF2orF3(pfx)) goto decode_failure;
18169 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
18170 return delta;
18171
18172 case 0x20: /* AND Gb,Eb */
18173 if (haveF2orF3(pfx)) goto decode_failure;
18174 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" );
18175 return delta;
18176 case 0x21: /* AND Gv,Ev */
18177 if (haveF2orF3(pfx)) goto decode_failure;
18178 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" );
18179 return delta;
18180
18181 case 0x22: /* AND Eb,Gb */
18182 if (haveF2orF3(pfx)) goto decode_failure;
18183 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" );
18184 return delta;
18185 case 0x23: /* AND Ev,Gv */
18186 if (haveF2orF3(pfx)) goto decode_failure;
18187 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" );
18188 return delta;
18189
18190 case 0x24: /* AND Ib, AL */
18191 if (haveF2orF3(pfx)) goto decode_failure;
18192 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
18193 return delta;
18194 case 0x25: /* AND Iv, eAX */
18195 if (haveF2orF3(pfx)) goto decode_failure;
18196 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
18197 return delta;
18198
18199 case 0x28: /* SUB Gb,Eb */
18200 if (haveF2orF3(pfx)) goto decode_failure;
18201 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" );
18202 return delta;
18203 case 0x29: /* SUB Gv,Ev */
18204 if (haveF2orF3(pfx)) goto decode_failure;
18205 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" );
18206 return delta;
18207
18208 case 0x2A: /* SUB Eb,Gb */
18209 if (haveF2orF3(pfx)) goto decode_failure;
18210 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" );
18211 return delta;
18212 case 0x2B: /* SUB Ev,Gv */
18213 if (haveF2orF3(pfx)) goto decode_failure;
18214 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" );
18215 return delta;
18216
18217 case 0x2C: /* SUB Ib, AL */
18218 if (haveF2orF3(pfx)) goto decode_failure;
18219 delta = dis_op_imm_A(1, False, Iop_Sub8, True, delta, "sub" );
18220 return delta;
18221
18222 case 0x2D: /* SUB Iv, eAX */
18223 if (haveF2orF3(pfx)) goto decode_failure;
18224 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
18225 return delta;
18226
18227 case 0x30: /* XOR Gb,Eb */
18228 if (haveF2orF3(pfx)) goto decode_failure;
18229 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" );
18230 return delta;
18231 case 0x31: /* XOR Gv,Ev */
18232 if (haveF2orF3(pfx)) goto decode_failure;
18233 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" );
18234 return delta;
18235
18236 case 0x32: /* XOR Eb,Gb */
18237 if (haveF2orF3(pfx)) goto decode_failure;
18238 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" );
18239 return delta;
18240 case 0x33: /* XOR Ev,Gv */
18241 if (haveF2orF3(pfx)) goto decode_failure;
18242 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" );
18243 return delta;
18244
18245 case 0x34: /* XOR Ib, AL */
18246 if (haveF2orF3(pfx)) goto decode_failure;
18247 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
18248 return delta;
18249 case 0x35: /* XOR Iv, eAX */
18250 if (haveF2orF3(pfx)) goto decode_failure;
18251 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
18252 return delta;
18253
18254 case 0x38: /* CMP Gb,Eb */
18255 if (haveF2orF3(pfx)) goto decode_failure;
18256 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" );
18257 return delta;
18258 case 0x39: /* CMP Gv,Ev */
18259 if (haveF2orF3(pfx)) goto decode_failure;
18260 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" );
18261 return delta;
18262
18263 case 0x3A: /* CMP Eb,Gb */
18264 if (haveF2orF3(pfx)) goto decode_failure;
18265 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" );
18266 return delta;
18267 case 0x3B: /* CMP Ev,Gv */
18268 if (haveF2orF3(pfx)) goto decode_failure;
18269 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" );
18270 return delta;
18271
18272 case 0x3C: /* CMP Ib, AL */
18273 if (haveF2orF3(pfx)) goto decode_failure;
18274 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
18275 return delta;
18276 case 0x3D: /* CMP Iv, eAX */
18277 if (haveF2orF3(pfx)) goto decode_failure;
18278 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
18279 return delta;
18280
18281 case 0x50: /* PUSH eAX */
18282 case 0x51: /* PUSH eCX */
18283 case 0x52: /* PUSH eDX */
18284 case 0x53: /* PUSH eBX */
18285 case 0x55: /* PUSH eBP */
18286 case 0x56: /* PUSH eSI */
18287 case 0x57: /* PUSH eDI */
18288 case 0x54: /* PUSH eSP */
18289 /* This is the Right Way, in that the value to be pushed is
18290 established before %rsp is changed, so that pushq %rsp
18291 correctly pushes the old value. */
18292 if (haveF2orF3(pfx)) goto decode_failure;
18293 vassert(sz == 2 || sz == 4 || sz == 8);
18294 if (sz == 4)
18295 sz = 8; /* there is no encoding for 32-bit push in 64-bit mode */
18296 ty = sz==2 ? Ity_I16 : Ity_I64;
18297 t1 = newTemp(ty);
18298 t2 = newTemp(Ity_I64);
18299 assign(t1, getIRegRexB(sz, pfx, opc-0x50));
18300 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz)));
18301 putIReg64(R_RSP, mkexpr(t2) );
18302 storeLE(mkexpr(t2),mkexpr(t1));
18303 DIP("push%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x50));
18304 return delta;
18305
18306 case 0x58: /* POP eAX */
18307 case 0x59: /* POP eCX */
18308 case 0x5A: /* POP eDX */
18309 case 0x5B: /* POP eBX */
18310 case 0x5D: /* POP eBP */
18311 case 0x5E: /* POP eSI */
18312 case 0x5F: /* POP eDI */
18313 case 0x5C: /* POP eSP */
18314 if (haveF2orF3(pfx)) goto decode_failure;
18315 vassert(sz == 2 || sz == 4 || sz == 8);
18316 if (sz == 4)
18317 sz = 8; /* there is no encoding for 32-bit pop in 64-bit mode */
18318 t1 = newTemp(szToITy(sz));
18319 t2 = newTemp(Ity_I64);
18320 assign(t2, getIReg64(R_RSP));
18321 assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
18322 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
18323 putIRegRexB(sz, pfx, opc-0x58, mkexpr(t1));
18324 DIP("pop%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x58));
18325 return delta;
18326
18327 case 0x63: /* MOVSX */
18328 if (haveF2orF3(pfx)) goto decode_failure;
18329 if (haveREX(pfx) && 1==getRexW(pfx)) {
18330 vassert(sz == 8);
18331 /* movsx r/m32 to r64 */
18332 modrm = getUChar(delta);
18333 if (epartIsReg(modrm)) {
18334 delta++;
18335 putIRegG(8, pfx, modrm,
18336 unop(Iop_32Sto64,
18337 getIRegE(4, pfx, modrm)));
18338 DIP("movslq %s,%s\n",
18339 nameIRegE(4, pfx, modrm),
18340 nameIRegG(8, pfx, modrm));
18341 return delta;
18342 } else {
18343 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
18344 delta += alen;
18345 putIRegG(8, pfx, modrm,
18346 unop(Iop_32Sto64,
18347 loadLE(Ity_I32, mkexpr(addr))));
18348 DIP("movslq %s,%s\n", dis_buf,
18349 nameIRegG(8, pfx, modrm));
18350 return delta;
18351 }
18352 } else {
18353 goto decode_failure;
18354 }
18355
18356 case 0x68: /* PUSH Iv */
18357 if (haveF2orF3(pfx)) goto decode_failure;
18358 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
18359 if (sz == 4) sz = 8;
18360 d64 = getSDisp(imin(4,sz),delta);
18361 delta += imin(4,sz);
18362 goto do_push_I;
18363
18364 case 0x69: /* IMUL Iv, Ev, Gv */
18365 if (haveF2orF3(pfx)) goto decode_failure;
18366 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, sz );
18367 return delta;
18368
18369 case 0x6A: /* PUSH Ib, sign-extended to sz */
18370 if (haveF2orF3(pfx)) goto decode_failure;
18371 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
18372 if (sz == 4) sz = 8;
18373 d64 = getSDisp8(delta); delta += 1;
18374 goto do_push_I;
18375 do_push_I:
18376 ty = szToITy(sz);
18377 t1 = newTemp(Ity_I64);
18378 t2 = newTemp(ty);
18379 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
18380 putIReg64(R_RSP, mkexpr(t1) );
18381 /* stop mkU16 asserting if d64 is a negative 16-bit number
18382 (bug #132813) */
18383 if (ty == Ity_I16)
18384 d64 &= 0xFFFF;
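      /* Worked example (illustration only): for "pushw $-1" (66 6A FF),
         getSDisp8 sign-extends the immediate to 0xFFFFFFFFFFFFFFFF; the
         mask above reduces it to 0xFFFF so that mkU16 accepts it, and the
         16-bit store below writes the same bytes either way. */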
18385 storeLE( mkexpr(t1), mkU(ty,d64) );
18386 DIP("push%c $%lld\n", nameISize(sz), (Long)d64);
18387 return delta;
18388
18389 case 0x6B: /* IMUL Ib, Ev, Gv */
18390 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, 1 );
18391 return delta;
18392
18393 case 0x70: /* JOb (jump overflow) */
18394 case 0x71: /* JNOb (jump no overflow) */
18395 case 0x72: /* JBb/JNAEb (jump below) */
18396 case 0x73: /* JNBb/JAEb (jump not below) */
18397 case 0x74: /* JZb/JEb (jump zero) */
18398 case 0x75: /* JNZb/JNEb (jump not zero) */
18399 case 0x76: /* JBEb/JNAb (jump below or equal) */
18400 case 0x77: /* JNBEb/JAb (jump not below or equal) */
18401 case 0x78: /* JSb (jump negative) */
18402 case 0x79: /* JNSb (jump not negative) */
18403 case 0x7A: /* JP (jump parity even) */
18404 case 0x7B: /* JNP/JPO (jump parity odd) */
18405 case 0x7C: /* JLb/JNGEb (jump less) */
18406 case 0x7D: /* JGEb/JNLb (jump greater or equal) */
18407 case 0x7E: /* JLEb/JNGb (jump less or equal) */
18408 case 0x7F: { /* JGb/JNLEb (jump greater) */
18409 Long jmpDelta;
18410 HChar* comment = "";
18411 if (haveF2orF3(pfx)) goto decode_failure;
18412 jmpDelta = getSDisp8(delta);
18413 vassert(-128 <= jmpDelta && jmpDelta < 128);
18414 d64 = (guest_RIP_bbstart+delta+1) + jmpDelta;
18415 delta++;
18416 if (resteerCisOk
18417 && vex_control.guest_chase_cond
18418 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
18419 && jmpDelta < 0
18420 && resteerOkFn( callback_opaque, d64) ) {
18421 /* Speculation: assume this backward branch is taken. So we
18422 need to emit a side-exit to the insn following this one,
18423 on the negation of the condition, and continue at the
18424 branch target address (d64). If we wind up back at the
18425 first instruction of the trace, just stop; it's better to
18426 let the IR loop unroller handle that case. */
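      /* Rough illustration: for a backward "jne .Lloop" at the end of a
         hot loop, the exit emitted below is taken only when the condition
         fails (hence the 1 ^ (opc - 0x70) inversion), and decoding then
         carries on at the branch target d64. */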
18427 stmt( IRStmt_Exit(
18428 mk_amd64g_calculate_condition(
18429 (AMD64Condcode)(1 ^ (opc - 0x70))),
18430 Ijk_Boring,
18431 IRConst_U64(guest_RIP_bbstart+delta),
18432 OFFB_RIP ) );
18433 dres->whatNext = Dis_ResteerC;
18434 dres->continueAt = d64;
18435 comment = "(assumed taken)";
18436 }
18437 else
18438 if (resteerCisOk
18439 && vex_control.guest_chase_cond
18440 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
18441 && jmpDelta >= 0
18442 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) {
18443 /* Speculation: assume this forward branch is not taken. So
18444 we need to emit a side-exit to d64 (the dest) and continue
18445 disassembling at the insn immediately following this
18446 one. */
18447 stmt( IRStmt_Exit(
18448 mk_amd64g_calculate_condition((AMD64Condcode)(opc - 0x70)),
18449 Ijk_Boring,
18450 IRConst_U64(d64),
18451 OFFB_RIP ) );
18452 dres->whatNext = Dis_ResteerC;
18453 dres->continueAt = guest_RIP_bbstart+delta;
18454 comment = "(assumed not taken)";
18455 }
18456 else {
18457 /* Conservative default translation - end the block at this
18458 point. */
18459 jcc_01( dres, (AMD64Condcode)(opc - 0x70),
18460 guest_RIP_bbstart+delta, d64 );
18461 vassert(dres->whatNext == Dis_StopHere);
18462 }
18463 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), d64, comment);
18464 return delta;
18465 }
18466
18467 case 0x80: /* Grp1 Ib,Eb */
18468 if (haveF2orF3(pfx)) goto decode_failure;
18469 modrm = getUChar(delta);
18470 am_sz = lengthAMode(pfx,delta);
18471 sz = 1;
18472 d_sz = 1;
18473 d64 = getSDisp8(delta + am_sz);
18474 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
18475 return delta;
18476
18477 case 0x81: /* Grp1 Iv,Ev */
18478 if (haveF2orF3(pfx)) goto decode_failure;
18479 modrm = getUChar(delta);
18480 am_sz = lengthAMode(pfx,delta);
18481 d_sz = imin(sz,4);
18482 d64 = getSDisp(d_sz, delta + am_sz);
18483 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
18484 return delta;
18485
18486 case 0x83: /* Grp1 Ib,Ev */
18487 if (haveF2orF3(pfx)) goto decode_failure;
18488 modrm = getUChar(delta);
18489 am_sz = lengthAMode(pfx,delta);
18490 d_sz = 1;
18491 d64 = getSDisp8(delta + am_sz);
18492 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
18493 return delta;
18494
18495 case 0x84: /* TEST Eb,Gb */
18496 if (haveF2orF3(pfx)) goto decode_failure;
18497 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, 1, delta, "test" );
18498 return delta;
18499
18500 case 0x85: /* TEST Ev,Gv */
18501 if (haveF2orF3(pfx)) goto decode_failure;
18502 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, sz, delta, "test" );
18503 return delta;
18504
18505 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
18506 prefix. Therefore, surround it with a IRStmt_MBE(Imbe_BusLock)
18507 and IRStmt_MBE(Imbe_BusUnlock) pair. But be careful; if it is
18508 used with an explicit LOCK prefix, we don't want to end up with
18509 two IRStmt_MBE(Imbe_BusLock)s -- one made here and one made by
18510 the generic LOCK logic at the top of disInstr. */
18511 case 0x86: /* XCHG Gb,Eb */
18512 sz = 1;
18513 /* Fall through ... */
18514 case 0x87: /* XCHG Gv,Ev */
18515 if (haveF2orF3(pfx)) goto decode_failure;
18516 modrm = getUChar(delta);
18517 ty = szToITy(sz);
18518 t1 = newTemp(ty); t2 = newTemp(ty);
18519 if (epartIsReg(modrm)) {
18520 assign(t1, getIRegE(sz, pfx, modrm));
18521 assign(t2, getIRegG(sz, pfx, modrm));
18522 putIRegG(sz, pfx, modrm, mkexpr(t1));
18523 putIRegE(sz, pfx, modrm, mkexpr(t2));
18524 delta++;
18525 DIP("xchg%c %s, %s\n",
18526 nameISize(sz), nameIRegG(sz, pfx, modrm),
18527 nameIRegE(sz, pfx, modrm));
18528 } else {
18529 *expect_CAS = True;
18530 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
18531 assign( t1, loadLE(ty, mkexpr(addr)) );
18532 assign( t2, getIRegG(sz, pfx, modrm) );
18533 casLE( mkexpr(addr),
18534 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
18535 putIRegG( sz, pfx, modrm, mkexpr(t1) );
18536 delta += alen;
18537 DIP("xchg%c %s, %s\n", nameISize(sz),
18538 nameIRegG(sz, pfx, modrm), dis_buf);
18539 }
18540 return delta;
18541
18542 case 0x88: /* MOV Gb,Eb */
18543 if (haveF2orF3(pfx)) goto decode_failure;
18544 delta = dis_mov_G_E(vbi, pfx, 1, delta);
18545 return delta;
18546
18547 case 0x89: /* MOV Gv,Ev */
18548 if (haveF2orF3(pfx)) goto decode_failure;
18549 delta = dis_mov_G_E(vbi, pfx, sz, delta);
18550 return delta;
18551
18552 case 0x8A: /* MOV Eb,Gb */
18553 if (haveF2orF3(pfx)) goto decode_failure;
18554 delta = dis_mov_E_G(vbi, pfx, 1, delta);
18555 return delta;
18556
18557 case 0x8B: /* MOV Ev,Gv */
18558 if (haveF2orF3(pfx)) goto decode_failure;
18559 delta = dis_mov_E_G(vbi, pfx, sz, delta);
18560 return delta;
18561
18562 case 0x8D: /* LEA M,Gv */
18563 if (haveF2orF3(pfx)) goto decode_failure;
18564 if (sz != 4 && sz != 8)
18565 goto decode_failure;
18566 modrm = getUChar(delta);
18567 if (epartIsReg(modrm))
18568 goto decode_failure;
18569 /* NOTE! this is the one place where a segment override prefix
18570 has no effect on the address calculation. Therefore we clear
18571 any segment override bits in pfx. */
18572 addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 );
18573 delta += alen;
18574 /* This is a hack, but it isn't clear that doing the
18575 calculation at 32 bits is really worth it. Hence for leal,
18576 do the full 64-bit calculation and then truncate it. */
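      /* Example (illustrative): for "leal 4(%rdi,%rsi,2), %eax" the address
         is formed with 64-bit arithmetic and Iop_64to32 below keeps only the
         low 32 bits, matching the architected truncation when the destination
         register is 32 bits wide. */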
18577 putIRegG( sz, pfx, modrm,
18578 sz == 4
18579 ? unop(Iop_64to32, mkexpr(addr))
18580 : mkexpr(addr)
18581 );
18582 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
18583 nameIRegG(sz,pfx,modrm));
18584 return delta;
18585
18586 case 0x8F: { /* POPQ m64 / POPW m16 */
18587 Int len;
18588 UChar rm;
18589 /* There is no encoding for 32-bit pop in 64-bit mode.
18590 So sz==4 actually means sz==8. */
18591 if (haveF2orF3(pfx)) goto decode_failure;
18592 vassert(sz == 2 || sz == 4
18593 || /* tolerate redundant REX.W, see #210481 */ sz == 8);
18594 if (sz == 4) sz = 8;
18595 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
18596
18597 rm = getUChar(delta);
18598
18599 /* make sure this instruction is correct POP */
18600 if (epartIsReg(rm) || gregLO3ofRM(rm) != 0)
18601 goto decode_failure;
18602 /* and has correct size */
18603 vassert(sz == 8);
18604
18605 t1 = newTemp(Ity_I64);
18606 t3 = newTemp(Ity_I64);
18607 assign( t1, getIReg64(R_RSP) );
18608 assign( t3, loadLE(Ity_I64, mkexpr(t1)) );
18609
18610 /* Increase RSP; must be done before the STORE. Intel manual
18611 says: If the RSP register is used as a base register for
18612 addressing a destination operand in memory, the POP
18613 instruction computes the effective address of the operand
18614 after it increments the RSP register. */
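      /* So, for example, "popq (%rsp)" stores the popped value at the
         post-increment RSP, i.e. 8 bytes above the slot it was loaded
         from; the ordering of the statements below reproduces that. */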
18615 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) );
18616
18617 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
18618 storeLE( mkexpr(addr), mkexpr(t3) );
18619
18620 DIP("popl %s\n", dis_buf);
18621
18622 delta += len;
18623 return delta;
18624 }
18625
18626 case 0x90: /* XCHG eAX,eAX */
18627 /* detect and handle F3 90 (rep nop) specially */
18628 if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) {
18629 DIP("rep nop (P4 pause)\n");
18630 /* "observe" the hint. The Vex client needs to be careful not
18631 to cause very long delays as a result, though. */
18632 jmp_lit(dres, Ijk_Yield, guest_RIP_bbstart+delta);
18633 vassert(dres->whatNext == Dis_StopHere);
18634 return delta;
18635 }
18636 /* detect and handle NOPs specially */
18637 if (/* F2/F3 probably change meaning completely */
18638 !haveF2orF3(pfx)
18639 /* If REX.B is 1, we're not exchanging rAX with itself */
18640 && getRexB(pfx)==0 ) {
18641 DIP("nop\n");
18642 return delta;
18643 }
18644 /* else fall through to normal case. */
18645 case 0x91: /* XCHG rAX,rCX */
18646 case 0x92: /* XCHG rAX,rDX */
18647 case 0x93: /* XCHG rAX,rBX */
18648 case 0x94: /* XCHG rAX,rSP */
18649 case 0x95: /* XCHG rAX,rBP */
18650 case 0x96: /* XCHG rAX,rSI */
18651 case 0x97: /* XCHG rAX,rDI */
18652 /* guard against mutancy */
18653 if (haveF2orF3(pfx)) goto decode_failure;
18654 codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 );
18655 return delta;
18656
18657 case 0x98: /* CBW */
18658 if (haveF2orF3(pfx)) goto decode_failure;
18659 if (sz == 8) {
18660 putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) );
18661 DIP(/*"cdqe\n"*/"cltq");
18662 return delta;
18663 }
18664 if (sz == 4) {
18665 putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) );
18666 DIP("cwtl\n");
18667 return delta;
18668 }
18669 if (sz == 2) {
18670 putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) );
18671 DIP("cbw\n");
18672 return delta;
18673 }
18674 goto decode_failure;
18675
18676 case 0x99: /* CWD/CDQ/CQO */
18677 if (haveF2orF3(pfx)) goto decode_failure;
18678 vassert(sz == 2 || sz == 4 || sz == 8);
18679 ty = szToITy(sz);
18680 putIRegRDX( sz,
18681 binop(mkSizedOp(ty,Iop_Sar8),
18682 getIRegRAX(sz),
18683 mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) );
18684 DIP(sz == 2 ? "cwd\n"
18685 : (sz == 4 ? /*"cdq\n"*/ "cltd\n"
18686 : "cqo\n"));
18687 return delta;
18688
18689 case 0x9B: /* FWAIT (X87 insn) */
18690 /* ignore? */
18691 DIP("fwait\n");
18692 return delta;
18693
18694 case 0x9C: /* PUSHF */ {
18695 /* Note. There is no encoding for a 32-bit pushf in 64-bit
18696 mode. So sz==4 actually means sz==8. */
18697 /* 24 July 06: has also been seen with a redundant REX prefix,
18698 so must also allow sz==8. */
18699 if (haveF2orF3(pfx)) goto decode_failure;
18700 vassert(sz == 2 || sz == 4 || sz == 8);
18701 if (sz == 4) sz = 8;
18702 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
18703
18704 t1 = newTemp(Ity_I64);
18705 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
18706 putIReg64(R_RSP, mkexpr(t1) );
18707
18708 t2 = newTemp(Ity_I64);
18709 assign( t2, mk_amd64g_calculate_rflags_all() );
18710
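      /* RFLAGS bit positions used by the patches below: DF is bit 10,
         ID is bit 21 and AC is bit 18; hence the shift amounts and the
         1<<10, 1<<21 and 1<<18 masks. */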
18711 /* Patch in the D flag. This can simply be a copy of bit 10 of
18712 baseBlock[OFFB_DFLAG]. */
18713 t3 = newTemp(Ity_I64);
18714 assign( t3, binop(Iop_Or64,
18715 mkexpr(t2),
18716 binop(Iop_And64,
18717 IRExpr_Get(OFFB_DFLAG,Ity_I64),
18718 mkU64(1<<10)))
18719 );
18720
18721 /* And patch in the ID flag. */
18722 t4 = newTemp(Ity_I64);
18723 assign( t4, binop(Iop_Or64,
18724 mkexpr(t3),
18725 binop(Iop_And64,
18726 binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64),
18727 mkU8(21)),
18728 mkU64(1<<21)))
18729 );
18730
18731 /* And patch in the AC flag too. */
18732 t5 = newTemp(Ity_I64);
18733 assign( t5, binop(Iop_Or64,
18734 mkexpr(t4),
18735 binop(Iop_And64,
18736 binop(Iop_Shl64, IRExpr_Get(OFFB_ACFLAG,Ity_I64),
18737 mkU8(18)),
18738 mkU64(1<<18)))
18739 );
18740
18741 /* if sz==2, the stored value needs to be narrowed. */
18742 if (sz == 2)
18743 storeLE( mkexpr(t1), unop(Iop_32to16,
18744 unop(Iop_64to32,mkexpr(t5))) );
18745 else
18746 storeLE( mkexpr(t1), mkexpr(t5) );
18747
18748 DIP("pushf%c\n", nameISize(sz));
18749 return delta;
18750 }
18751
18752 case 0x9D: /* POPF */
18753 /* Note. There is no encoding for a 32-bit popf in 64-bit mode.
18754 So sz==4 actually means sz==8. */
18755 if (haveF2orF3(pfx)) goto decode_failure;
18756 vassert(sz == 2 || sz == 4);
18757 if (sz == 4) sz = 8;
18758 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
18759 t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64);
18760 assign(t2, getIReg64(R_RSP));
18761 assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2))));
18762 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
18763 /* t1 is the flag word. Mask out everything except OSZACP and
18764 set the flags thunk to AMD64G_CC_OP_COPY. */
18765 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
18766 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
18767 stmt( IRStmt_Put( OFFB_CC_DEP1,
18768 binop(Iop_And64,
18769 mkexpr(t1),
18770 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P
18771 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z
18772 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O )
18773 )
18774 )
18775 );
18776
18777 /* Also need to set the D flag, which is held in bit 10 of t1.
18778 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
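      /* Note on the Mux0X selects used here: IRExpr_Mux0X(cond8, e0, eX)
         yields e0 when cond8 is zero and eX otherwise. So bit 10 clear
         gives DFLAG = 1 and bit 10 set gives DFLAG = -1; the ID and AC
         selects below work the same way (bit clear -> 0, bit set -> 1). */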
18779 stmt( IRStmt_Put(
18780 OFFB_DFLAG,
18781 IRExpr_Mux0X(
18782 unop(Iop_32to8,
18783 unop(Iop_64to32,
18784 binop(Iop_And64,
18785 binop(Iop_Shr64, mkexpr(t1), mkU8(10)),
18786 mkU64(1)))),
18787 mkU64(1),
18788 mkU64(0xFFFFFFFFFFFFFFFFULL)))
18789 );
18790
18791 /* And set the ID flag */
18792 stmt( IRStmt_Put(
18793 OFFB_IDFLAG,
18794 IRExpr_Mux0X(
18795 unop(Iop_32to8,
18796 unop(Iop_64to32,
18797 binop(Iop_And64,
18798 binop(Iop_Shr64, mkexpr(t1), mkU8(21)),
18799 mkU64(1)))),
18800 mkU64(0),
18801 mkU64(1)))
18802 );
18803
18804 /* And set the AC flag too */
18805 stmt( IRStmt_Put(
18806 OFFB_ACFLAG,
18807 IRExpr_Mux0X(
18808 unop(Iop_32to8,
18809 unop(Iop_64to32,
18810 binop(Iop_And64,
18811 binop(Iop_Shr64, mkexpr(t1), mkU8(18)),
18812 mkU64(1)))),
18813 mkU64(0),
18814 mkU64(1)))
18815 );
18816
18817 DIP("popf%c\n", nameISize(sz));
18818 return delta;
18819
18820 case 0x9E: /* SAHF */
18821 codegen_SAHF();
18822 DIP("sahf\n");
18823 return delta;
18824
18825 case 0x9F: /* LAHF */
18826 codegen_LAHF();
18827 DIP("lahf\n");
18828 return delta;
18829
18830 case 0xA0: /* MOV Ob,AL */
18831 if (have66orF2orF3(pfx)) goto decode_failure;
18832 sz = 1;
18833 /* Fall through ... */
18834 case 0xA1: /* MOV Ov,eAX */
18835 if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
18836 goto decode_failure;
18837 d64 = getDisp64(delta);
18838 delta += 8;
18839 ty = szToITy(sz);
18840 addr = newTemp(Ity_I64);
18841 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
18842 putIRegRAX(sz, loadLE( ty, mkexpr(addr) ));
18843 DIP("mov%c %s0x%llx, %s\n", nameISize(sz),
18844 segRegTxt(pfx), d64,
18845 nameIRegRAX(sz));
18846 return delta;
18847
18848 case 0xA2: /* MOV AL,Ob */
18849 if (have66orF2orF3(pfx)) goto decode_failure;
18850 sz = 1;
18851 /* Fall through ... */
18852 case 0xA3: /* MOV eAX,Ov */
18853 if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
18854 goto decode_failure;
18855 d64 = getDisp64(delta);
18856 delta += 8;
18857 ty = szToITy(sz);
18858 addr = newTemp(Ity_I64);
18859 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
18860 storeLE( mkexpr(addr), getIRegRAX(sz) );
18861 DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz),
18862 segRegTxt(pfx), d64);
18863 return delta;
18864
18865 case 0xA4:
18866 case 0xA5:
18867 /* F3 A4/A5: rep movsb / rep movs{w,l,q} */
18868 if (haveF3(pfx) && !haveF2(pfx)) {
18869 if (opc == 0xA4)
18870 sz = 1;
18871 dis_REP_op ( dres, AMD64CondAlways, dis_MOVS, sz,
18872 guest_RIP_curr_instr,
18873 guest_RIP_bbstart+delta, "rep movs", pfx );
18874 dres->whatNext = Dis_StopHere;
18875 return delta;
18876 }
18877 /* A4/A5: movsb / movs{w,l,q} */
18878 if (!haveF3(pfx) && !haveF2(pfx)) {
18879 if (opc == 0xA4)
18880 sz = 1;
18881 dis_string_op( dis_MOVS, sz, "movs", pfx );
18882 return delta;
18883 }
18884 goto decode_failure;
18885
18886 case 0xA6:
18887 case 0xA7:
18888 /* F3 A6/A7: repe cmpsb / repe cmps{w,l,q} */
18889 if (haveF3(pfx) && !haveF2(pfx)) {
18890 if (opc == 0xA6)
18891 sz = 1;
18892 dis_REP_op ( dres, AMD64CondZ, dis_CMPS, sz,
18893 guest_RIP_curr_instr,
18894 guest_RIP_bbstart+delta, "repe cmps", pfx );
18895 dres->whatNext = Dis_StopHere;
18896 return delta;
18897 }
18898 goto decode_failure;
18899
18900 case 0xAA:
18901 case 0xAB:
18902 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */
18903 if (haveF3(pfx) && !haveF2(pfx)) {
18904 if (opc == 0xAA)
18905 sz = 1;
18906 dis_REP_op ( dres, AMD64CondAlways, dis_STOS, sz,
18907 guest_RIP_curr_instr,
18908 guest_RIP_bbstart+delta, "rep stos", pfx );
18909 vassert(dres->whatNext == Dis_StopHere);
18910 return delta;
18911 }
18912 /* AA/AB: stosb/stos{w,l,q} */
18913 if (!haveF3(pfx) && !haveF2(pfx)) {
18914 if (opc == 0xAA)
18915 sz = 1;
18916 dis_string_op( dis_STOS, sz, "stos", pfx );
18917 return delta;
18918 }
18919 goto decode_failure;
18920
18921 case 0xA8: /* TEST Ib, AL */
18922 if (haveF2orF3(pfx)) goto decode_failure;
18923 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
18924 return delta;
18925 case 0xA9: /* TEST Iv, eAX */
18926 if (haveF2orF3(pfx)) goto decode_failure;
18927 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
18928 return delta;
18929
18930 case 0xAC: /* LODS, no REP prefix */
18931 case 0xAD:
18932 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", pfx );
18933 return delta;
18934
18935 case 0xAE:
18936 case 0xAF:
18937 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */
18938 if (haveF2(pfx) && !haveF3(pfx)) {
18939 if (opc == 0xAE)
18940 sz = 1;
18941 dis_REP_op ( dres, AMD64CondNZ, dis_SCAS, sz,
18942 guest_RIP_curr_instr,
18943 guest_RIP_bbstart+delta, "repne scas", pfx );
18944 vassert(dres->whatNext == Dis_StopHere);
18945 return delta;
18946 }
18947 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */
18948 if (!haveF2(pfx) && haveF3(pfx)) {
18949 if (opc == 0xAE)
18950 sz = 1;
18951 dis_REP_op ( dres, AMD64CondZ, dis_SCAS, sz,
18952 guest_RIP_curr_instr,
18953 guest_RIP_bbstart+delta, "repe scas", pfx );
18954 vassert(dres->whatNext == Dis_StopHere);
18955 return delta;
18956 }
18957 /* AE/AF: scasb/scas{w,l,q} */
18958 if (!haveF2(pfx) && !haveF3(pfx)) {
18959 if (opc == 0xAE)
18960 sz = 1;
18961 dis_string_op( dis_SCAS, sz, "scas", pfx );
18962 return delta;
18963 }
18964 goto decode_failure;
18965
18966 /* XXXX be careful here with moves to AH/BH/CH/DH */
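   /* (Aside, for reference: once any REX prefix is present, encodings
      B4-B7 name SPL/BPL/SIL/DIL rather than AH/CH/DH/BH; putIRegRexB is
      relied on to make that distinction.) */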
18967 case 0xB0: /* MOV imm,AL */
18968 case 0xB1: /* MOV imm,CL */
18969 case 0xB2: /* MOV imm,DL */
18970 case 0xB3: /* MOV imm,BL */
18971 case 0xB4: /* MOV imm,AH */
18972 case 0xB5: /* MOV imm,CH */
18973 case 0xB6: /* MOV imm,DH */
18974 case 0xB7: /* MOV imm,BH */
18975 if (haveF2orF3(pfx)) goto decode_failure;
18976 d64 = getUChar(delta);
18977 delta += 1;
18978 putIRegRexB(1, pfx, opc-0xB0, mkU8(d64));
18979 DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0));
18980 return delta;
18981
18982 case 0xB8: /* MOV imm,eAX */
18983 case 0xB9: /* MOV imm,eCX */
18984 case 0xBA: /* MOV imm,eDX */
18985 case 0xBB: /* MOV imm,eBX */
18986 case 0xBC: /* MOV imm,eSP */
18987 case 0xBD: /* MOV imm,eBP */
18988 case 0xBE: /* MOV imm,eSI */
18989 case 0xBF: /* MOV imm,eDI */
18990 /* This is the one-and-only place where 64-bit literals are
18991 allowed in the instruction stream. */
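      /* Example (illustrative): "movabsq $0x1122334455667788, %rax" is
         encoded as 48 B8 followed by the eight immediate bytes; that
         immediate is what getDisp64 picks up below when sz == 8. */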
18992 if (haveF2orF3(pfx)) goto decode_failure;
18993 if (sz == 8) {
18994 d64 = getDisp64(delta);
18995 delta += 8;
18996 putIRegRexB(8, pfx, opc-0xB8, mkU64(d64));
18997 DIP("movabsq $%lld,%s\n", (Long)d64,
18998 nameIRegRexB(8,pfx,opc-0xB8));
18999 } else {
19000 d64 = getSDisp(imin(4,sz),delta);
19001 delta += imin(4,sz);
19002 putIRegRexB(sz, pfx, opc-0xB8,
19003 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
19004 DIP("mov%c $%lld,%s\n", nameISize(sz),
19005 (Long)d64,
19006 nameIRegRexB(sz,pfx,opc-0xB8));
19007 }
19008 return delta;
19009
19010 case 0xC0: { /* Grp2 Ib,Eb */
19011 Bool decode_OK = True;
19012 if (haveF2orF3(pfx)) goto decode_failure;
19013 modrm = getUChar(delta);
19014 am_sz = lengthAMode(pfx,delta);
19015 d_sz = 1;
19016 d64 = getUChar(delta + am_sz);
19017 sz = 1;
19018 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
19019 mkU8(d64 & 0xFF), NULL, &decode_OK );
19020 if (!decode_OK) goto decode_failure;
19021 return delta;
19022 }
19023
19024 case 0xC1: { /* Grp2 Ib,Ev */
19025 Bool decode_OK = True;
19026 if (haveF2orF3(pfx)) goto decode_failure;
19027 modrm = getUChar(delta);
19028 am_sz = lengthAMode(pfx,delta);
19029 d_sz = 1;
19030 d64 = getUChar(delta + am_sz);
19031 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
19032 mkU8(d64 & 0xFF), NULL, &decode_OK );
19033 if (!decode_OK) goto decode_failure;
19034 return delta;
19035 }
19036
19037 case 0xC2: /* RET imm16 */
19038 if (have66orF2orF3(pfx)) goto decode_failure;
19039 d64 = getUDisp16(delta);
19040 delta += 2;
19041 dis_ret(dres, vbi, d64);
19042 DIP("ret $%lld\n", d64);
19043 return delta;
19044
19045 case 0xC3: /* RET */
19046 if (have66orF2(pfx)) goto decode_failure;
19047 /* F3 is acceptable on AMD. */
19048 dis_ret(dres, vbi, 0);
19049 DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n");
19050 return delta;
19051
19052 case 0xC6: /* MOV Ib,Eb */
19053 sz = 1;
19054 goto do_Mov_I_E;
19055 case 0xC7: /* MOV Iv,Ev */
19056 goto do_Mov_I_E;
19057 do_Mov_I_E:
19058 if (haveF2orF3(pfx)) goto decode_failure;
19059 modrm = getUChar(delta);
19060 if (epartIsReg(modrm)) {
19061 delta++; /* mod/rm byte */
19062 d64 = getSDisp(imin(4,sz),delta);
19063 delta += imin(4,sz);
19064 putIRegE(sz, pfx, modrm,
19065 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
19066 DIP("mov%c $%lld, %s\n", nameISize(sz),
19067 (Long)d64,
19068 nameIRegE(sz,pfx,modrm));
19069 } else {
19070 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
19071 /*xtra*/imin(4,sz) );
19072 delta += alen;
19073 d64 = getSDisp(imin(4,sz),delta);
19074 delta += imin(4,sz);
19075 storeLE(mkexpr(addr),
19076 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
19077 DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf);
19078 }
19079 return delta;
19080
19081 case 0xC8: /* ENTER */
19082 /* Same comments re operand size as for LEAVE below apply.
19083 Also, only handles the case "enter $imm16, $0"; other cases
19084 for the second operand (nesting depth) are not handled. */
19085 if (sz != 4)
19086 goto decode_failure;
19087 d64 = getUDisp16(delta);
19088 delta += 2;
19089 vassert(d64 >= 0 && d64 <= 0xFFFF);
19090 if (getUChar(delta) != 0)
19091 goto decode_failure;
19092 delta++;
19093 /* Intel docs seem to suggest:
19094 push rbp
19095 temp = rsp
19096 rbp = temp
19097 rsp = rsp - imm16
19098 */
19099 t1 = newTemp(Ity_I64);
19100 assign(t1, getIReg64(R_RBP));
19101 t2 = newTemp(Ity_I64);
19102 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
19103 putIReg64(R_RSP, mkexpr(t2));
19104 storeLE(mkexpr(t2), mkexpr(t1));
19105 putIReg64(R_RBP, mkexpr(t2));
19106 if (d64 > 0) {
19107 putIReg64(R_RSP, binop(Iop_Sub64, mkexpr(t2), mkU64(d64)));
19108 }
19109 DIP("enter $%u, $0\n", (UInt)d64);
19110 return delta;
19111
19112 case 0xC9: /* LEAVE */
19113 /* In 64-bit mode this defaults to a 64-bit operand size. There
19114 is no way to encode a 32-bit variant. Hence sz==4 but we do
19115 it as if sz=8. */
19116 if (sz != 4)
19117 goto decode_failure;
19118 t1 = newTemp(Ity_I64);
19119 t2 = newTemp(Ity_I64);
19120 assign(t1, getIReg64(R_RBP));
19121 /* First PUT RSP looks redundant, but need it because RSP must
19122 always be up-to-date for Memcheck to work... */
19123 putIReg64(R_RSP, mkexpr(t1));
19124 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
19125 putIReg64(R_RBP, mkexpr(t2));
19126 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) );
19127 DIP("leave\n");
19128 return delta;
19129
19130 case 0xCC: /* INT 3 */
19131 jmp_lit(dres, Ijk_SigTRAP, guest_RIP_bbstart + delta);
19132 vassert(dres->whatNext == Dis_StopHere);
19133 DIP("int $0x3\n");
19134 return delta;
19135
19136 case 0xD0: { /* Grp2 1,Eb */
19137 Bool decode_OK = True;
19138 if (haveF2orF3(pfx)) goto decode_failure;
19139 modrm = getUChar(delta);
19140 am_sz = lengthAMode(pfx,delta);
19141 d_sz = 0;
19142 d64 = 1;
19143 sz = 1;
19144 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
19145 mkU8(d64), NULL, &decode_OK );
19146 if (!decode_OK) goto decode_failure;
19147 return delta;
19148 }
19149
19150 case 0xD1: { /* Grp2 1,Ev */
19151 Bool decode_OK = True;
19152 if (haveF2orF3(pfx)) goto decode_failure;
19153 modrm = getUChar(delta);
19154 am_sz = lengthAMode(pfx,delta);
19155 d_sz = 0;
19156 d64 = 1;
19157 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
19158 mkU8(d64), NULL, &decode_OK );
19159 if (!decode_OK) goto decode_failure;
19160 return delta;
19161 }
19162
19163 case 0xD2: { /* Grp2 CL,Eb */
19164 Bool decode_OK = True;
19165 if (haveF2orF3(pfx)) goto decode_failure;
19166 modrm = getUChar(delta);
19167 am_sz = lengthAMode(pfx,delta);
19168 d_sz = 0;
19169 sz = 1;
19170 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
19171 getIRegCL(), "%cl", &decode_OK );
19172 if (!decode_OK) goto decode_failure;
19173 return delta;
19174 }
19175
19176 case 0xD3: { /* Grp2 CL,Ev */
19177 Bool decode_OK = True;
19178 if (haveF2orF3(pfx)) goto decode_failure;
19179 modrm = getUChar(delta);
19180 am_sz = lengthAMode(pfx,delta);
19181 d_sz = 0;
19182 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
19183 getIRegCL(), "%cl", &decode_OK );
19184 if (!decode_OK) goto decode_failure;
19185 return delta;
19186 }
19187
19188 case 0xD8: /* X87 instructions */
19189 case 0xD9:
19190 case 0xDA:
19191 case 0xDB:
19192 case 0xDC:
19193 case 0xDD:
19194 case 0xDE:
19195 case 0xDF: {
19196 Bool redundantREXWok = False;
19197
19198 if (haveF2orF3(pfx))
19199 goto decode_failure;
19200
19201 /* kludge to tolerate redundant rex.w prefixes (should do this
19202 properly one day) */
19203 /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */
19204 if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ )
19205 redundantREXWok = True;
19206
19207 Bool size_OK = False;
19208 if ( sz == 4 )
19209 size_OK = True;
19210 else if ( sz == 8 )
19211 size_OK = redundantREXWok;
19212 else if ( sz == 2 ) {
19213 Int mod_rm = getUChar(delta+0);
19214 Int reg = gregLO3ofRM(mod_rm);
19215 /* The HotSpot JVM uses these */
19216 if ( (opc == 0xDD) && (reg == 0 /* FLDL */ ||
19217 reg == 4 /* FNSAVE */ ||
19218 reg == 6 /* FRSTOR */ ) )
19219 size_OK = True;
19220 }
19221 /* AMD manual says 0x66 size override is ignored, except where
19222 it is meaningful */
19223 if (!size_OK)
19224 goto decode_failure;
19225
19226 Bool decode_OK = False;
19227 delta = dis_FPU ( &decode_OK, vbi, pfx, delta );
19228 if (!decode_OK)
19229 goto decode_failure;
19230
19231 return delta;
19232 }
19233
19234 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
19235 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
19236 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
19237 { /* The docs say this uses rCX as a count depending on the
19238 address size override, not the operand one. */
19239 IRExpr* zbit = NULL;
19240 IRExpr* count = NULL;
19241 IRExpr* cond = NULL;
19242 HChar* xtra = NULL;
19243
19244 if (have66orF2orF3(pfx) || 1==getRexW(pfx)) goto decode_failure;
19245 /* So at this point we've rejected any variants which appear to
19246 be governed by the usual operand-size modifiers. Hence only
19247 the address size prefix can have an effect. It changes the
19248 size from 64 (default) to 32. */
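      /* E.g. "67 E2 fe" is LOOP with the 0x67 address-size override, so
         ECX rather than RCX is decremented and tested by the code below. */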
19249 d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta);
19250 delta++;
19251 if (haveASO(pfx)) {
19252 /* 64to32 of 64-bit get is merely a get-put improvement
19253 trick. */
19254 putIReg32(R_RCX, binop(Iop_Sub32,
19255 unop(Iop_64to32, getIReg64(R_RCX)),
19256 mkU32(1)));
19257 } else {
19258 putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1)));
19259 }
19260
19261 /* This is correct, both for 32- and 64-bit versions. If we're
19262 doing a 32-bit dec and the result is zero then the default
19263 zero extension rule will cause the upper 32 bits to be zero
19264 too. Hence a 64-bit check against zero is OK. */
19265 count = getIReg64(R_RCX);
19266 cond = binop(Iop_CmpNE64, count, mkU64(0));
19267 switch (opc) {
19268 case 0xE2:
19269 xtra = "";
19270 break;
19271 case 0xE1:
19272 xtra = "e";
19273 zbit = mk_amd64g_calculate_condition( AMD64CondZ );
19274 cond = mkAnd1(cond, zbit);
19275 break;
19276 case 0xE0:
19277 xtra = "ne";
19278 zbit = mk_amd64g_calculate_condition( AMD64CondNZ );
19279 cond = mkAnd1(cond, zbit);
19280 break;
19281 default:
19282 vassert(0);
19283 }
19284 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64), OFFB_RIP) );
19285
19286 DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? "l" : "", d64);
19287 return delta;
19288 }
19289
19290 case 0xE3:
19291 /* JRCXZ or JECXZ, depending address size override. */
19292 if (have66orF2orF3(pfx)) goto decode_failure;
19293 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
19294 delta++;
19295 if (haveASO(pfx)) {
19296 /* 32-bit */
19297 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
19298 unop(Iop_32Uto64, getIReg32(R_RCX)),
19299 mkU64(0)),
19300 Ijk_Boring,
19301 IRConst_U64(d64),
19302 OFFB_RIP
19303 ));
19304 DIP("jecxz 0x%llx\n", d64);
19305 } else {
19306 /* 64-bit */
19307 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
19308 getIReg64(R_RCX),
19309 mkU64(0)),
19310 Ijk_Boring,
19311 IRConst_U64(d64),
19312 OFFB_RIP
19313 ));
19314 DIP("jrcxz 0x%llx\n", d64);
19315 }
19316 return delta;
19317
19318 case 0xE4: /* IN imm8, AL */
19319 sz = 1;
19320 t1 = newTemp(Ity_I64);
19321 abyte = getUChar(delta); delta++;
19322 assign(t1, mkU64( abyte & 0xFF ));
19323 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
19324 goto do_IN;
19325 case 0xE5: /* IN imm8, eAX */
19326 if (!(sz == 2 || sz == 4)) goto decode_failure;
19327 t1 = newTemp(Ity_I64);
19328 abyte = getUChar(delta); delta++;
19329 assign(t1, mkU64( abyte & 0xFF ));
19330 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
19331 goto do_IN;
19332 case 0xEC: /* IN %DX, AL */
19333 sz = 1;
19334 t1 = newTemp(Ity_I64);
19335 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
19336 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
19337 nameIRegRAX(sz));
19338 goto do_IN;
19339 case 0xED: /* IN %DX, eAX */
19340 if (!(sz == 2 || sz == 4)) goto decode_failure;
19341 t1 = newTemp(Ity_I64);
19342 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
19343 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
19344 nameIRegRAX(sz));
19345 goto do_IN;
19346 do_IN: {
19347 /* At this point, sz indicates the width, and t1 is a 64-bit
19348 value giving port number. */
19349 IRDirty* d;
19350 if (haveF2orF3(pfx)) goto decode_failure;
19351 vassert(sz == 1 || sz == 2 || sz == 4);
19352 ty = szToITy(sz);
19353 t2 = newTemp(Ity_I64);
19354 d = unsafeIRDirty_1_N(
19355 t2,
19356 0/*regparms*/,
19357 "amd64g_dirtyhelper_IN",
19358 &amd64g_dirtyhelper_IN,
19359 mkIRExprVec_2( mkexpr(t1), mkU64(sz) )
19360 );
19361 /* do the call, dumping the result in t2. */
19362 stmt( IRStmt_Dirty(d) );
19363 putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) );
19364 return delta;
19365 }
19366
19367 case 0xE6: /* OUT AL, imm8 */
19368 sz = 1;
19369 t1 = newTemp(Ity_I64);
19370 abyte = getUChar(delta); delta++;
19371 assign( t1, mkU64( abyte & 0xFF ) );
19372 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
19373 goto do_OUT;
19374 case 0xE7: /* OUT eAX, imm8 */
19375 if (!(sz == 2 || sz == 4)) goto decode_failure;
19376 t1 = newTemp(Ity_I64);
19377 abyte = getUChar(delta); delta++;
19378 assign( t1, mkU64( abyte & 0xFF ) );
19379 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
19380 goto do_OUT;
19381 case 0xEE: /* OUT AL, %DX */
19382 sz = 1;
19383 t1 = newTemp(Ity_I64);
19384 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
19385 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
19386 nameIRegRDX(2));
19387 goto do_OUT;
19388 case 0xEF: /* OUT eAX, %DX */
19389 if (!(sz == 2 || sz == 4)) goto decode_failure;
19390 t1 = newTemp(Ity_I64);
19391 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
19392 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
19393 nameIRegRDX(2));
19394 goto do_OUT;
19395 do_OUT: {
19396 /* At this point, sz indicates the width, and t1 is a 64-bit
19397 value giving port number. */
19398 IRDirty* d;
19399 if (haveF2orF3(pfx)) goto decode_failure;
19400 vassert(sz == 1 || sz == 2 || sz == 4);
19401 ty = szToITy(sz);
19402 d = unsafeIRDirty_0_N(
19403 0/*regparms*/,
19404 "amd64g_dirtyhelper_OUT",
19405 &amd64g_dirtyhelper_OUT,
19406 mkIRExprVec_3( mkexpr(t1),
19407 widenUto64( getIRegRAX(sz) ),
19408 mkU64(sz) )
19409 );
19410 stmt( IRStmt_Dirty(d) );
19411 return delta;
19412 }
19413
19414 case 0xE8: /* CALL J4 */
19415 if (haveF2orF3(pfx)) goto decode_failure;
19416 d64 = getSDisp32(delta); delta += 4;
19417 d64 += (guest_RIP_bbstart+delta);
19418 /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */
19419 t1 = newTemp(Ity_I64);
19420 assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
19421 putIReg64(R_RSP, mkexpr(t1));
19422 storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta));
19423 t2 = newTemp(Ity_I64);
19424 assign(t2, mkU64((Addr64)d64));
19425 make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32");
19426 if (resteerOkFn( callback_opaque, (Addr64)d64) ) {
19427 /* follow into the call target. */
19428 dres->whatNext = Dis_ResteerU;
19429 dres->continueAt = d64;
19430 } else {
19431 jmp_lit(dres, Ijk_Call, d64);
19432 vassert(dres->whatNext == Dis_StopHere);
19433 }
19434 DIP("call 0x%llx\n",d64);
19435 return delta;
19436
19437 case 0xE9: /* Jv (jump, 16/32 offset) */
19438 if (haveF2orF3(pfx)) goto decode_failure;
19439 if (sz != 4)
19440 goto decode_failure; /* JRS added 2004 July 11 */
19441 d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta);
19442 delta += sz;
19443 if (resteerOkFn(callback_opaque,d64)) {
19444 dres->whatNext = Dis_ResteerU;
19445 dres->continueAt = d64;
19446 } else {
19447 jmp_lit(dres, Ijk_Boring, d64);
19448 vassert(dres->whatNext == Dis_StopHere);
19449 }
19450 DIP("jmp 0x%llx\n", d64);
19451 return delta;
19452
19453 case 0xEB: /* Jb (jump, byte offset) */
19454 if (haveF2orF3(pfx)) goto decode_failure;
19455 if (sz != 4)
19456 goto decode_failure; /* JRS added 2004 July 11 */
19457 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
19458 delta++;
19459 if (resteerOkFn(callback_opaque,d64)) {
19460 dres->whatNext = Dis_ResteerU;
19461 dres->continueAt = d64;
19462 } else {
19463 jmp_lit(dres, Ijk_Boring, d64);
19464 vassert(dres->whatNext == Dis_StopHere);
19465 }
19466 DIP("jmp-8 0x%llx\n", d64);
19467 return delta;
19468
19469 case 0xF5: /* CMC */
19470 case 0xF8: /* CLC */
19471 case 0xF9: /* STC */
19472 t1 = newTemp(Ity_I64);
19473 t2 = newTemp(Ity_I64);
19474 assign( t1, mk_amd64g_calculate_rflags_all() );
19475 switch (opc) {
19476 case 0xF5:
19477 assign( t2, binop(Iop_Xor64, mkexpr(t1),
19478 mkU64(AMD64G_CC_MASK_C)));
19479 DIP("cmc\n");
19480 break;
19481 case 0xF8:
19482 assign( t2, binop(Iop_And64, mkexpr(t1),
19483 mkU64(~AMD64G_CC_MASK_C)));
19484 DIP("clc\n");
19485 break;
19486 case 0xF9:
19487 assign( t2, binop(Iop_Or64, mkexpr(t1),
19488 mkU64(AMD64G_CC_MASK_C)));
19489 DIP("stc\n");
19490 break;
19491 default:
19492 vpanic("disInstr(x64)(cmc/clc/stc)");
19493 }
19494 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
19495 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
19496 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t2) ));
19497 /* Set NDEP even though it isn't used. This makes redundant-PUT
19498 elimination of previous stores to this field work better. */
19499 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
19500 return delta;
19501
19502 case 0xF6: { /* Grp3 Eb */
19503 Bool decode_OK = True;
19504 if (haveF2orF3(pfx)) goto decode_failure;
19505 delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK );
19506 if (!decode_OK) goto decode_failure;
19507 return delta;
19508 }
19509
19510 case 0xF7: { /* Grp3 Ev */
19511 Bool decode_OK = True;
19512 if (haveF2orF3(pfx)) goto decode_failure;
19513 delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK );
19514 if (!decode_OK) goto decode_failure;
19515 return delta;
19516 }
19517
19518 case 0xFC: /* CLD */
19519 if (haveF2orF3(pfx)) goto decode_failure;
19520 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) );
19521 DIP("cld\n");
19522 return delta;
19523
19524 case 0xFD: /* STD */
19525 if (haveF2orF3(pfx)) goto decode_failure;
19526 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) );
19527 DIP("std\n");
19528 return delta;
19529
19530 case 0xFE: { /* Grp4 Eb */
19531 Bool decode_OK = True;
19532 if (haveF2orF3(pfx)) goto decode_failure;
19533 delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK );
19534 if (!decode_OK) goto decode_failure;
19535 return delta;
19536 }
19537
19538 case 0xFF: { /* Grp5 Ev */
19539 Bool decode_OK = True;
19540 if (haveF2orF3(pfx)) goto decode_failure;
19541 delta = dis_Grp5 ( vbi, pfx, sz, delta, dres, &decode_OK );
19542 if (!decode_OK) goto decode_failure;
19543 return delta;
19544 }
19545
19546 default:
19547 break;
19548
19549 }
19550
19551 decode_failure:
19552 return deltaIN; /* fail */
19553}
19554
19555
19556/*------------------------------------------------------------*/
19557/*--- ---*/
19558/*--- Top-level post-escape decoders: dis_ESC_0F ---*/
19559/*--- ---*/
19560/*------------------------------------------------------------*/
19561
19562 static IRTemp math_BSWAP ( IRTemp t1, IRType ty )
19563{
19564 IRTemp t2 = newTemp(ty);
19565 if (ty == Ity_I64) {
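      /* Classic staged byte reversal, sketched: stage 1 (m8/s8) swaps
         adjacent bytes, stage 2 (m16/s16) swaps adjacent 16-bit units
         within each 32-bit word, stage 3 (m32) swaps the two 32-bit
         halves. Each stage is mask + shift right + shift left, OR'd. */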
19566 IRTemp m8 = newTemp(Ity_I64);
19567 IRTemp s8 = newTemp(Ity_I64);
19568 IRTemp m16 = newTemp(Ity_I64);
19569 IRTemp s16 = newTemp(Ity_I64);
19570 IRTemp m32 = newTemp(Ity_I64);
19571 assign( m8, mkU64(0xFF00FF00FF00FF00ULL) );
19572 assign( s8,
19573 binop(Iop_Or64,
19574 binop(Iop_Shr64,
19575 binop(Iop_And64,mkexpr(t1),mkexpr(m8)),
19576 mkU8(8)),
19577 binop(Iop_And64,
19578 binop(Iop_Shl64,mkexpr(t1),mkU8(8)),
19579 mkexpr(m8))
19580 )
19581 );
19582
19583 assign( m16, mkU64(0xFFFF0000FFFF0000ULL) );
19584 assign( s16,
19585 binop(Iop_Or64,
19586 binop(Iop_Shr64,
19587 binop(Iop_And64,mkexpr(s8),mkexpr(m16)),
19588 mkU8(16)),
19589 binop(Iop_And64,
19590 binop(Iop_Shl64,mkexpr(s8),mkU8(16)),
19591 mkexpr(m16))
19592 )
19593 );
19594
19595 assign( m32, mkU64(0xFFFFFFFF00000000ULL) );
19596 assign( t2,
19597 binop(Iop_Or64,
19598 binop(Iop_Shr64,
19599 binop(Iop_And64,mkexpr(s16),mkexpr(m32)),
19600 mkU8(32)),
19601 binop(Iop_And64,
19602 binop(Iop_Shl64,mkexpr(s16),mkU8(32)),
19603 mkexpr(m32))
19604 )
19605 );
19606 return t2;
19607 }
19608 if (ty == Ity_I32) {
19609 assign( t2,
19610 binop(
19611 Iop_Or32,
19612 binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
19613 binop(
19614 Iop_Or32,
19615 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
19616 mkU32(0x00FF0000)),
19617 binop(Iop_Or32,
19618 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
19619 mkU32(0x0000FF00)),
19620 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
19621 mkU32(0x000000FF) )
19622 )))
19623 );
19624 return t2;
19625 }
19626 if (ty == Ity_I16) {
19627 assign(t2,
19628 binop(Iop_Or16,
19629 binop(Iop_Shl16, mkexpr(t1), mkU8(8)),
19630 binop(Iop_Shr16, mkexpr(t1), mkU8(8)) ));
19631 return t2;
19632 }
19633 vassert(0);
19634 /*NOTREACHED*/
19635 return IRTemp_INVALID;
19636}
19637
19638
19639 __attribute__((noinline))
19640static
19641Long dis_ESC_0F (
19642 /*MB_OUT*/DisResult* dres,
19643 /*MB_OUT*/Bool* expect_CAS,
19644 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
19645 Bool resteerCisOk,
19646 void* callback_opaque,
19647 VexArchInfo* archinfo,
19648 VexAbiInfo* vbi,
19649 Prefix pfx, Int sz, Long deltaIN
19650 )
19651{
19652 Long d64 = 0;
19653 IRTemp addr = IRTemp_INVALID;
19654 IRTemp t1 = IRTemp_INVALID;
19655 IRTemp t2 = IRTemp_INVALID;
19656 UChar modrm = 0;
19657 Int am_sz = 0;
19658 Int alen = 0;
19659 HChar dis_buf[50];
19660
19661 /* In the first switch, look for ordinary integer insns. */
19662 Long delta = deltaIN;
19663 UChar opc = getUChar(delta);
19664 delta++;
19665 switch (opc) { /* first switch */
19666
19667 case 0x01:
19668 {
19669 modrm = getUChar(delta);
19670 /* 0F 01 /0 -- SGDT */
19671 /* 0F 01 /1 -- SIDT */
19672 if (!epartIsReg(modrm)
19673 && (gregLO3ofRM(modrm) == 0 || gregLO3ofRM(modrm) == 1)) {
19674 /* This is really revolting, but ... since each processor
19675 (core) only has one IDT and one GDT, just let the guest
19676 see it (pass-through semantics). I can't see any way to
19677 construct a faked-up value, so don't bother to try. */
19678 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
19679 delta += alen;
19680 switch (gregLO3ofRM(modrm)) {
19681 case 0: DIP("sgdt %s\n", dis_buf); break;
19682 case 1: DIP("sidt %s\n", dis_buf); break;
19683 default: vassert(0); /*NOTREACHED*/
19684 }
19685 IRDirty* d = unsafeIRDirty_0_N (
19686 0/*regparms*/,
19687 "amd64g_dirtyhelper_SxDT",
19688 &amd64g_dirtyhelper_SxDT,
19689 mkIRExprVec_2( mkexpr(addr),
19690 mkU64(gregLO3ofRM(modrm)) )
19691 );
19692 /* declare we're writing memory */
19693 d->mFx = Ifx_Write;
19694 d->mAddr = mkexpr(addr);
19695 d->mSize = 6;
19696 stmt( IRStmt_Dirty(d) );
19697 return delta;
19698 }
19699 /* 0F 01 D0 = XGETBV */
19700 if (modrm == 0xD0 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
19701 delta += 1;
19702 DIP("xgetbv\n");
19703 /* Fault (SEGV) if ECX isn't zero. Intel docs say #GP and I
19704 am not sure if that translates in to SEGV or to something
19705 else, in user space. */
19706 t1 = newTemp(Ity_I32);
19707 assign( t1, getIReg32(R_RCX) );
19708 stmt( IRStmt_Exit(binop(Iop_CmpNE32, mkexpr(t1), mkU32(0)),
19709 Ijk_SigSEGV,
19710 IRConst_U64(guest_RIP_curr_instr),
19711 OFFB_RIP
19712 ));
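      /* Behaviour claimed here: with ECX == 0, report XCR0 = 7, i.e. x87
         (bit 0), SSE (bit 1) and AVX (bit 2) state all enabled; the upper
         half of XCR0, returned in EDX, is zero. */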
19713 putIRegRAX(4, mkU32(7));
19714 putIRegRDX(4, mkU32(0));
19715 return delta;
19716 }
19717 /* else decode failed */
19718 break;
19719 }
19720
19721 case 0x05: /* SYSCALL */
19722 guest_RIP_next_mustcheck = True;
19723 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
19724 putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) );
19725 /* It's important that all guest state is up-to-date
19726 at this point. So we declare an end-of-block here, which
19727 forces any cached guest state to be flushed. */
19728 jmp_lit(dres, Ijk_Sys_syscall, guest_RIP_next_assumed);
19729 vassert(dres->whatNext == Dis_StopHere);
19730 DIP("syscall\n");
19731 return delta;
19732
19733 case 0x0B: /* UD2 */
19734 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
19735 jmp_lit(dres, Ijk_NoDecode, guest_RIP_curr_instr);
19736 vassert(dres->whatNext == Dis_StopHere);
19737 DIP("ud2\n");
19738 return delta;
19739
19740 case 0x1F:
19741 if (haveF2orF3(pfx)) goto decode_failure;
19742 modrm = getUChar(delta);
19743 if (epartIsReg(modrm)) goto decode_failure;
19744 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
19745 delta += alen;
19746 DIP("nop%c %s\n", nameISize(sz), dis_buf);
19747 return delta;
19748
19749 case 0x31: { /* RDTSC */
19750 IRTemp val = newTemp(Ity_I64);
19751 IRExpr** args = mkIRExprVec_0();
19752 IRDirty* d = unsafeIRDirty_1_N (
19753 val,
19754 0/*regparms*/,
19755 "amd64g_dirtyhelper_RDTSC",
19756 &amd64g_dirtyhelper_RDTSC,
19757 args
19758 );
19759 if (have66orF2orF3(pfx)) goto decode_failure;
19760 /* execute the dirty call, dumping the result in val. */
19761 stmt( IRStmt_Dirty(d) );
19762 putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val)));
19763 putIRegRAX(4, unop(Iop_64to32, mkexpr(val)));
19764 DIP("rdtsc\n");
19765 return delta;
19766 }
19767
19768 case 0x40: /* CMOVOb (cmov overflow) */
19769 case 0x41: /* CMOVNOb (cmov no overflow) */
19770 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
19771 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
19772 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
19773 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
19774 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
19775 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
19776 case 0x48: /* CMOVSb (cmov negative) */
19777 case 0x49: /* CMOVNSb (cmov not negative) */
19778 case 0x4A: /* CMOVP (cmov parity even) */
19779 case 0x4B: /* CMOVNP (cmov parity odd) */
19780 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
19781 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
19782 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
19783 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
19784 if (haveF2orF3(pfx)) goto decode_failure;
19785 delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta);
19786 return delta;
19787
19788 case 0x80: /* JOb (jump overflow) */
19789 case 0x81: /* JNOb (jump no overflow) */
19790 case 0x82: /* JBb/JNAEb (jump below) */
19791 case 0x83: /* JNBb/JAEb (jump not below) */
19792 case 0x84: /* JZb/JEb (jump zero) */
19793 case 0x85: /* JNZb/JNEb (jump not zero) */
19794 case 0x86: /* JBEb/JNAb (jump below or equal) */
19795 case 0x87: /* JNBEb/JAb (jump not below or equal) */
19796 case 0x88: /* JSb (jump negative) */
19797 case 0x89: /* JNSb (jump not negative) */
19798 case 0x8A: /* JP (jump parity even) */
19799 case 0x8B: /* JNP/JPO (jump parity odd) */
19800 case 0x8C: /* JLb/JNGEb (jump less) */
19801 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
19802 case 0x8E: /* JLEb/JNGb (jump less or equal) */
19803 case 0x8F: { /* JGb/JNLEb (jump greater) */
19804 Long jmpDelta;
19805 HChar* comment = "";
19806 if (haveF2orF3(pfx)) goto decode_failure;
19807 jmpDelta = getSDisp32(delta);
19808 d64 = (guest_RIP_bbstart+delta+4) + jmpDelta;
19809 delta += 4;
19810 if (resteerCisOk
19811 && vex_control.guest_chase_cond
19812 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
19813 && jmpDelta < 0
19814 && resteerOkFn( callback_opaque, d64) ) {
19815 /* Speculation: assume this backward branch is taken. So
19816 we need to emit a side-exit to the insn following this
19817 one, on the negation of the condition, and continue at
19818 the branch target address (d64). If we wind up back at
19819 the first instruction of the trace, just stop; it's
19820 better to let the IR loop unroller handle that case. */
19821 stmt( IRStmt_Exit(
19822 mk_amd64g_calculate_condition(
19823 (AMD64Condcode)(1 ^ (opc - 0x80))),
19824 Ijk_Boring,
19825 IRConst_U64(guest_RIP_bbstart+delta),
19826 OFFB_RIP
19827 ));
19828 dres->whatNext = Dis_ResteerC;
19829 dres->continueAt = d64;
19830 comment = "(assumed taken)";
19831 }
19832 else
19833 if (resteerCisOk
19834 && vex_control.guest_chase_cond
19835 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
19836 && jmpDelta >= 0
19837 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) {
19838 /* Speculation: assume this forward branch is not taken.
19839 So we need to emit a side-exit to d64 (the dest) and
19840 continue disassembling at the insn immediately
19841 following this one. */
19842 stmt( IRStmt_Exit(
19843 mk_amd64g_calculate_condition((AMD64Condcode)
19844 (opc - 0x80)),
19845 Ijk_Boring,
sewardjc6f970f2012-04-02 21:54:49 +000019846 IRConst_U64(d64),
19847 OFFB_RIP
19848 ));
sewardj80611e32012-01-20 13:07:24 +000019849 dres->whatNext = Dis_ResteerC;
19850 dres->continueAt = guest_RIP_bbstart+delta;
19851 comment = "(assumed not taken)";
19852 }
19853 else {
19854 /* Conservative default translation - end the block at
19855 this point. */
sewardjc6f970f2012-04-02 21:54:49 +000019856 jcc_01( dres, (AMD64Condcode)(opc - 0x80),
19857 guest_RIP_bbstart+delta, d64 );
19858 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000019859 }
19860 DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), d64, comment);
19861 return delta;
19862 }
19863
19864 case 0x90: /* set-Ob (set if overflow) */
19865 case 0x91: /* set-NOb (set if no overflow) */
19866 case 0x92: /* set-Bb/set-NAEb (set if below) */
19867 case 0x93: /* set-NBb/set-AEb (set if not below) */
19868 case 0x94: /* set-Zb/set-Eb (set if zero) */
19869 case 0x95: /* set-NZb/set-NEb (set if not zero) */
19870 case 0x96: /* set-BEb/set-NAb (set if below or equal) */
19871 case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
19872 case 0x98: /* set-Sb (set if negative) */
19873 case 0x99: /* set-NSb (set if not negative) */
19874 case 0x9A: /* set-P (set if parity even) */
19875 case 0x9B: /* set-NP (set if parity odd) */
19876 case 0x9C: /* set-Lb/set-NGEb (set if less) */
19877 case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
19878 case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
19879 case 0x9F: /* set-Gb/set-NLEb (set if greater) */
19880 if (haveF2orF3(pfx)) goto decode_failure;
19881 t1 = newTemp(Ity_I8);
19882 assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) );
19883 modrm = getUChar(delta);
19884 if (epartIsReg(modrm)) {
19885 delta++;
19886 putIRegE(1, pfx, modrm, mkexpr(t1));
19887 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90),
19888 nameIRegE(1,pfx,modrm));
19889 } else {
19890 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
19891 delta += alen;
19892 storeLE( mkexpr(addr), mkexpr(t1) );
19893 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf);
19894 }
19895 return delta;
19896
19897 case 0xA2: { /* CPUID */
19898 /* Uses dirty helper:
19899 void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* )
19900 declared to mod rax, wr rbx, rcx, rdx
19901 */
19902 IRDirty* d = NULL;
19903 HChar* fName = NULL;
19904 void* fAddr = NULL;
19905 if (haveF2orF3(pfx)) goto decode_failure;
sewardjf1d78fb2012-06-25 07:58:53 +000019906 if (archinfo->hwcaps == (VEX_HWCAPS_AMD64_SSE3
sewardjfe0c5e72012-06-15 15:48:07 +000019907 |VEX_HWCAPS_AMD64_CX16
19908 |VEX_HWCAPS_AMD64_AVX)) {
19909 fName = "amd64g_dirtyhelper_CPUID_avx_and_cx16";
19910 fAddr = &amd64g_dirtyhelper_CPUID_avx_and_cx16;
19911 /* This is a Core-i5-2300-like machine */
19912 }
19913 else if (archinfo->hwcaps == (VEX_HWCAPS_AMD64_SSE3
19914 |VEX_HWCAPS_AMD64_CX16)) {
sewardj80611e32012-01-20 13:07:24 +000019915 fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16";
19916 fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16;
sewardjfe0c5e72012-06-15 15:48:07 +000019917 /* This is a Core-i5-670-like machine */
sewardj80611e32012-01-20 13:07:24 +000019918 }
19919 else {
19920 /* Give a CPUID for at least a baseline machine, SSE2
19921 only, and no CX16 */
19922 fName = "amd64g_dirtyhelper_CPUID_baseline";
19923 fAddr = &amd64g_dirtyhelper_CPUID_baseline;
19924 }
19925
19926 vassert(fName); vassert(fAddr);
19927 d = unsafeIRDirty_0_N ( 0/*regparms*/,
19928 fName, fAddr, mkIRExprVec_0() );
19929 /* declare guest state effects */
19930 d->needsBBP = True;
19931 d->nFxState = 4;
sewardjc9069f22012-06-01 16:09:50 +000019932 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj80611e32012-01-20 13:07:24 +000019933 d->fxState[0].fx = Ifx_Modify;
19934 d->fxState[0].offset = OFFB_RAX;
19935 d->fxState[0].size = 8;
19936 d->fxState[1].fx = Ifx_Write;
19937 d->fxState[1].offset = OFFB_RBX;
19938 d->fxState[1].size = 8;
19939 d->fxState[2].fx = Ifx_Modify;
19940 d->fxState[2].offset = OFFB_RCX;
19941 d->fxState[2].size = 8;
19942 d->fxState[3].fx = Ifx_Write;
19943 d->fxState[3].offset = OFFB_RDX;
19944 d->fxState[3].size = 8;
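      /* RAX and RCX are Ifx_Modify because CPUID reads the requested
         leaf (EAX) and sub-leaf (ECX) from them before overwriting
         them; RBX and RDX are written only. */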
19945 /* execute the dirty call, side-effecting guest state */
19946 stmt( IRStmt_Dirty(d) );
19947 /* CPUID is a serialising insn. So, just in case someone is
19948 using it as a memory fence ... */
19949 stmt( IRStmt_MBE(Imbe_Fence) );
19950 DIP("cpuid\n");
19951 return delta;
19952 }
19953
19954 case 0xA3: /* BT Gv,Ev */
19955 if (haveF2orF3(pfx)) goto decode_failure;
19956 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
19957 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone );
19958 return delta;
19959
19960 case 0xA4: /* SHLDv imm8,Gv,Ev */
19961 modrm = getUChar(delta);
19962 d64 = delta + lengthAMode(pfx, delta);
19963 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
19964 delta = dis_SHLRD_Gv_Ev (
19965 vbi, pfx, delta, modrm, sz,
19966 mkU8(getUChar(d64)), True, /* literal */
19967 dis_buf, True /* left */ );
19968 return delta;
19969
19970 case 0xA5: /* SHLDv %cl,Gv,Ev */
19971 modrm = getUChar(delta);
19972 delta = dis_SHLRD_Gv_Ev (
19973 vbi, pfx, delta, modrm, sz,
19974 getIRegCL(), False, /* not literal */
19975 "%cl", True /* left */ );
19976 return delta;
19977
19978 case 0xAB: /* BTS Gv,Ev */
19979 if (haveF2orF3(pfx)) goto decode_failure;
19980 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
19981 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet );
19982 return delta;
19983
19984 case 0xAC: /* SHRDv imm8,Gv,Ev */
19985 modrm = getUChar(delta);
19986 d64 = delta + lengthAMode(pfx, delta);
19987 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
19988 delta = dis_SHLRD_Gv_Ev (
19989 vbi, pfx, delta, modrm, sz,
19990 mkU8(getUChar(d64)), True, /* literal */
19991 dis_buf, False /* right */ );
19992 return delta;
19993
19994 case 0xAD: /* SHRDv %cl,Gv,Ev */
19995 modrm = getUChar(delta);
19996 delta = dis_SHLRD_Gv_Ev (
19997 vbi, pfx, delta, modrm, sz,
19998 getIRegCL(), False, /* not literal */
19999 "%cl", False /* right */);
20000 return delta;
20001
20002 case 0xAF: /* IMUL Ev, Gv */
20003 if (haveF2orF3(pfx)) goto decode_failure;
20004 delta = dis_mul_E_G ( vbi, pfx, sz, delta );
20005 return delta;
20006
20007 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */
20008 Bool ok = True;
20009 if (haveF2orF3(pfx)) goto decode_failure;
20010 if (sz != 2 && sz != 4 && sz != 8) goto decode_failure;
20011 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta );
20012 if (!ok) goto decode_failure;
20013 return delta;
20014 }
20015
20016 case 0xB0: { /* CMPXCHG Gb,Eb */
20017 Bool ok = True;
20018 if (haveF2orF3(pfx)) goto decode_failure;
20019 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta );
20020 if (!ok) goto decode_failure;
20021 return delta;
20022 }
20023
20024 case 0xB3: /* BTR Gv,Ev */
20025 if (haveF2orF3(pfx)) goto decode_failure;
20026 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
20027 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset );
20028 return delta;
20029
20030 case 0xB6: /* MOVZXb Eb,Gv */
20031 if (haveF2orF3(pfx)) goto decode_failure;
20032 if (sz != 2 && sz != 4 && sz != 8)
20033 goto decode_failure;
20034 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False );
20035 return delta;
20036
20037 case 0xB7: /* MOVZXw Ew,Gv */
20038 if (haveF2orF3(pfx)) goto decode_failure;
20039 if (sz != 4 && sz != 8)
20040 goto decode_failure;
20041 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False );
20042 return delta;
20043
20044 case 0xBA: { /* Grp8 Ib,Ev */
20045 Bool decode_OK = False;
20046 if (haveF2orF3(pfx)) goto decode_failure;
20047 modrm = getUChar(delta);
20048 am_sz = lengthAMode(pfx,delta);
20049 d64 = getSDisp8(delta + am_sz);
20050 delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64,
20051 &decode_OK );
20052 if (!decode_OK)
20053 goto decode_failure;
20054 return delta;
20055 }
20056
20057 case 0xBB: /* BTC Gv,Ev */
20058 if (haveF2orF3(pfx)) goto decode_failure;
20059 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
20060 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp );
20061 return delta;
20062
20063 case 0xBC: /* BSF Gv,Ev */
20064 if (haveF2orF3(pfx)) goto decode_failure;
20065 delta = dis_bs_E_G ( vbi, pfx, sz, delta, True );
20066 return delta;
20067
20068 case 0xBD: /* BSR Gv,Ev */
20069 if (!haveF2orF3(pfx)) {
20070 /* no-F2 no-F3 0F BD = BSR */
20071 delta = dis_bs_E_G ( vbi, pfx, sz, delta, False );
20072 return delta;
20073 }
20074 /* Fall through, since F3 0F BD is LZCNT, and needs to
20075 be handled by dis_ESC_0F__SSE4. */
20076 break;
20077
20078 case 0xBE: /* MOVSXb Eb,Gv */
20079 if (haveF2orF3(pfx)) goto decode_failure;
20080 if (sz != 2 && sz != 4 && sz != 8)
20081 goto decode_failure;
20082 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True );
20083 return delta;
20084
20085 case 0xBF: /* MOVSXw Ew,Gv */
20086 if (haveF2orF3(pfx)) goto decode_failure;
20087 if (sz != 4 && sz != 8)
20088 goto decode_failure;
20089 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True );
20090 return delta;
20091
20092 case 0xC1: { /* XADD Gv,Ev */
20093 Bool decode_OK = False;
20094 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta );
20095 if (!decode_OK)
20096 goto decode_failure;
20097 return delta;
20098 }
20099
20100 case 0xC7: { /* CMPXCHG8B Ev, CMPXCHG16B Ev */
20101 IRType elemTy = sz==4 ? Ity_I32 : Ity_I64;
20102 IRTemp expdHi = newTemp(elemTy);
20103 IRTemp expdLo = newTemp(elemTy);
20104 IRTemp dataHi = newTemp(elemTy);
20105 IRTemp dataLo = newTemp(elemTy);
20106 IRTemp oldHi = newTemp(elemTy);
20107 IRTemp oldLo = newTemp(elemTy);
20108 IRTemp flags_old = newTemp(Ity_I64);
20109 IRTemp flags_new = newTemp(Ity_I64);
20110 IRTemp success = newTemp(Ity_I1);
20111 IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64;
20112 IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64;
20113 IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64;
20114 IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0);
20115 IRTemp expdHi64 = newTemp(Ity_I64);
20116 IRTemp expdLo64 = newTemp(Ity_I64);
20117
20118 /* Translate this using a DCAS, even if there is no LOCK
20119 prefix. Life is too short to bother with generating two
20120 different translations for the with/without-LOCK-prefix
20121 cases. */
20122 *expect_CAS = True;
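      /* (The top-level decoder is expected to cross-check that an
         IRCAS appears in the generated IR exactly when expect_CAS is
         set, so flag it here regardless of any LOCK prefix.) */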
20123
20124 /* Decode, and generate address. */
20125 if (have66orF2orF3(pfx)) goto decode_failure;
20126 if (sz != 4 && sz != 8) goto decode_failure;
20127 if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16))
20128 goto decode_failure;
20129 modrm = getUChar(delta);
20130 if (epartIsReg(modrm)) goto decode_failure;
20131 if (gregLO3ofRM(modrm) != 1) goto decode_failure;
20132 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
20133 delta += alen;
20134
20135 /* cmpxchg16b requires an alignment check. */
20136 if (sz == 8)
20137 gen_SEGV_if_not_16_aligned( addr );
20138
20139 /* Get the expected and new values. */
20140 assign( expdHi64, getIReg64(R_RDX) );
20141 assign( expdLo64, getIReg64(R_RAX) );
20142
20143 /* These are the correctly-sized expected and new values.
20144 However, we also get expdHi64/expdLo64 above as 64-bits
20145 regardless, because we will need them later in the 32-bit
20146 case (paradoxically). */
20147 assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64))
20148 : mkexpr(expdHi64) );
20149 assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64))
20150 : mkexpr(expdLo64) );
20151 assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) );
20152 assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) );
20153
20154 /* Do the DCAS */
20155 stmt( IRStmt_CAS(
20156 mkIRCAS( oldHi, oldLo,
20157 Iend_LE, mkexpr(addr),
20158 mkexpr(expdHi), mkexpr(expdLo),
20159 mkexpr(dataHi), mkexpr(dataLo)
20160 )));
20161
20162 /* success when oldHi:oldLo == expdHi:expdLo */
20163 assign( success,
20164 binop(opCasCmpEQ,
20165 binop(opOR,
20166 binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)),
20167 binop(opXOR, mkexpr(oldLo), mkexpr(expdLo))
20168 ),
20169 zero
20170 ));
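      /* ((oldHi ^ expdHi) | (oldLo ^ expdLo)) == 0 precisely when both
         halves match, so a single comparison against zero suffices. */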
20171
20172 /* If the DCAS is successful, that is to say oldHi:oldLo ==
20173 expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
20174 which is where they came from originally. Both the actual
20175 contents of these two regs, and any shadow values, are
20176 unchanged. If the DCAS fails then we're putting into
20177 RDX:RAX the value seen in memory. */
20178 /* Now of course there's a complication in the 32-bit case
20179 (bah!): if the DCAS succeeds, we need to leave RDX:RAX
20180 unchanged; but if we use the same scheme as in the 64-bit
20181 case, we get hit by the standard rule that a write to the
20182 bottom 32 bits of an integer register zeros the upper 32
20183 bits. And so the upper halves of RDX and RAX mysteriously
20184 become zero. So we have to stuff back in the original
20185 64-bit values which we previously stashed in
20186 expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
20187 /* It's just _so_ much fun ... */
20188 putIRegRDX( 8,
20189 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
20190 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi))
20191 : mkexpr(oldHi),
20192 mkexpr(expdHi64)
20193 ));
20194 putIRegRAX( 8,
20195 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
20196 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldLo))
20197 : mkexpr(oldLo),
20198 mkexpr(expdLo64)
20199 ));
20200
20201 /* Copy the success bit into the Z flag and leave the others
20202 unchanged */
20203 assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all()));
20204 assign(
20205 flags_new,
20206 binop(Iop_Or64,
20207 binop(Iop_And64, mkexpr(flags_old),
20208 mkU64(~AMD64G_CC_MASK_Z)),
20209 binop(Iop_Shl64,
20210 binop(Iop_And64,
20211 unop(Iop_1Uto64, mkexpr(success)), mkU64(1)),
20212 mkU8(AMD64G_CC_SHIFT_Z)) ));
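      /* In other words:
         flags_new = (flags_old & ~Z_MASK) | ((success ? 1 : 0) << Z_SHIFT) */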
20213
20214 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
20215 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
20216 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
20217 /* Set NDEP even though it isn't used. This makes
20218 redundant-PUT elimination of previous stores to this field
20219 work better. */
20220 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
20221
20222 /* Sheesh. Aren't you glad it was me and not you that had to
20223 write and validate all this grunge? */
20224
20225 DIP("cmpxchg8b %s\n", dis_buf);
20226 return delta;
20227 }
20228
20229 case 0xC8: /* BSWAP %eax */
20230 case 0xC9:
20231 case 0xCA:
20232 case 0xCB:
20233 case 0xCC:
20234 case 0xCD:
20235 case 0xCE:
20236 case 0xCF: /* BSWAP %edi */
20237 if (haveF2orF3(pfx)) goto decode_failure;
20238 /* According to the AMD64 docs, this insn can have size 4 or
20239 8. */
20240 if (sz == 4) {
20241 t1 = newTemp(Ity_I32);
sewardj80611e32012-01-20 13:07:24 +000020242 assign( t1, getIRegRexB(4, pfx, opc-0xC8) );
sewardjf85e1772012-07-15 10:11:10 +000020243 t2 = math_BSWAP( t1, Ity_I32 );
sewardj80611e32012-01-20 13:07:24 +000020244 putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2));
20245 DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8));
20246 return delta;
20247 }
20248 if (sz == 8) {
sewardj80611e32012-01-20 13:07:24 +000020249 t1 = newTemp(Ity_I64);
20250 t2 = newTemp(Ity_I64);
20251 assign( t1, getIRegRexB(8, pfx, opc-0xC8) );
sewardjf85e1772012-07-15 10:11:10 +000020252 t2 = math_BSWAP( t1, Ity_I64 );
sewardj80611e32012-01-20 13:07:24 +000020253 putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2));
20254 DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8));
20255 return delta;
20256 }
20257 goto decode_failure;
20258
20259 default:
20260 break;
20261
20262 } /* first switch */
20263
20264
20265 /* =-=-=-=-=-=-=-=-= MMXery =-=-=-=-=-=-=-=-= */
20266 /* In the second switch, pick off MMX insns. */
20267
20268 if (!have66orF2orF3(pfx)) {
20269 /* So there's no SIMD prefix. */
20270
20271 vassert(sz == 4 || sz == 8);
20272
20273 switch (opc) { /* second switch */
20274
20275 case 0x71:
20276 case 0x72:
20277 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
20278
20279 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
20280 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
20281 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
20282 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
20283
20284 case 0xFC:
20285 case 0xFD:
20286 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
20287
20288 case 0xEC:
20289 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
20290
20291 case 0xDC:
20292 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
20293
20294 case 0xF8:
20295 case 0xF9:
20296 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
20297
20298 case 0xE8:
20299 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
20300
20301 case 0xD8:
20302 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
20303
20304 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
20305 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
20306
20307 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
20308
20309 case 0x74:
20310 case 0x75:
20311 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
20312
20313 case 0x64:
20314 case 0x65:
20315 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
20316
20317 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
20318 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
20319 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
20320
20321 case 0x68:
20322 case 0x69:
20323 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
20324
20325 case 0x60:
20326 case 0x61:
20327 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
20328
20329 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
20330 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
20331 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
20332 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
20333
20334 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
20335 case 0xF2:
20336 case 0xF3:
20337
20338 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
20339 case 0xD2:
20340 case 0xD3:
20341
20342 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
20343 case 0xE2: {
20344 Bool decode_OK = False;
20345 delta = dis_MMX ( &decode_OK, vbi, pfx, sz, deltaIN );
20346 if (decode_OK)
20347 return delta;
20348 goto decode_failure;
20349 }
20350
20351 default:
20352 break;
20353 } /* second switch */
20354
20355 }
20356
20357 /* A couple of MMX corner cases */
20358 if (opc == 0x0E/* FEMMS */ || opc == 0x77/* EMMS */) {
20359 if (sz != 4)
20360 goto decode_failure;
20361 do_EMMS_preamble();
20362 DIP("{f}emms\n");
20363 return delta;
20364 }
20365
20366 /* =-=-=-=-=-=-=-=-= SSE2ery =-=-=-=-=-=-=-=-= */
20367 /* Perhaps it's an SSE or SSE2 instruction. We can try this
20368 without checking the guest hwcaps because SSE2 is a baseline
20369 facility in 64 bit mode. */
20370 {
20371 Bool decode_OK = False;
sewardj30fc0582012-02-16 13:45:13 +000020372 delta = dis_ESC_0F__SSE2 ( &decode_OK, vbi, pfx, sz, deltaIN, dres );
sewardj80611e32012-01-20 13:07:24 +000020373 if (decode_OK)
20374 return delta;
20375 }
20376
20377 /* =-=-=-=-=-=-=-=-= SSE3ery =-=-=-=-=-=-=-=-= */
20378 /* Perhaps it's a SSE3 instruction. FIXME: check guest hwcaps
20379 first. */
20380 {
20381 Bool decode_OK = False;
20382 delta = dis_ESC_0F__SSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
20383 if (decode_OK)
20384 return delta;
20385 }
20386
20387 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
20388 /* Perhaps it's a SSE4 instruction. FIXME: check guest hwcaps
20389 first. */
20390 {
20391 Bool decode_OK = False;
20392 delta = dis_ESC_0F__SSE4 ( &decode_OK,
20393 archinfo, vbi, pfx, sz, deltaIN );
20394 if (decode_OK)
20395 return delta;
20396 }
20397
20398 decode_failure:
20399 return deltaIN; /* fail */
20400}
20401
20402
20403/*------------------------------------------------------------*/
20404/*--- ---*/
20405/*--- Top-level post-escape decoders: dis_ESC_0F38 ---*/
20406/*--- ---*/
20407/*------------------------------------------------------------*/
20408
20409__attribute__((noinline))
20410static
20411Long dis_ESC_0F38 (
20412 /*MB_OUT*/DisResult* dres,
20413 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
20414 Bool resteerCisOk,
20415 void* callback_opaque,
20416 VexArchInfo* archinfo,
20417 VexAbiInfo* vbi,
20418 Prefix pfx, Int sz, Long deltaIN
20419 )
20420{
20421 Long delta = deltaIN;
20422 UChar opc = getUChar(delta);
20423 delta++;
20424 switch (opc) {
20425
sewardj1a237be2012-07-16 08:35:31 +000020426 case 0xF0: /* 0F 38 F0 = MOVBE m16/32/64(E), r16/32/64(G) */
20427 case 0xF1: { /* 0F 38 F1 = MOVBE r16/32/64(G), m16/32/64(E) */
20428 if (!haveF2orF3(pfx) && !haveVEX(pfx)
20429 && (sz == 2 || sz == 4 || sz == 8)) {
20430 IRTemp addr = IRTemp_INVALID;
20431 UChar modrm = 0;
20432 Int alen = 0;
20433 HChar dis_buf[50];
20434 modrm = getUChar(delta);
20435 if (epartIsReg(modrm)) break;
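         /* (MOVBE always takes one memory operand; there is no
            register-to-register form.) */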
20436 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
20437 delta += alen;
20438 IRType ty = szToITy(sz);
20439 IRTemp src = newTemp(ty);
20440 if (opc == 0xF0) { /* LOAD */
20441 assign(src, loadLE(ty, mkexpr(addr)));
20442 IRTemp dst = math_BSWAP(src, ty);
20443 putIRegG(sz, pfx, modrm, mkexpr(dst));
20444 DIP("movbe %s,%s\n", dis_buf, nameIRegG(sz, pfx, modrm));
20445 } else { /* STORE */
20446 assign(src, getIRegG(sz, pfx, modrm));
20447 IRTemp dst = math_BSWAP(src, ty);
20448 storeLE(mkexpr(addr), mkexpr(dst));
20449 DIP("movbe %s,%s\n", nameIRegG(sz, pfx, modrm), dis_buf);
20450 }
20451 return delta;
sewardjf85e1772012-07-15 10:11:10 +000020452 }
sewardj1a237be2012-07-16 08:35:31 +000020453 /* else fall through; maybe one of the decoders below knows what
20454 it is. */
20455 break;
sewardjf85e1772012-07-15 10:11:10 +000020456 }
20457
sewardj80611e32012-01-20 13:07:24 +000020458 default:
20459 break;
20460
20461 }
20462
20463 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
20464 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
20465 rather than proceeding indiscriminately. */
20466 {
20467 Bool decode_OK = False;
20468 delta = dis_ESC_0F38__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
20469 if (decode_OK)
20470 return delta;
20471 }
20472
20473 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
20474 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
20475 rather than proceeding indiscriminately. */
20476 {
20477 Bool decode_OK = False;
20478 delta = dis_ESC_0F38__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
20479 if (decode_OK)
20480 return delta;
20481 }
20482
sewardj1a237be2012-07-16 08:35:31 +000020483 /*decode_failure:*/
sewardj80611e32012-01-20 13:07:24 +000020484 return deltaIN; /* fail */
20485}
20486
20487
20488/*------------------------------------------------------------*/
20489/*--- ---*/
20490/*--- Top-level post-escape decoders: dis_ESC_0F3A ---*/
20491/*--- ---*/
20492/*------------------------------------------------------------*/
20493
20494__attribute__((noinline))
20495static
20496Long dis_ESC_0F3A (
20497 /*MB_OUT*/DisResult* dres,
20498 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
20499 Bool resteerCisOk,
20500 void* callback_opaque,
20501 VexArchInfo* archinfo,
20502 VexAbiInfo* vbi,
20503 Prefix pfx, Int sz, Long deltaIN
20504 )
20505{
20506 Long delta = deltaIN;
20507 UChar opc = getUChar(delta);
20508 delta++;
20509 switch (opc) {
20510
20511 default:
20512 break;
20513
20514 }
20515
20516 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
20517 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
20518 rather than proceeding indiscriminately. */
20519 {
20520 Bool decode_OK = False;
20521 delta = dis_ESC_0F3A__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
20522 if (decode_OK)
20523 return delta;
20524 }
20525
20526 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
20527 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
20528 rather than proceeding indiscriminately. */
20529 {
20530 Bool decode_OK = False;
20531 delta = dis_ESC_0F3A__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
20532 if (decode_OK)
20533 return delta;
20534 }
20535
sewardj80611e32012-01-20 13:07:24 +000020536 return deltaIN; /* fail */
20537}
20538
20539
20540/*------------------------------------------------------------*/
20541/*--- ---*/
sewardjc4530ae2012-05-21 10:18:49 +000020542/*--- Top-level post-escape decoders: dis_ESC_0F__VEX ---*/
20543/*--- ---*/
20544/*------------------------------------------------------------*/
20545
sewardj56c30312012-06-12 08:45:39 +000020546/* FIXME: common up with the _256_ version below? */
sewardjc4530ae2012-05-21 10:18:49 +000020547static
20548Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG (
20549 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi,
20550 Prefix pfx, Long delta, HChar* name,
20551 /* The actual operation. Use either 'op' or 'opfn',
20552 but not both. */
20553 IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
sewardj44565e82012-05-22 09:14:15 +000020554 Bool invertLeftArg,
20555 Bool swapArgs
sewardjc4530ae2012-05-21 10:18:49 +000020556 )
20557{
20558 UChar modrm = getUChar(delta);
20559 UInt rD = gregOfRexRM(pfx, modrm);
20560 UInt rSL = getVexNvvvv(pfx);
20561 IRTemp tSL = newTemp(Ity_V128);
20562 IRTemp tSR = newTemp(Ity_V128);
20563 IRTemp addr = IRTemp_INVALID;
20564 HChar dis_buf[50];
20565 Int alen = 0;
20566 vassert(0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*WIG?*/);
20567
20568 assign(tSL, invertLeftArg ? unop(Iop_NotV128, getXMMReg(rSL))
20569 : getXMMReg(rSL));
20570
20571 if (epartIsReg(modrm)) {
20572 UInt rSR = eregOfRexRM(pfx, modrm);
20573 delta += 1;
20574 assign(tSR, getXMMReg(rSR));
20575 DIP("%s %s,%s,%s\n",
20576 name, nameXMMReg(rSR), nameXMMReg(rSL), nameXMMReg(rD));
20577 } else {
20578 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
20579 delta += alen;
20580 assign(tSR, loadLE(Ity_V128, mkexpr(addr)));
20581 DIP("%s %s,%s,%s\n",
20582 name, dis_buf, nameXMMReg(rSL), nameXMMReg(rD));
20583 }
20584
20585 IRTemp res = IRTemp_INVALID;
20586 if (op != Iop_INVALID) {
20587 vassert(opFn == NULL);
20588 res = newTemp(Ity_V128);
sewardj44565e82012-05-22 09:14:15 +000020589 assign(res, swapArgs ? binop(op, mkexpr(tSR), mkexpr(tSL))
20590 : binop(op, mkexpr(tSL), mkexpr(tSR)));
sewardjc4530ae2012-05-21 10:18:49 +000020591 } else {
20592 vassert(opFn != NULL);
sewardj44565e82012-05-22 09:14:15 +000020593 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
sewardjc4530ae2012-05-21 10:18:49 +000020594 }
20595
20596 putYMMRegLoAndZU(rD, mkexpr(res));
20597
20598 *uses_vvvv = True;
20599 return delta;
20600}
20601
20602
20603/* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, with a simple IROp
sewardj44565e82012-05-22 09:14:15 +000020604 for the operation, no inversion of the left arg, and no swapping of
20605 args. */
sewardjc4530ae2012-05-21 10:18:49 +000020606static
20607Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple (
20608 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi,
20609 Prefix pfx, Long delta, HChar* name,
20610 IROp op
20611 )
20612{
20613 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
sewardj44565e82012-05-22 09:14:15 +000020614 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
sewardjc4530ae2012-05-21 10:18:49 +000020615}
20616
20617
20618/* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, using the given IR
sewardj44565e82012-05-22 09:14:15 +000020619 generator to compute the result, no inversion of the left
20620 arg, and no swapping of args. */
sewardjc4530ae2012-05-21 10:18:49 +000020621static
20622Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex (
20623 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi,
20624 Prefix pfx, Long delta, HChar* name,
20625 IRTemp(*opFn)(IRTemp,IRTemp)
20626 )
20627{
20628 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
sewardj44565e82012-05-22 09:14:15 +000020629 uses_vvvv, vbi, pfx, delta, name,
20630 Iop_INVALID, opFn, False, False );
sewardjc4530ae2012-05-21 10:18:49 +000020631}
20632
20633
sewardj4c0a7ac2012-06-21 09:08:19 +000020634/* Vector by scalar shift of V by the amount specified at the bottom
20635 of E. */
20636static ULong dis_AVX128_shiftV_byE ( VexAbiInfo* vbi,
20637 Prefix pfx, Long delta,
20638 HChar* opname, IROp op )
20639{
20640 HChar dis_buf[50];
20641 Int alen, size;
20642 IRTemp addr;
20643 Bool shl, shr, sar;
20644 UChar modrm = getUChar(delta);
20645 UInt rG = gregOfRexRM(pfx,modrm);
20646 UInt rV = getVexNvvvv(pfx);
20647 IRTemp g0 = newTemp(Ity_V128);
20648 IRTemp g1 = newTemp(Ity_V128);
20649 IRTemp amt = newTemp(Ity_I64);
20650 IRTemp amt8 = newTemp(Ity_I8);
20651 if (epartIsReg(modrm)) {
20652 UInt rE = eregOfRexRM(pfx,modrm);
20653 assign( amt, getXMMRegLane64(rE, 0) );
20654 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
20655 nameXMMReg(rV), nameXMMReg(rG) );
20656 delta++;
20657 } else {
20658 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
20659 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
20660 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
20661 delta += alen;
20662 }
20663 assign( g0, getXMMReg(rV) );
20664 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
20665
20666 shl = shr = sar = False;
20667 size = 0;
20668 switch (op) {
20669 case Iop_ShlN16x8: shl = True; size = 16; break;
20670 case Iop_ShlN32x4: shl = True; size = 32; break;
20671 case Iop_ShlN64x2: shl = True; size = 64; break;
20672 case Iop_SarN16x8: sar = True; size = 16; break;
20673 case Iop_SarN32x4: sar = True; size = 32; break;
20674 case Iop_ShrN16x8: shr = True; size = 16; break;
20675 case Iop_ShrN32x4: shr = True; size = 32; break;
20676 case Iop_ShrN64x2: shr = True; size = 64; break;
20677 default: vassert(0);
20678 }
20679
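   /* Mux0X(c, e0, eX) yields e0 when c is zero and eX otherwise, so an
      out-of-range count gives all-zeroes for logical shifts and a shift
      by (size-1), i.e. all-sign-bits, for arithmetic shifts. */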
20680 if (shl || shr) {
20681 assign(
20682 g1,
20683 IRExpr_Mux0X(
20684 unop(Iop_1Uto8,
20685 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size))),
20686 mkV128(0x0000),
20687 binop(op, mkexpr(g0), mkexpr(amt8))
20688 )
20689 );
20690 } else
20691 if (sar) {
20692 assign(
20693 g1,
20694 IRExpr_Mux0X(
20695 unop(Iop_1Uto8,
20696 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size))),
20697 binop(op, mkexpr(g0), mkU8(size-1)),
20698 binop(op, mkexpr(g0), mkexpr(amt8))
20699 )
20700 );
20701 } else {
20702 vassert(0);
20703 }
20704
20705 putYMMRegLoAndZU( rG, mkexpr(g1) );
20706 return delta;
20707}
20708
20709
sewardjc4530ae2012-05-21 10:18:49 +000020710/* Vector by scalar shift of E into V, by an immediate byte. Modified
20711 version of dis_SSE_shiftE_imm. */
20712static
20713Long dis_AVX128_shiftE_to_V_imm( Prefix pfx,
20714 Long delta, HChar* opname, IROp op )
20715{
20716 Bool shl, shr, sar;
20717 UChar rm = getUChar(delta);
20718 IRTemp e0 = newTemp(Ity_V128);
20719 IRTemp e1 = newTemp(Ity_V128);
20720 UInt rD = getVexNvvvv(pfx);
20721 UChar amt, size;
20722 vassert(epartIsReg(rm));
20723 vassert(gregLO3ofRM(rm) == 2
20724 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
20725 amt = getUChar(delta+1);
20726 delta += 2;
20727 DIP("%s $%d,%s,%s\n", opname,
20728 (Int)amt,
20729 nameXMMReg(eregOfRexRM(pfx,rm)),
20730 nameXMMReg(rD));
20731 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
20732
20733 shl = shr = sar = False;
20734 size = 0;
20735 switch (op) {
sewardj15ad1942012-06-20 10:21:05 +000020736 case Iop_ShlN16x8: shl = True; size = 16; break;
sewardjc4530ae2012-05-21 10:18:49 +000020737 case Iop_ShlN32x4: shl = True; size = 32; break;
sewardje8a7eb72012-06-12 14:59:17 +000020738 case Iop_ShlN64x2: shl = True; size = 64; break;
sewardjfe0c5e72012-06-15 15:48:07 +000020739 case Iop_SarN16x8: sar = True; size = 16; break;
sewardj15ad1942012-06-20 10:21:05 +000020740 case Iop_SarN32x4: sar = True; size = 32; break;
sewardj6faf7cc2012-05-25 15:53:01 +000020741 case Iop_ShrN16x8: shr = True; size = 16; break;
sewardj251b59e2012-05-25 13:51:07 +000020742 case Iop_ShrN32x4: shr = True; size = 32; break;
sewardj56c30312012-06-12 08:45:39 +000020743 case Iop_ShrN64x2: shr = True; size = 64; break;
sewardjc4530ae2012-05-21 10:18:49 +000020744 default: vassert(0);
20745 }
20746
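   /* Here the count is an immediate, so the out-of-range check is
      resolved at translation time rather than with a Mux0X. */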
20747 if (shl || shr) {
20748 assign( e1, amt >= size
20749 ? mkV128(0x0000)
20750 : binop(op, mkexpr(e0), mkU8(amt))
20751 );
20752 } else
20753 if (sar) {
20754 assign( e1, amt >= size
20755 ? binop(op, mkexpr(e0), mkU8(size-1))
20756 : binop(op, mkexpr(e0), mkU8(amt))
20757 );
20758 } else {
20759 vassert(0);
20760 }
20761
20762 putYMMRegLoAndZU( rD, mkexpr(e1) );
20763 return delta;
20764}
20765
20766
20767/* Lower 64-bit lane only AVX128 binary operation:
20768 G[63:0] = V[63:0] `op` E[63:0]
20769 G[127:64] = V[127:64]
20770 G[255:128] = 0.
20771 The specified op must be of the 64F0x2 kind, so that it
20772 copies the upper half of the left operand to the result.
20773*/
20774static Long dis_AVX128_E_V_to_G_lo64 ( /*OUT*/Bool* uses_vvvv,
20775 VexAbiInfo* vbi,
20776 Prefix pfx, Long delta,
20777 HChar* opname, IROp op )
20778{
20779 HChar dis_buf[50];
20780 Int alen;
20781 IRTemp addr;
20782 UChar rm = getUChar(delta);
20783 UInt rG = gregOfRexRM(pfx,rm);
20784 UInt rV = getVexNvvvv(pfx);
20785 IRExpr* vpart = getXMMReg(rV);
20786 if (epartIsReg(rm)) {
20787 UInt rE = eregOfRexRM(pfx,rm);
20788 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
20789 DIP("%s %s,%s,%s\n", opname,
20790 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
20791 delta = delta+1;
20792 } else {
20793 /* We can only do a 64-bit memory read, so the upper half of the
20794 E operand needs to be made simply of zeroes. */
20795 IRTemp epart = newTemp(Ity_V128);
20796 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
20797 assign( epart, unop( Iop_64UtoV128,
20798 loadLE(Ity_I64, mkexpr(addr))) );
20799 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
20800 DIP("%s %s,%s,%s\n", opname,
20801 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
20802 delta = delta+alen;
20803 }
20804 putYMMRegLane128( rG, 1, mkV128(0) );
20805 *uses_vvvv = True;
20806 return delta;
20807}
20808
20809
20810/* Lower 64-bit lane only AVX128 unary operation:
20811 G[63:0] = op(E[63:0])
20812 G[127:64] = V[127:64]
20813 G[255:128] = 0
20814 The specified op must be of the 64F0x2 kind, so that it
20815 copies the upper half of the operand to the result.
20816*/
20817static Long dis_AVX128_E_V_to_G_lo64_unary ( /*OUT*/Bool* uses_vvvv,
20818 VexAbiInfo* vbi,
20819 Prefix pfx, Long delta,
20820 HChar* opname, IROp op )
20821{
20822 HChar dis_buf[50];
20823 Int alen;
20824 IRTemp addr;
20825 UChar rm = getUChar(delta);
20826 UInt rG = gregOfRexRM(pfx,rm);
20827 UInt rV = getVexNvvvv(pfx);
20828 IRTemp e64 = newTemp(Ity_I64);
20829
20830 /* Fetch E[63:0] */
20831 if (epartIsReg(rm)) {
20832 UInt rE = eregOfRexRM(pfx,rm);
20833 assign(e64, getXMMRegLane64(rE, 0));
20834 DIP("%s %s,%s,%s\n", opname,
20835 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
20836 delta += 1;
20837 } else {
20838 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
20839 assign(e64, loadLE(Ity_I64, mkexpr(addr)));
20840 DIP("%s %s,%s,%s\n", opname,
20841 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
20842 delta += alen;
20843 }
20844
20845 /* Create a value 'arg' as V[127:64]++E[63:0] */
20846 IRTemp arg = newTemp(Ity_V128);
20847 assign(arg,
20848 binop(Iop_SetV128lo64,
20849 getXMMReg(rV), mkexpr(e64)));
20850 /* and apply op to it */
20851 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
20852 *uses_vvvv = True;
20853 return delta;
20854}
20855
20856
sewardj66becf32012-06-18 23:15:16 +000020857/* Lower 32-bit lane only AVX128 unary operation:
20858 G[31:0] = op(E[31:0])
20859 G[127:32] = V[127:32]
20860 G[255:128] = 0
20861 The specified op must be of the 32F0x4 kind, so that it
20862 copies the upper 3/4 of the operand to the result.
20863*/
20864static Long dis_AVX128_E_V_to_G_lo32_unary ( /*OUT*/Bool* uses_vvvv,
20865 VexAbiInfo* vbi,
20866 Prefix pfx, Long delta,
20867 HChar* opname, IROp op )
20868{
20869 HChar dis_buf[50];
20870 Int alen;
20871 IRTemp addr;
20872 UChar rm = getUChar(delta);
20873 UInt rG = gregOfRexRM(pfx,rm);
20874 UInt rV = getVexNvvvv(pfx);
20875 IRTemp e32 = newTemp(Ity_I32);
20876
20877 /* Fetch E[31:0] */
20878 if (epartIsReg(rm)) {
20879 UInt rE = eregOfRexRM(pfx,rm);
20880 assign(e32, getXMMRegLane32(rE, 0));
20881 DIP("%s %s,%s,%s\n", opname,
20882 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
20883 delta += 1;
20884 } else {
20885 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
20886 assign(e32, loadLE(Ity_I32, mkexpr(addr)));
20887 DIP("%s %s,%s,%s\n", opname,
20888 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
20889 delta += alen;
20890 }
20891
20892 /* Create a value 'arg' as V[127:32]++E[31:0] */
20893 IRTemp arg = newTemp(Ity_V128);
20894 assign(arg,
20895 binop(Iop_SetV128lo32,
20896 getXMMReg(rV), mkexpr(e32)));
20897 /* and apply op to it */
20898 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
20899 *uses_vvvv = True;
20900 return delta;
20901}
20902
20903
sewardjc4530ae2012-05-21 10:18:49 +000020904/* Lower 32-bit lane only AVX128 binary operation:
20905 G[31:0] = V[31:0] `op` E[31:0]
20906 G[127:32] = V[127:32]
20907 G[255:128] = 0.
20908 The specified op must be of the 32F0x4 kind, so that it
20909 copies the upper 3/4 of the left operand to the result.
20910*/
20911static Long dis_AVX128_E_V_to_G_lo32 ( /*OUT*/Bool* uses_vvvv,
20912 VexAbiInfo* vbi,
20913 Prefix pfx, Long delta,
20914 HChar* opname, IROp op )
20915{
20916 HChar dis_buf[50];
20917 Int alen;
20918 IRTemp addr;
20919 UChar rm = getUChar(delta);
20920 UInt rG = gregOfRexRM(pfx,rm);
20921 UInt rV = getVexNvvvv(pfx);
20922 IRExpr* vpart = getXMMReg(rV);
20923 if (epartIsReg(rm)) {
20924 UInt rE = eregOfRexRM(pfx,rm);
20925 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
20926 DIP("%s %s,%s,%s\n", opname,
20927 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
20928 delta = delta+1;
20929 } else {
20930 /* We can only do a 32-bit memory read, so the upper 3/4 of the
20931 E operand needs to be made simply of zeroes. */
20932 IRTemp epart = newTemp(Ity_V128);
20933 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
20934 assign( epart, unop( Iop_32UtoV128,
20935 loadLE(Ity_I32, mkexpr(addr))) );
20936 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
20937 DIP("%s %s,%s,%s\n", opname,
20938 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
20939 delta = delta+alen;
20940 }
20941 putYMMRegLane128( rG, 1, mkV128(0) );
20942 *uses_vvvv = True;
20943 return delta;
20944}
20945
20946
sewardj251b59e2012-05-25 13:51:07 +000020947/* All-lanes AVX128 binary operation:
sewardj56c30312012-06-12 08:45:39 +000020948 G[127:0] = V[127:0] `op` E[127:0]
sewardj251b59e2012-05-25 13:51:07 +000020949 G[255:128] = 0.
20950*/
20951static Long dis_AVX128_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
20952 VexAbiInfo* vbi,
20953 Prefix pfx, Long delta,
20954 HChar* opname, IROp op )
20955{
sewardj29a219c2012-06-04 07:38:10 +000020956 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
20957 uses_vvvv, vbi, pfx, delta, opname, op,
20958 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
20959 );
sewardj251b59e2012-05-25 13:51:07 +000020960}
20961
20962
sewardjc4530ae2012-05-21 10:18:49 +000020963/* Handles AVX128 32F/64F comparisons. A derivative of
20964 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
20965 original delta to indicate failure. */
20966static
20967Long dis_AVX128_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
20968 VexAbiInfo* vbi,
20969 Prefix pfx, Long delta,
20970 HChar* opname, Bool all_lanes, Int sz )
20971{
sewardjc93904b2012-05-27 13:50:42 +000020972 vassert(sz == 4 || sz == 8);
sewardjc4530ae2012-05-21 10:18:49 +000020973 Long deltaIN = delta;
20974 HChar dis_buf[50];
20975 Int alen;
20976 UInt imm8;
20977 IRTemp addr;
20978 Bool preSwap = False;
20979 IROp op = Iop_INVALID;
20980 Bool postNot = False;
20981 IRTemp plain = newTemp(Ity_V128);
20982 UChar rm = getUChar(delta);
sewardjc93904b2012-05-27 13:50:42 +000020983 UInt rG = gregOfRexRM(pfx, rm);
20984 UInt rV = getVexNvvvv(pfx);
20985 IRTemp argL = newTemp(Ity_V128);
20986 IRTemp argR = newTemp(Ity_V128);
20987
20988 assign(argL, getXMMReg(rV));
sewardjc4530ae2012-05-21 10:18:49 +000020989 if (epartIsReg(rm)) {
20990 imm8 = getUChar(delta+1);
20991 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
20992 if (!ok) return deltaIN; /* FAIL */
20993 UInt rE = eregOfRexRM(pfx,rm);
sewardjc93904b2012-05-27 13:50:42 +000020994 assign(argR, getXMMReg(rE));
sewardjc4530ae2012-05-21 10:18:49 +000020995 delta += 1+1;
20996 DIP("%s $%d,%s,%s,%s\n",
20997 opname, (Int)imm8,
20998 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
20999 } else {
21000 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
21001 imm8 = getUChar(delta+alen);
21002 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
21003 if (!ok) return deltaIN; /* FAIL */
sewardjc93904b2012-05-27 13:50:42 +000021004 assign(argR,
21005 all_lanes ? loadLE(Ity_V128, mkexpr(addr))
sewardjc4530ae2012-05-21 10:18:49 +000021006 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
sewardjc93904b2012-05-27 13:50:42 +000021007 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))));
sewardjc4530ae2012-05-21 10:18:49 +000021008 delta += alen+1;
21009 DIP("%s $%d,%s,%s,%s\n",
21010 opname, (Int)imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
21011 }
21012
sewardjc93904b2012-05-27 13:50:42 +000021013 assign(plain, preSwap ? binop(op, mkexpr(argR), mkexpr(argL))
21014 : binop(op, mkexpr(argL), mkexpr(argR)));
sewardjc4530ae2012-05-21 10:18:49 +000021015
sewardjc93904b2012-05-27 13:50:42 +000021016 if (all_lanes) {
21017 /* This is simple: just invert the result, if necessary, and
21018 have done. */
21019 if (postNot) {
21020 putYMMRegLoAndZU( rG, unop(Iop_NotV128, mkexpr(plain)) );
21021 } else {
21022 putYMMRegLoAndZU( rG, mkexpr(plain) );
21023 }
sewardjc4530ae2012-05-21 10:18:49 +000021024 }
21025 else
sewardjc93904b2012-05-27 13:50:42 +000021026 if (!preSwap) {
21027 /* More complex. It's a one-lane-only operation, hence we need to possibly
21028 invert only that one lane. But at least the other lanes are
21029 correctly "in" the result, having been copied from the left
21030 operand (argL). */
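         /* (mkV128's 16-bit immediate is a byte-select mask: 0x000F
            covers bytes [3:0], the 32-bit lane; 0x00FF covers bytes
            [7:0], the 64-bit lane.) */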
21031 if (postNot) {
21032 IRExpr* mask = mkV128(sz==4 ? 0x000F : 0x00FF);
21033 putYMMRegLoAndZU( rG, binop(Iop_XorV128, mkexpr(plain),
21034 mask) );
21035 } else {
21036 putYMMRegLoAndZU( rG, mkexpr(plain) );
21037 }
sewardjc4530ae2012-05-21 10:18:49 +000021038 }
21039 else {
sewardjc93904b2012-05-27 13:50:42 +000021040 /* This is the most complex case. One-lane-only, but the args
21041 were swapped. So we have to possibly invert the bottom lane,
21042 and (definitely) we have to copy the upper lane(s) from argL
21043 since, due to the swapping, what's currently there is from
21044 argR, which is not correct. */
21045 IRTemp res = newTemp(Ity_V128);
21046 IRTemp mask = newTemp(Ity_V128);
21047 IRTemp notMask = newTemp(Ity_V128);
21048 assign(mask, mkV128(sz==4 ? 0x000F : 0x00FF));
21049 assign(notMask, mkV128(sz==4 ? 0xFFF0 : 0xFF00));
21050 if (postNot) {
21051 assign(res,
21052 binop(Iop_OrV128,
21053 binop(Iop_AndV128,
21054 unop(Iop_NotV128, mkexpr(plain)),
21055 mkexpr(mask)),
21056 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
21057 } else {
21058 assign(res,
21059 binop(Iop_OrV128,
21060 binop(Iop_AndV128,
21061 mkexpr(plain),
21062 mkexpr(mask)),
21063 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
21064 }
21065 putYMMRegLoAndZU( rG, mkexpr(res) );
sewardjc4530ae2012-05-21 10:18:49 +000021066 }
21067
21068 *uses_vvvv = True;
21069 return delta;
21070}
21071
21072
sewardj89378162012-06-24 12:12:20 +000021073/* Handles AVX256 32F/64F comparisons. A derivative of
21074 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
21075 original delta to indicate failure. */
21076static
21077Long dis_AVX256_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
21078 VexAbiInfo* vbi,
21079 Prefix pfx, Long delta,
21080 HChar* opname, Int sz )
21081{
21082 vassert(sz == 4 || sz == 8);
21083 Long deltaIN = delta;
21084 HChar dis_buf[50];
21085 Int alen;
21086 UInt imm8;
21087 IRTemp addr;
21088 Bool preSwap = False;
21089 IROp op = Iop_INVALID;
21090 Bool postNot = False;
21091 IRTemp plain = newTemp(Ity_V256);
21092 UChar rm = getUChar(delta);
21093 UInt rG = gregOfRexRM(pfx, rm);
21094 UInt rV = getVexNvvvv(pfx);
21095 IRTemp argL = newTemp(Ity_V256);
21096 IRTemp argR = newTemp(Ity_V256);
21097 IRTemp argLhi = IRTemp_INVALID;
21098 IRTemp argLlo = IRTemp_INVALID;
21099 IRTemp argRhi = IRTemp_INVALID;
21100 IRTemp argRlo = IRTemp_INVALID;
21101
21102 assign(argL, getYMMReg(rV));
21103 if (epartIsReg(rm)) {
21104 imm8 = getUChar(delta+1);
21105 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8,
21106 True/*all_lanes*/, sz);
21107 if (!ok) return deltaIN; /* FAIL */
21108 UInt rE = eregOfRexRM(pfx,rm);
21109 assign(argR, getYMMReg(rE));
21110 delta += 1+1;
21111 DIP("%s $%d,%s,%s,%s\n",
21112 opname, (Int)imm8,
21113 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
21114 } else {
21115 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
21116 imm8 = getUChar(delta+alen);
21117 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8,
21118 True/*all_lanes*/, sz);
21119 if (!ok) return deltaIN; /* FAIL */
21120 assign(argR, loadLE(Ity_V256, mkexpr(addr)) );
21121 delta += alen+1;
21122 DIP("%s $%d,%s,%s,%s\n",
21123 opname, (Int)imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
21124 }
21125
sewardjb1a41a22012-06-24 13:27:46 +000021126 breakupV256toV128s( preSwap ? argR : argL, &argLhi, &argLlo );
21127 breakupV256toV128s( preSwap ? argL : argR, &argRhi, &argRlo );
sewardj89378162012-06-24 12:12:20 +000021128 assign(plain, binop( Iop_V128HLtoV256,
21129 binop(op, mkexpr(argLhi), mkexpr(argRhi)),
21130 binop(op, mkexpr(argLlo), mkexpr(argRlo)) ) );
21131
21132 /* This is simple: just invert the result, if necessary, and
21133 have done. */
21134 if (postNot) {
21135 putYMMReg( rG, unop(Iop_NotV256, mkexpr(plain)) );
21136 } else {
21137 putYMMReg( rG, mkexpr(plain) );
21138 }
21139
21140 *uses_vvvv = True;
21141 return delta;
21142}
21143
21144
sewardj97f72452012-05-23 05:56:53 +000021145/* Handles AVX128 unary E-to-G all-lanes operations, using the given IR generator (opFn) to compute the result. */
21146static
21147Long dis_AVX128_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
21148 VexAbiInfo* vbi,
21149 Prefix pfx, Long delta,
21150 HChar* opname,
21151 IRTemp (*opFn)(IRTemp) )
21152{
21153 HChar dis_buf[50];
21154 Int alen;
21155 IRTemp addr;
21156 IRTemp res = newTemp(Ity_V128);
21157 IRTemp arg = newTemp(Ity_V128);
21158 UChar rm = getUChar(delta);
21159 UInt rG = gregOfRexRM(pfx, rm);
21160 if (epartIsReg(rm)) {
21161 UInt rE = eregOfRexRM(pfx,rm);
21162 assign(arg, getXMMReg(rE));
21163 delta += 1;
21164 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
21165 } else {
sewardj29ac4282012-05-24 06:31:21 +000021166 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj97f72452012-05-23 05:56:53 +000021167 assign(arg, loadLE(Ity_V128, mkexpr(addr)));
21168 delta += alen;
21169 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
21170 }
21171 res = opFn(arg);
21172 putYMMRegLoAndZU( rG, mkexpr(res) );
21173 *uses_vvvv = False;
21174 return delta;
21175}
21176
21177
sewardj66becf32012-06-18 23:15:16 +000021178/* Handles AVX128 unary E-to-G all-lanes operations, applying the given IROp to the whole 128-bit value. */
21179static
21180Long dis_AVX128_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
21181 VexAbiInfo* vbi,
21182 Prefix pfx, Long delta,
21183 HChar* opname, IROp op )
21184{
21185 HChar dis_buf[50];
21186 Int alen;
21187 IRTemp addr;
21188 IRTemp arg = newTemp(Ity_V128);
21189 UChar rm = getUChar(delta);
21190 UInt rG = gregOfRexRM(pfx, rm);
21191 if (epartIsReg(rm)) {
21192 UInt rE = eregOfRexRM(pfx,rm);
21193 assign(arg, getXMMReg(rE));
21194 delta += 1;
21195 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
21196 } else {
21197 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21198 assign(arg, loadLE(Ity_V128, mkexpr(addr)));
21199 delta += alen;
21200 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
21201 }
21202 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
21203 *uses_vvvv = False;
21204 return delta;
21205}
21206
21207
sewardj56c30312012-06-12 08:45:39 +000021208/* FIXME: common up with the _128_ version above? */
21209static
21210Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG (
21211 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi,
21212 Prefix pfx, Long delta, HChar* name,
21213 /* The actual operation. Use either 'op' or 'opfn',
21214 but not both. */
21215 IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
21216 Bool invertLeftArg,
21217 Bool swapArgs
21218 )
21219{
21220 UChar modrm = getUChar(delta);
21221 UInt rD = gregOfRexRM(pfx, modrm);
21222 UInt rSL = getVexNvvvv(pfx);
21223 IRTemp tSL = newTemp(Ity_V256);
21224 IRTemp tSR = newTemp(Ity_V256);
21225 IRTemp addr = IRTemp_INVALID;
21226 HChar dis_buf[50];
21227 Int alen = 0;
21228 vassert(1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*WIG?*/);
21229
sewardj2a2bda92012-06-14 23:32:02 +000021230 assign(tSL, invertLeftArg ? unop(Iop_NotV256, getYMMReg(rSL))
21231 : getYMMReg(rSL));
sewardj56c30312012-06-12 08:45:39 +000021232
21233 if (epartIsReg(modrm)) {
21234 UInt rSR = eregOfRexRM(pfx, modrm);
21235 delta += 1;
21236 assign(tSR, getYMMReg(rSR));
21237 DIP("%s %s,%s,%s\n",
21238 name, nameYMMReg(rSR), nameYMMReg(rSL), nameYMMReg(rD));
21239 } else {
21240 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
21241 delta += alen;
21242 assign(tSR, loadLE(Ity_V256, mkexpr(addr)));
21243 DIP("%s %s,%s,%s\n",
21244 name, dis_buf, nameYMMReg(rSL), nameYMMReg(rD));
21245 }
21246
21247 IRTemp res = IRTemp_INVALID;
21248 if (op != Iop_INVALID) {
21249 vassert(opFn == NULL);
21250 res = newTemp(Ity_V256);
21251 assign(res, swapArgs ? binop(op, mkexpr(tSR), mkexpr(tSL))
21252 : binop(op, mkexpr(tSL), mkexpr(tSR)));
21253 } else {
21254 vassert(opFn != NULL);
21255 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
21256 }
21257
21258 putYMMReg(rD, mkexpr(res));
21259
21260 *uses_vvvv = True;
21261 return delta;
21262}
21263
21264
21265/* All-lanes AVX256 binary operation:
21266 G[255:0] = V[255:0] `op` E[255:0]
21267*/
21268static Long dis_AVX256_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
21269 VexAbiInfo* vbi,
21270 Prefix pfx, Long delta,
21271 HChar* opname, IROp op )
21272{
21273 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
21274 uses_vvvv, vbi, pfx, delta, opname, op,
21275 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
21276 );
21277}
21278
21279
sewardj89378162012-06-24 12:12:20 +000021280/* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, using the given IR
21281 generator to compute the result, no inversion of the left
21282 arg, and no swapping of args. */
21283static
21284Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex (
21285 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi,
21286 Prefix pfx, Long delta, HChar* name,
21287 IRTemp(*opFn)(IRTemp,IRTemp)
21288 )
21289{
21290 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
21291 uses_vvvv, vbi, pfx, delta, name,
21292 Iop_INVALID, opFn, False, False );
21293}
21294
21295
sewardj66becf32012-06-18 23:15:16 +000021296/* Handles AVX256 unary E-to-G all-lanes operations. */
21297static
21298Long dis_AVX256_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
21299 VexAbiInfo* vbi,
21300 Prefix pfx, Long delta,
21301 HChar* opname, IROp op )
21302{
21303 HChar dis_buf[50];
21304 Int alen;
21305 IRTemp addr;
21306 IRTemp arg = newTemp(Ity_V256);
21307 UChar rm = getUChar(delta);
21308 UInt rG = gregOfRexRM(pfx, rm);
21309 if (epartIsReg(rm)) {
21310 UInt rE = eregOfRexRM(pfx,rm);
21311 assign(arg, getYMMReg(rE));
21312 delta += 1;
21313 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
21314 } else {
21315 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21316 assign(arg, loadLE(Ity_V256, mkexpr(addr)));
21317 delta += alen;
21318 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
21319 }
21320 putYMMReg( rG, unop(op, mkexpr(arg)) );
21321 *uses_vvvv = False;
21322 return delta;
21323}
21324
21325
sewardj6fcd43e2012-06-14 08:51:35 +000021326/* The use of ReinterpF64asI64 is ugly. Surely could do better if we
21327 had a variant of Iop_64x4toV256 that took F64s as args instead. */
21328static Long dis_CVTDQ2PD_256 ( VexAbiInfo* vbi, Prefix pfx,
21329 Long delta )
21330{
21331 IRTemp addr = IRTemp_INVALID;
21332 Int alen = 0;
21333 HChar dis_buf[50];
21334 UChar modrm = getUChar(delta);
21335 IRTemp sV = newTemp(Ity_V128);
21336 UInt rG = gregOfRexRM(pfx,modrm);
21337 if (epartIsReg(modrm)) {
21338 UInt rE = eregOfRexRM(pfx,modrm);
21339 assign( sV, getXMMReg(rE) );
21340 delta += 1;
21341 DIP("vcvtdq2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
21342 } else {
21343 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21344 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
21345 delta += alen;
21346 DIP("vcvtdq2pd %s,%s\n", dis_buf, nameYMMReg(rG) );
21347 }
21348 IRTemp s3, s2, s1, s0;
21349 s3 = s2 = s1 = s0 = IRTemp_INVALID;
21350 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
21351 IRExpr* res
21352 = IRExpr_Qop(
21353 Iop_64x4toV256,
21354 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s3))),
21355 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s2))),
21356 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s1))),
21357 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s0)))
21358 );
21359 putYMMReg(rG, res);
21360 return delta;
21361}
21362
21363
21364static Long dis_CVTPD2PS_256 ( VexAbiInfo* vbi, Prefix pfx,
21365 Long delta )
21366{
21367 IRTemp addr = IRTemp_INVALID;
21368 Int alen = 0;
21369 HChar dis_buf[50];
21370 UChar modrm = getUChar(delta);
21371 UInt rG = gregOfRexRM(pfx,modrm);
21372 IRTemp argV = newTemp(Ity_V256);
21373 IRTemp rmode = newTemp(Ity_I32);
21374 if (epartIsReg(modrm)) {
21375 UInt rE = eregOfRexRM(pfx,modrm);
21376 assign( argV, getYMMReg(rE) );
21377 delta += 1;
21378 DIP("vcvtpd2psy %s,%s\n", nameYMMReg(rE), nameXMMReg(rG));
21379 } else {
21380 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21381 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
21382 delta += alen;
21383 DIP("vcvtpd2psy %s,%s\n", dis_buf, nameXMMReg(rG) );
21384 }
21385
21386 assign( rmode, get_sse_roundingmode() );
21387 IRTemp t3, t2, t1, t0;
21388 t3 = t2 = t1 = t0 = IRTemp_INVALID;
21389 breakupV256to64s( argV, &t3, &t2, &t1, &t0 );
21390# define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), \
21391 unop(Iop_ReinterpI64asF64, mkexpr(_t)) )
21392 putXMMRegLane32F( rG, 3, CVT(t3) );
21393 putXMMRegLane32F( rG, 2, CVT(t2) );
21394 putXMMRegLane32F( rG, 1, CVT(t1) );
21395 putXMMRegLane32F( rG, 0, CVT(t0) );
21396# undef CVT
21397 putYMMRegLane128( rG, 1, mkV128(0) );
21398 return delta;
21399}
21400
21401
sewardjc4530ae2012-05-21 10:18:49 +000021402__attribute__((noinline))
21403static
21404Long dis_ESC_0F__VEX (
21405 /*MB_OUT*/DisResult* dres,
21406 /*OUT*/ Bool* uses_vvvv,
21407 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
21408 Bool resteerCisOk,
21409 void* callback_opaque,
21410 VexArchInfo* archinfo,
21411 VexAbiInfo* vbi,
21412 Prefix pfx, Int sz, Long deltaIN
21413 )
21414{
21415 IRTemp addr = IRTemp_INVALID;
21416 Int alen = 0;
21417 HChar dis_buf[50];
21418 Long delta = deltaIN;
21419 UChar opc = getUChar(delta);
21420 delta++;
21421 *uses_vvvv = False;
21422
21423 switch (opc) {
21424
21425 case 0x10:
21426 /* VMOVSD m64, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
21427 /* Move 64 bits from E (mem only) to G (lo half xmm).
21428 Bits 255-64 of the dest are zeroed out. */
21429 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
21430 UChar modrm = getUChar(delta);
21431 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21432 UInt rG = gregOfRexRM(pfx,modrm);
21433 IRTemp z128 = newTemp(Ity_V128);
21434 assign(z128, mkV128(0));
21435 putXMMReg( rG, mkexpr(z128) );
21436 /* FIXME: ALIGNMENT CHECK? */
21437 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
21438 putYMMRegLane128( rG, 1, mkexpr(z128) );
21439 DIP("vmovsd %s,%s\n", dis_buf, nameXMMReg(rG));
21440 delta += alen;
21441 goto decode_success;
21442 }
sewardj21459cb2012-06-18 14:05:52 +000021443 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
21444 /* Reg form. */
21445 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
21446 UChar modrm = getUChar(delta);
21447 UInt rG = gregOfRexRM(pfx, modrm);
21448 UInt rE = eregOfRexRM(pfx, modrm);
21449 UInt rV = getVexNvvvv(pfx);
21450 delta++;
21451 DIP("vmovsd %s,%s,%s\n",
21452 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
21453 IRTemp res = newTemp(Ity_V128);
21454 assign(res, binop(Iop_64HLtoV128,
21455 getXMMRegLane64(rV, 1),
21456 getXMMRegLane64(rE, 0)));
21457 putYMMRegLoAndZU(rG, mkexpr(res));
21458 *uses_vvvv = True;
21459 goto decode_success;
21460 }
sewardjc4530ae2012-05-21 10:18:49 +000021461 /* VMOVSS m32, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
21462 /* Move 32 bits from E (mem only) to G (lo half xmm).
21463 Bits 255-32 of the dest are zeroed out. */
21464 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
21465 UChar modrm = getUChar(delta);
21466 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21467 UInt rG = gregOfRexRM(pfx,modrm);
21468 IRTemp z128 = newTemp(Ity_V128);
21469 assign(z128, mkV128(0));
21470 putXMMReg( rG, mkexpr(z128) );
21471 /* FIXME: ALIGNMENT CHECK? */
21472 putXMMRegLane32( rG, 0, loadLE(Ity_I32, mkexpr(addr)) );
21473 putYMMRegLane128( rG, 1, mkexpr(z128) );
21474 DIP("vmovss %s,%s\n", dis_buf, nameXMMReg(rG));
21475 delta += alen;
21476 goto decode_success;
21477 }
sewardj15ad1942012-06-20 10:21:05 +000021478 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
21479 /* Reg form. */
21480 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
21481 UChar modrm = getUChar(delta);
21482 UInt rG = gregOfRexRM(pfx, modrm);
21483 UInt rE = eregOfRexRM(pfx, modrm);
21484 UInt rV = getVexNvvvv(pfx);
21485 delta++;
21486 DIP("vmovss %s,%s,%s\n",
21487 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
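         /* Register-form merge: bits 31:0 of the result come from rE,
            bits 127:32 from rV, and the upper half of the ymm register
            is zeroed. */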
21488 IRTemp res = newTemp(Ity_V128);
21489 assign( res, binop( Iop_64HLtoV128,
21490 getXMMRegLane64(rV, 1),
21491 binop(Iop_32HLto64,
21492 getXMMRegLane32(rV, 1),
21493 getXMMRegLane32(rE, 0)) ) );
21494 putYMMRegLoAndZU(rG, mkexpr(res));
21495 *uses_vvvv = True;
21496 goto decode_success;
21497 }
sewardj56c30312012-06-12 08:45:39 +000021498 /* VMOVUPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 10 /r */
21499 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
21500 UChar modrm = getUChar(delta);
21501 UInt rG = gregOfRexRM(pfx, modrm);
21502 if (epartIsReg(modrm)) {
21503 UInt rE = eregOfRexRM(pfx,modrm);
21504 putYMMRegLoAndZU( rG, getXMMReg( rE ));
21505 DIP("vmovupd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
21506 delta += 1;
21507 } else {
21508 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21509 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
21510 DIP("vmovupd %s,%s\n", dis_buf, nameXMMReg(rG));
21511 delta += alen;
21512 }
21513 goto decode_success;
21514 }
sewardjfce47a62012-06-03 23:12:33 +000021515 /* VMOVUPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 10 /r */
21516 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
21517 UChar modrm = getUChar(delta);
21518 UInt rG = gregOfRexRM(pfx, modrm);
21519 if (epartIsReg(modrm)) {
21520 UInt rE = eregOfRexRM(pfx,modrm);
21521 putYMMReg( rG, getYMMReg( rE ));
21522 DIP("vmovupd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
21523 delta += 1;
21524 } else {
21525 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21526 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
21527 DIP("vmovupd %s,%s\n", dis_buf, nameYMMReg(rG));
21528 delta += alen;
21529 }
21530 goto decode_success;
21531 }
sewardj56c30312012-06-12 08:45:39 +000021532 /* VMOVUPS xmm2/m128, xmm1 = VEX.128.0F.WIG 10 /r */
21533 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
21534 UChar modrm = getUChar(delta);
21535 UInt rG = gregOfRexRM(pfx, modrm);
21536 if (epartIsReg(modrm)) {
21537 UInt rE = eregOfRexRM(pfx,modrm);
21538 putYMMRegLoAndZU( rG, getXMMReg( rE ));
21539 DIP("vmovups %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
21540 delta += 1;
21541 } else {
21542 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21543 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
21544 DIP("vmovups %s,%s\n", dis_buf, nameXMMReg(rG));
21545 delta += alen;
21546 }
21547 goto decode_success;
21548 }
sewardj66becf32012-06-18 23:15:16 +000021549 /* VMOVUPS ymm2/m256, ymm1 = VEX.256.0F.WIG 10 /r */
21550 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
21551 UChar modrm = getUChar(delta);
21552 UInt rG = gregOfRexRM(pfx, modrm);
21553 if (epartIsReg(modrm)) {
21554 UInt rE = eregOfRexRM(pfx,modrm);
21555 putYMMReg( rG, getYMMReg( rE ));
21556 DIP("vmovups %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
21557 delta += 1;
21558 } else {
21559 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21560 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
21561 DIP("vmovups %s,%s\n", dis_buf, nameYMMReg(rG));
21562 delta += alen;
21563 }
21564 goto decode_success;
21565 }
sewardjc4530ae2012-05-21 10:18:49 +000021566 break;
21567
21568 case 0x11:
21569 /* VMOVSD xmm1, m64 = VEX.LIG.F2.0F.WIG 11 /r */
21570 /* Move 64 bits from G (low half xmm) to mem only. */
21571 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
21572 UChar modrm = getUChar(delta);
21573 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21574 UInt rG = gregOfRexRM(pfx,modrm);
21575 /* FIXME: ALIGNMENT CHECK? */
21576 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0));
21577 DIP("vmovsd %s,%s\n", nameXMMReg(rG), dis_buf);
21578 delta += alen;
21579 goto decode_success;
21580 }
sewardj21459cb2012-06-18 14:05:52 +000021581 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 11 /r */
21582 /* Reg form. */
21583 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
21584 UChar modrm = getUChar(delta);
21585 UInt rG = gregOfRexRM(pfx, modrm);
21586 UInt rE = eregOfRexRM(pfx, modrm);
21587 UInt rV = getVexNvvvv(pfx);
21588 delta++;
21589 DIP("vmovsd %s,%s,%s\n",
21590 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
21591 IRTemp res = newTemp(Ity_V128);
21592 assign(res, binop(Iop_64HLtoV128,
21593 getXMMRegLane64(rV, 1),
21594 getXMMRegLane64(rE, 0)));
21595 putYMMRegLoAndZU(rG, mkexpr(res));
21596 *uses_vvvv = True;
21597 goto decode_success;
21598 }
sewardjc4530ae2012-05-21 10:18:49 +000021599      /* VMOVSS xmm1, m32 = VEX.LIG.F3.0F.WIG 11 /r */
21600 /* Move 32 bits from G (low 1/4 xmm) to mem only. */
21601 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
21602 UChar modrm = getUChar(delta);
21603 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21604 UInt rG = gregOfRexRM(pfx,modrm);
21605 /* FIXME: ALIGNMENT CHECK? */
21606 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0));
21607 DIP("vmovss %s,%s\n", nameXMMReg(rG), dis_buf);
21608 delta += alen;
21609 goto decode_success;
21610 }
sewardj15ad1942012-06-20 10:21:05 +000021611 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 11 /r */
21612 /* Reg form. */
21613 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
21614 UChar modrm = getUChar(delta);
21615 UInt rG = gregOfRexRM(pfx, modrm);
21616 UInt rE = eregOfRexRM(pfx, modrm);
21617 UInt rV = getVexNvvvv(pfx);
21618 delta++;
21619 DIP("vmovss %s,%s,%s\n",
21620 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
21621 IRTemp res = newTemp(Ity_V128);
21622 assign( res, binop( Iop_64HLtoV128,
21623 getXMMRegLane64(rV, 1),
21624 binop(Iop_32HLto64,
21625 getXMMRegLane32(rV, 1),
21626 getXMMRegLane32(rE, 0)) ) );
21627 putYMMRegLoAndZU(rG, mkexpr(res));
21628 *uses_vvvv = True;
21629 goto decode_success;
21630 }
sewardjc4530ae2012-05-21 10:18:49 +000021631 /* VMOVUPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 11 /r */
21632 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
21633 UChar modrm = getUChar(delta);
21634 UInt rG = gregOfRexRM(pfx,modrm);
21635 if (epartIsReg(modrm)) {
21636 UInt rE = eregOfRexRM(pfx,modrm);
21637 putYMMRegLoAndZU( rE, getXMMReg(rG) );
21638 DIP("vmovupd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
21639 delta += 1;
21640 } else {
21641 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21642 storeLE( mkexpr(addr), getXMMReg(rG) );
21643 DIP("vmovupd %s,%s\n", nameXMMReg(rG), dis_buf);
21644 delta += alen;
21645 }
21646 goto decode_success;
21647 }
sewardj4ed05e02012-06-18 15:01:30 +000021648 /* VMOVUPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 11 /r */
21649 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
21650 UChar modrm = getUChar(delta);
21651 UInt rG = gregOfRexRM(pfx,modrm);
21652 if (epartIsReg(modrm)) {
21653 UInt rE = eregOfRexRM(pfx,modrm);
21654 putYMMReg( rE, getYMMReg(rG) );
21655 DIP("vmovupd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
21656 delta += 1;
21657 } else {
21658 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21659 storeLE( mkexpr(addr), getYMMReg(rG) );
21660 DIP("vmovupd %s,%s\n", nameYMMReg(rG), dis_buf);
21661 delta += alen;
21662 }
21663 goto decode_success;
21664 }
sewardj6eaf00c2012-05-23 11:33:56 +000021665 /* VMOVUPS xmm1, xmm2/m128 = VEX.128.0F.WIG 11 /r */
21666 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
21667 UChar modrm = getUChar(delta);
21668 UInt rG = gregOfRexRM(pfx,modrm);
21669 if (epartIsReg(modrm)) {
21670 UInt rE = eregOfRexRM(pfx,modrm);
21671 putYMMRegLoAndZU( rE, getXMMReg(rG) );
21672 DIP("vmovups %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
21673 delta += 1;
21674 } else {
21675 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21676 storeLE( mkexpr(addr), getXMMReg(rG) );
21677 DIP("vmovups %s,%s\n", nameXMMReg(rG), dis_buf);
21678 delta += alen;
21679 }
21680 goto decode_success;
21681 }
sewardj4ed05e02012-06-18 15:01:30 +000021682 /* VMOVUPS ymm1, ymm2/m256 = VEX.256.0F.WIG 11 /r */
21683 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
sewardjfce47a62012-06-03 23:12:33 +000021684 UChar modrm = getUChar(delta);
21685 UInt rG = gregOfRexRM(pfx,modrm);
21686 if (epartIsReg(modrm)) {
21687 UInt rE = eregOfRexRM(pfx,modrm);
21688 putYMMReg( rE, getYMMReg(rG) );
sewardj4ed05e02012-06-18 15:01:30 +000021689 DIP("vmovups %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
sewardjfce47a62012-06-03 23:12:33 +000021690 delta += 1;
21691 } else {
21692 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21693 storeLE( mkexpr(addr), getYMMReg(rG) );
sewardj4ed05e02012-06-18 15:01:30 +000021694 DIP("vmovups %s,%s\n", nameYMMReg(rG), dis_buf);
sewardjfce47a62012-06-03 23:12:33 +000021695 delta += alen;
21696 }
21697 goto decode_success;
21698 }
sewardjc4530ae2012-05-21 10:18:49 +000021699 break;
21700
21701 case 0x12:
sewardj53b12782012-05-22 23:34:06 +000021702      /* VMOVDDUP xmm2/m64, xmm1 = VEX.128.F2.0F.WIG 12 /r */
sewardjc4530ae2012-05-21 10:18:49 +000021703 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
21704 delta = dis_MOVDDUP_128( vbi, pfx, delta, True/*isAvx*/ );
21705 goto decode_success;
21706 }
sewardj82096922012-06-24 14:57:59 +000021707      /* VMOVDDUP ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 12 /r */
21708 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
21709 delta = dis_MOVDDUP_256( vbi, pfx, delta );
21710 goto decode_success;
21711 }
sewardj53b12782012-05-22 23:34:06 +000021712 /* VMOVHLPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 12 /r */
21713 /* Insn only exists in reg form */
21714 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
21715 && epartIsReg(getUChar(delta))) {
21716 UChar modrm = getUChar(delta);
21717 UInt rG = gregOfRexRM(pfx, modrm);
21718 UInt rE = eregOfRexRM(pfx, modrm);
21719 UInt rV = getVexNvvvv(pfx);
21720 delta++;
21721 DIP("vmovhlps %s,%s,%s\n",
21722 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
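         /* The low qword of the result is taken from the high qword of
            rE, and the high qword is carried over from rV. */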
21723 IRTemp res = newTemp(Ity_V128);
21724 assign(res, binop(Iop_64HLtoV128,
21725 getXMMRegLane64(rV, 1),
21726 getXMMRegLane64(rE, 1)));
21727 putYMMRegLoAndZU(rG, mkexpr(res));
21728 *uses_vvvv = True;
21729 goto decode_success;
21730 }
sewardj82096922012-06-24 14:57:59 +000021731 /* VMOVLPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 12 /r */
21732 /* Insn exists only in mem form, it appears. */
sewardj21459cb2012-06-18 14:05:52 +000021733 /* VMOVLPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 12 /r */
21734 /* Insn exists only in mem form, it appears. */
sewardj82096922012-06-24 14:57:59 +000021735 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
21736 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
sewardj21459cb2012-06-18 14:05:52 +000021737 UChar modrm = getUChar(delta);
21738 UInt rG = gregOfRexRM(pfx, modrm);
21739 UInt rV = getVexNvvvv(pfx);
21740 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21741 delta += alen;
21742         DIP("vmovlp%c %s,%s,%s\n", have66(pfx) ? 'd' : 's',
21743 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
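         /* The low qword of the result is loaded from memory and the
            high qword is carried over from rV; the upper ymm lane is
            zeroed. */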
21744 IRTemp res = newTemp(Ity_V128);
21745 assign(res, binop(Iop_64HLtoV128,
21746 getXMMRegLane64(rV, 1),
21747 loadLE(Ity_I64, mkexpr(addr))));
21748 putYMMRegLoAndZU(rG, mkexpr(res));
21749 *uses_vvvv = True;
21750 goto decode_success;
21751 }
sewardj15ad1942012-06-20 10:21:05 +000021752 /* VMOVSLDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 12 /r */
21753 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
21754 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
21755 True/*isL*/ );
21756 goto decode_success;
21757 }
21758 /* VMOVSLDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 12 /r */
21759 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
21760 delta = dis_MOVSxDUP_256( vbi, pfx, delta, True/*isL*/ );
21761 goto decode_success;
21762 }
sewardj21459cb2012-06-18 14:05:52 +000021763 break;
21764
21765 case 0x13:
sewardj82096922012-06-24 14:57:59 +000021766 /* VMOVLPS xmm1, m64 = VEX.128.0F.WIG 13 /r */
21767 /* Insn exists only in mem form, it appears. */
sewardj21459cb2012-06-18 14:05:52 +000021768 /* VMOVLPD xmm1, m64 = VEX.128.66.0F.WIG 13 /r */
21769 /* Insn exists only in mem form, it appears. */
sewardj82096922012-06-24 14:57:59 +000021770 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
21771 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
sewardj21459cb2012-06-18 14:05:52 +000021772 UChar modrm = getUChar(delta);
21773 UInt rG = gregOfRexRM(pfx, modrm);
21774 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21775 delta += alen;
21776 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0));
21777         DIP("vmovlp%c %s,%s\n", have66(pfx) ? 'd' : 's', nameXMMReg(rG), dis_buf);
21778 goto decode_success;
21779 }
sewardjc4530ae2012-05-21 10:18:49 +000021780 break;
21781
21782 case 0x14:
sewardj56c30312012-06-12 08:45:39 +000021783 case 0x15:
sewardjc4530ae2012-05-21 10:18:49 +000021784 /* VUNPCKLPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 14 /r */
sewardj56c30312012-06-12 08:45:39 +000021785 /* VUNPCKHPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 15 /r */
sewardjc4530ae2012-05-21 10:18:49 +000021786 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
21787 Bool hi = opc == 0x15;
21788 UChar modrm = getUChar(delta);
21789 UInt rG = gregOfRexRM(pfx,modrm);
21790 UInt rV = getVexNvvvv(pfx);
21791 IRTemp eV = newTemp(Ity_V128);
21792 IRTemp vV = newTemp(Ity_V128);
21793 assign( vV, getXMMReg(rV) );
21794 if (epartIsReg(modrm)) {
21795 UInt rE = eregOfRexRM(pfx,modrm);
21796 assign( eV, getXMMReg(rE) );
21797 delta += 1;
21798 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
21799 nameXMMReg(rE), nameXMMReg(rG));
21800 } else {
21801 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21802 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
21803 delta += alen;
21804 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
21805 dis_buf, nameXMMReg(rG));
21806 }
sewardj56c30312012-06-12 08:45:39 +000021807 IRTemp res = math_UNPCKxPS_128( eV, vV, hi );
21808 putYMMRegLoAndZU( rG, mkexpr(res) );
21809 *uses_vvvv = True;
21810 goto decode_success;
21811 }
sewardj4b1cc832012-06-13 11:10:20 +000021812 /* VUNPCKLPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 14 /r */
21813 /* VUNPCKHPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 15 /r */
21814 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
21815 Bool hi = opc == 0x15;
21816 UChar modrm = getUChar(delta);
21817 UInt rG = gregOfRexRM(pfx,modrm);
21818 UInt rV = getVexNvvvv(pfx);
21819 IRTemp eV = newTemp(Ity_V256);
21820 IRTemp vV = newTemp(Ity_V256);
21821 assign( vV, getYMMReg(rV) );
21822 if (epartIsReg(modrm)) {
21823 UInt rE = eregOfRexRM(pfx,modrm);
21824 assign( eV, getYMMReg(rE) );
21825 delta += 1;
21826 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
21827 nameYMMReg(rE), nameYMMReg(rG));
21828 } else {
21829 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21830 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
21831 delta += alen;
21832 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
21833 dis_buf, nameYMMReg(rG));
21834 }
21835 IRTemp res = math_UNPCKxPS_256( eV, vV, hi );
21836 putYMMReg( rG, mkexpr(res) );
21837 *uses_vvvv = True;
21838 goto decode_success;
21839 }
sewardj56c30312012-06-12 08:45:39 +000021840 /* VUNPCKLPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 14 /r */
21841 /* VUNPCKHPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 15 /r */
21842 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
21843 Bool hi = opc == 0x15;
21844 UChar modrm = getUChar(delta);
21845 UInt rG = gregOfRexRM(pfx,modrm);
21846 UInt rV = getVexNvvvv(pfx);
21847 IRTemp eV = newTemp(Ity_V128);
21848 IRTemp vV = newTemp(Ity_V128);
21849 assign( vV, getXMMReg(rV) );
21850 if (epartIsReg(modrm)) {
21851 UInt rE = eregOfRexRM(pfx,modrm);
21852 assign( eV, getXMMReg(rE) );
21853 delta += 1;
21854 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
21855 nameXMMReg(rE), nameXMMReg(rG));
21856 } else {
21857 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21858 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
21859 delta += alen;
21860 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
21861 dis_buf, nameXMMReg(rG));
21862 }
21863 IRTemp res = math_UNPCKxPD_128( eV, vV, hi );
sewardjc4530ae2012-05-21 10:18:49 +000021864 putYMMRegLoAndZU( rG, mkexpr(res) );
21865 *uses_vvvv = True;
21866 goto decode_success;
21867 }
sewardj4b1cc832012-06-13 11:10:20 +000021868 /* VUNPCKLPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 14 /r */
21869 /* VUNPCKHPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 15 /r */
21870 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
21871 Bool hi = opc == 0x15;
21872 UChar modrm = getUChar(delta);
21873 UInt rG = gregOfRexRM(pfx,modrm);
21874 UInt rV = getVexNvvvv(pfx);
21875 IRTemp eV = newTemp(Ity_V256);
21876 IRTemp vV = newTemp(Ity_V256);
21877 assign( vV, getYMMReg(rV) );
21878 if (epartIsReg(modrm)) {
21879 UInt rE = eregOfRexRM(pfx,modrm);
21880 assign( eV, getYMMReg(rE) );
21881 delta += 1;
21882 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
21883 nameYMMReg(rE), nameYMMReg(rG));
21884 } else {
21885 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21886 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
21887 delta += alen;
21888 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
21889 dis_buf, nameYMMReg(rG));
21890 }
21891 IRTemp res = math_UNPCKxPD_256( eV, vV, hi );
21892 putYMMReg( rG, mkexpr(res) );
21893 *uses_vvvv = True;
21894 goto decode_success;
21895 }
sewardjc4530ae2012-05-21 10:18:49 +000021896 break;
21897
sewardj91fa9802012-05-23 06:16:26 +000021898 case 0x16:
21899 /* VMOVLHPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 16 /r */
21900 /* Insn only exists in reg form */
21901 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
21902 && epartIsReg(getUChar(delta))) {
21903 UChar modrm = getUChar(delta);
21904 UInt rG = gregOfRexRM(pfx, modrm);
21905 UInt rE = eregOfRexRM(pfx, modrm);
21906 UInt rV = getVexNvvvv(pfx);
21907 delta++;
21908 DIP("vmovlhps %s,%s,%s\n",
21909 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
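         /* The low qword of the result is carried over from rV and the
            high qword is taken from the low qword of rE. */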
21910 IRTemp res = newTemp(Ity_V128);
21911 assign(res, binop(Iop_64HLtoV128,
21912 getXMMRegLane64(rE, 0),
21913 getXMMRegLane64(rV, 0)));
21914 putYMMRegLoAndZU(rG, mkexpr(res));
21915 *uses_vvvv = True;
21916 goto decode_success;
21917 }
sewardj8eb7ae82012-06-24 14:00:27 +000021918 /* VMOVHPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 16 /r */
21919 /* Insn exists only in mem form, it appears. */
sewardj6fcd43e2012-06-14 08:51:35 +000021920 /* VMOVHPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 16 /r */
21921 /* Insn exists only in mem form, it appears. */
sewardj8eb7ae82012-06-24 14:00:27 +000021922 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
21923 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
sewardj6fcd43e2012-06-14 08:51:35 +000021924 UChar modrm = getUChar(delta);
21925 UInt rG = gregOfRexRM(pfx, modrm);
21926 UInt rV = getVexNvvvv(pfx);
21927 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21928 delta += alen;
sewardj8eb7ae82012-06-24 14:00:27 +000021929 DIP("vmovhp%c %s,%s,%s\n", have66(pfx) ? 'd' : 's',
sewardj6fcd43e2012-06-14 08:51:35 +000021930 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
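         /* The high qword of the result is loaded from memory and the
            low qword is carried over from rV -- the mirror image of the
            VMOVLPS/VMOVLPD case above. */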
21931 IRTemp res = newTemp(Ity_V128);
21932 assign(res, binop(Iop_64HLtoV128,
21933 loadLE(Ity_I64, mkexpr(addr)),
21934 getXMMRegLane64(rV, 0)));
21935 putYMMRegLoAndZU(rG, mkexpr(res));
21936 *uses_vvvv = True;
21937 goto decode_success;
21938 }
sewardj15ad1942012-06-20 10:21:05 +000021939 /* VMOVSHDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 16 /r */
21940 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
21941 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
21942 False/*!isL*/ );
21943 goto decode_success;
21944 }
21945 /* VMOVSHDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 16 /r */
21946 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
21947 delta = dis_MOVSxDUP_256( vbi, pfx, delta, False/*!isL*/ );
21948 goto decode_success;
21949 }
sewardj91fa9802012-05-23 06:16:26 +000021950 break;
21951
sewardj8ef22422012-05-24 16:29:18 +000021952 case 0x17:
sewardj8eb7ae82012-06-24 14:00:27 +000021953 /* VMOVHPS xmm1, m64 = VEX.128.0F.WIG 17 /r */
21954 /* Insn exists only in mem form, it appears. */
sewardj8ef22422012-05-24 16:29:18 +000021955 /* VMOVHPD xmm1, m64 = VEX.128.66.0F.WIG 17 /r */
sewardj6fcd43e2012-06-14 08:51:35 +000021956 /* Insn exists only in mem form, it appears. */
sewardj8eb7ae82012-06-24 14:00:27 +000021957 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
21958 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
sewardj8ef22422012-05-24 16:29:18 +000021959 UChar modrm = getUChar(delta);
21960 UInt rG = gregOfRexRM(pfx, modrm);
21961 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21962 delta += alen;
21963 storeLE( mkexpr(addr), getXMMRegLane64( rG, 1));
sewardj8eb7ae82012-06-24 14:00:27 +000021964 DIP("vmovhp%c %s,%s\n", have66(pfx) ? 'd' : 's',
21965 nameXMMReg(rG), dis_buf);
sewardj8ef22422012-05-24 16:29:18 +000021966 goto decode_success;
21967 }
21968 break;
21969
sewardjc4530ae2012-05-21 10:18:49 +000021970 case 0x28:
21971 /* VMOVAPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 28 /r */
21972 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
21973 UChar modrm = getUChar(delta);
21974 UInt rG = gregOfRexRM(pfx, modrm);
21975 if (epartIsReg(modrm)) {
21976 UInt rE = eregOfRexRM(pfx,modrm);
21977 putYMMRegLoAndZU( rG, getXMMReg( rE ));
21978 DIP("vmovapd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
21979 delta += 1;
21980 } else {
21981 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21982 gen_SEGV_if_not_16_aligned( addr );
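            /* The aligned-move forms trap on a misaligned effective
               address; that is modelled here (and in the cases below) by
               emitting an explicit alignment check before the access. */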
21983 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
21984 DIP("vmovapd %s,%s\n", dis_buf, nameXMMReg(rG));
21985 delta += alen;
21986 }
21987 goto decode_success;
21988 }
21989 /* VMOVAPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 28 /r */
21990 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
21991 UChar modrm = getUChar(delta);
21992 UInt rG = gregOfRexRM(pfx, modrm);
21993 if (epartIsReg(modrm)) {
21994 UInt rE = eregOfRexRM(pfx,modrm);
21995 putYMMReg( rG, getYMMReg( rE ));
21996 DIP("vmovapd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
21997 delta += 1;
21998 } else {
21999 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22000 gen_SEGV_if_not_32_aligned( addr );
22001 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
22002 DIP("vmovapd %s,%s\n", dis_buf, nameYMMReg(rG));
22003 delta += alen;
22004 }
22005 goto decode_success;
22006 }
22007 /* VMOVAPS xmm2/m128, xmm1 = VEX.128.0F.WIG 28 /r */
22008 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22009 UChar modrm = getUChar(delta);
22010 UInt rG = gregOfRexRM(pfx, modrm);
22011 if (epartIsReg(modrm)) {
22012 UInt rE = eregOfRexRM(pfx,modrm);
22013 putYMMRegLoAndZU( rG, getXMMReg( rE ));
22014 DIP("vmovaps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
22015 delta += 1;
22016 } else {
22017 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22018 gen_SEGV_if_not_16_aligned( addr );
22019 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
22020 DIP("vmovaps %s,%s\n", dis_buf, nameXMMReg(rG));
22021 delta += alen;
22022 }
22023 goto decode_success;
22024 }
sewardj6fcd43e2012-06-14 08:51:35 +000022025 /* VMOVAPS ymm2/m256, ymm1 = VEX.256.0F.WIG 28 /r */
22026 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22027 UChar modrm = getUChar(delta);
22028 UInt rG = gregOfRexRM(pfx, modrm);
22029 if (epartIsReg(modrm)) {
22030 UInt rE = eregOfRexRM(pfx,modrm);
22031 putYMMReg( rG, getYMMReg( rE ));
22032 DIP("vmovaps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
22033 delta += 1;
22034 } else {
22035 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22036 gen_SEGV_if_not_32_aligned( addr );
22037 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
22038 DIP("vmovaps %s,%s\n", dis_buf, nameYMMReg(rG));
22039 delta += alen;
22040 }
22041 goto decode_success;
22042 }
sewardjc4530ae2012-05-21 10:18:49 +000022043 break;
22044
22045 case 0x29:
sewardj98d02cc2012-06-02 11:55:25 +000022046 /* VMOVAPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 29 /r */
22047 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22048 UChar modrm = getUChar(delta);
22049 UInt rG = gregOfRexRM(pfx,modrm);
22050 if (epartIsReg(modrm)) {
22051 UInt rE = eregOfRexRM(pfx,modrm);
22052 putYMMRegLoAndZU( rE, getXMMReg(rG) );
22053 DIP("vmovapd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
22054 delta += 1;
22055 } else {
22056 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22057 gen_SEGV_if_not_16_aligned( addr );
22058 storeLE( mkexpr(addr), getXMMReg(rG) );
22059 DIP("vmovapd %s,%s\n", nameXMMReg(rG), dis_buf );
22060 delta += alen;
22061 }
22062 goto decode_success;
22063 }
22064 /* VMOVAPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 29 /r */
22065 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22066 UChar modrm = getUChar(delta);
22067 UInt rG = gregOfRexRM(pfx,modrm);
22068 if (epartIsReg(modrm)) {
22069 UInt rE = eregOfRexRM(pfx,modrm);
22070 putYMMReg( rE, getYMMReg(rG) );
22071 DIP("vmovapd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
22072 delta += 1;
22073 } else {
22074 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22075 gen_SEGV_if_not_32_aligned( addr );
22076 storeLE( mkexpr(addr), getYMMReg(rG) );
22077 DIP("vmovapd %s,%s\n", nameYMMReg(rG), dis_buf );
22078 delta += alen;
22079 }
22080 goto decode_success;
22081 }
sewardjc4530ae2012-05-21 10:18:49 +000022082 /* VMOVAPS xmm1, xmm2/m128 = VEX.128.0F.WIG 29 /r */
22083 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22084 UChar modrm = getUChar(delta);
22085 UInt rG = gregOfRexRM(pfx,modrm);
22086 if (epartIsReg(modrm)) {
22087 UInt rE = eregOfRexRM(pfx,modrm);
22088 putYMMRegLoAndZU( rE, getXMMReg(rG) );
22089 DIP("vmovaps %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
22090 delta += 1;
22091 goto decode_success;
22092 } else {
22093 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22094 gen_SEGV_if_not_16_aligned( addr );
22095 storeLE( mkexpr(addr), getXMMReg(rG) );
22096 DIP("vmovaps %s,%s\n", nameXMMReg(rG), dis_buf );
22097 delta += alen;
22098 goto decode_success;
22099 }
22100 }
sewardj98d02cc2012-06-02 11:55:25 +000022101 /* VMOVAPS ymm1, ymm2/m256 = VEX.256.0F.WIG 29 /r */
22102 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
sewardjc4530ae2012-05-21 10:18:49 +000022103 UChar modrm = getUChar(delta);
22104 UInt rG = gregOfRexRM(pfx,modrm);
22105 if (epartIsReg(modrm)) {
22106 UInt rE = eregOfRexRM(pfx,modrm);
sewardj98d02cc2012-06-02 11:55:25 +000022107 putYMMReg( rE, getYMMReg(rG) );
22108 DIP("vmovaps %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
sewardjc4530ae2012-05-21 10:18:49 +000022109 delta += 1;
sewardj98d02cc2012-06-02 11:55:25 +000022110 goto decode_success;
sewardjc4530ae2012-05-21 10:18:49 +000022111 } else {
22112 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj98d02cc2012-06-02 11:55:25 +000022113 gen_SEGV_if_not_32_aligned( addr );
22114 storeLE( mkexpr(addr), getYMMReg(rG) );
22115 DIP("vmovaps %s,%s\n", nameYMMReg(rG), dis_buf );
sewardjc4530ae2012-05-21 10:18:49 +000022116 delta += alen;
sewardj98d02cc2012-06-02 11:55:25 +000022117 goto decode_success;
sewardjc4530ae2012-05-21 10:18:49 +000022118 }
sewardjc4530ae2012-05-21 10:18:49 +000022119 }
22120 break;
22121
22122 case 0x2A: {
22123 IRTemp rmode = newTemp(Ity_I32);
22124 assign( rmode, get_sse_roundingmode() );
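      /* The current SSE rounding mode is captured here; the scalar
         int->FP conversions below use it (or re-read it) wherever
         rounding can actually matter. */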
22125 /* VCVTSI2SD r/m32, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W0 2A /r */
22126 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
22127 UChar modrm = getUChar(delta);
22128 UInt rV = getVexNvvvv(pfx);
22129 UInt rD = gregOfRexRM(pfx, modrm);
22130 IRTemp arg32 = newTemp(Ity_I32);
22131 if (epartIsReg(modrm)) {
22132 UInt rS = eregOfRexRM(pfx,modrm);
22133 assign( arg32, getIReg32(rS) );
22134 delta += 1;
22135 DIP("vcvtsi2sdl %s,%s,%s\n",
22136 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
22137 } else {
22138 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22139 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
22140 delta += alen;
22141 DIP("vcvtsi2sdl %s,%s,%s\n",
22142 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
22143 }
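         /* No rounding mode is needed here: every 32-bit signed integer
            is exactly representable as an F64, so Iop_I32StoF64 is exact.
            The 64-bit variant below supplies a rounding mode, since an
            I64 may not convert exactly. */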
22144 putXMMRegLane64F( rD, 0,
22145 unop(Iop_I32StoF64, mkexpr(arg32)));
22146 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
22147 putYMMRegLane128( rD, 1, mkV128(0) );
22148 *uses_vvvv = True;
22149 goto decode_success;
22150 }
22151 /* VCVTSI2SD r/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W1 2A /r */
22152 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
22153 UChar modrm = getUChar(delta);
22154 UInt rV = getVexNvvvv(pfx);
22155 UInt rD = gregOfRexRM(pfx, modrm);
22156 IRTemp arg64 = newTemp(Ity_I64);
22157 if (epartIsReg(modrm)) {
22158 UInt rS = eregOfRexRM(pfx,modrm);
22159 assign( arg64, getIReg64(rS) );
22160 delta += 1;
22161 DIP("vcvtsi2sdq %s,%s,%s\n",
22162 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
22163 } else {
22164 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22165 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
22166 delta += alen;
22167 DIP("vcvtsi2sdq %s,%s,%s\n",
22168 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
22169 }
22170 putXMMRegLane64F( rD, 0,
22171 binop( Iop_I64StoF64,
22172 get_sse_roundingmode(),
22173 mkexpr(arg64)) );
22174 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
22175 putYMMRegLane128( rD, 1, mkV128(0) );
22176 *uses_vvvv = True;
22177 goto decode_success;
22178 }
22179 /* VCVTSI2SS r/m64, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W1 2A /r */
22180 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
22181 UChar modrm = getUChar(delta);
22182 UInt rV = getVexNvvvv(pfx);
22183 UInt rD = gregOfRexRM(pfx, modrm);
22184 IRTemp arg64 = newTemp(Ity_I64);
22185 if (epartIsReg(modrm)) {
22186 UInt rS = eregOfRexRM(pfx,modrm);
22187 assign( arg64, getIReg64(rS) );
22188 delta += 1;
22189 DIP("vcvtsi2ssq %s,%s,%s\n",
22190 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
22191 } else {
22192 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22193 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
22194 delta += alen;
22195 DIP("vcvtsi2ssq %s,%s,%s\n",
22196 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
22197 }
22198 putXMMRegLane32F( rD, 0,
22199 binop(Iop_F64toF32,
22200 mkexpr(rmode),
22201 binop(Iop_I64StoF64, mkexpr(rmode),
22202 mkexpr(arg64)) ) );
22203 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
22204 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
22205 putYMMRegLane128( rD, 1, mkV128(0) );
22206 *uses_vvvv = True;
22207 goto decode_success;
22208 }
22209 /* VCVTSI2SS r/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W0 2A /r */
22210 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
22211 UChar modrm = getUChar(delta);
22212 UInt rV = getVexNvvvv(pfx);
22213 UInt rD = gregOfRexRM(pfx, modrm);
22214 IRTemp arg32 = newTemp(Ity_I32);
22215 if (epartIsReg(modrm)) {
22216 UInt rS = eregOfRexRM(pfx,modrm);
22217 assign( arg32, getIReg32(rS) );
22218 delta += 1;
22219 DIP("vcvtsi2ssl %s,%s,%s\n",
22220 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
22221 } else {
22222 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22223 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
22224 delta += alen;
22225 DIP("vcvtsi2ssl %s,%s,%s\n",
22226 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
22227 }
22228 putXMMRegLane32F( rD, 0,
22229 binop(Iop_F64toF32,
22230 mkexpr(rmode),
22231 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
22232 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
22233 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
22234 putYMMRegLane128( rD, 1, mkV128(0) );
22235 *uses_vvvv = True;
22236 goto decode_success;
22237 }
22238 break;
22239 }
22240
sewardj8eb7ae82012-06-24 14:00:27 +000022241 case 0x2B:
22242 /* VMOVNTPD xmm1, m128 = VEX.128.66.0F.WIG 2B /r */
22243 /* VMOVNTPS xmm1, m128 = VEX.128.0F.WIG 2B /r */
22244 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
22245 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
22246 UChar modrm = getUChar(delta);
22247 UInt rS = gregOfRexRM(pfx, modrm);
22248 IRTemp tS = newTemp(Ity_V128);
22249 assign(tS, getXMMReg(rS));
22250 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
22251 delta += alen;
22252 gen_SEGV_if_not_16_aligned(addr);
22253 storeLE(mkexpr(addr), mkexpr(tS));
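         /* The non-temporal hint is not modelled; after the alignment
            check the store is expressed as an ordinary little-endian
            store. */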
22254 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
22255 nameXMMReg(rS), dis_buf);
22256 goto decode_success;
22257 }
22258 /* VMOVNTPD ymm1, m256 = VEX.256.66.0F.WIG 2B /r */
22259 /* VMOVNTPS ymm1, m256 = VEX.256.0F.WIG 2B /r */
22260 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
22261 && 1==getVexL(pfx)/*256*/ && !epartIsReg(getUChar(delta))) {
22262 UChar modrm = getUChar(delta);
22263 UInt rS = gregOfRexRM(pfx, modrm);
22264 IRTemp tS = newTemp(Ity_V256);
22265 assign(tS, getYMMReg(rS));
22266 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
22267 delta += alen;
22268 gen_SEGV_if_not_32_aligned(addr);
22269 storeLE(mkexpr(addr), mkexpr(tS));
22270 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
22271 nameYMMReg(rS), dis_buf);
22272 goto decode_success;
22273 }
22274 break;
22275
sewardjc4530ae2012-05-21 10:18:49 +000022276 case 0x2C:
sewardj80804d12012-05-22 10:48:13 +000022277      /* VCVTTSD2SI xmm1/m64, r32 = VEX.LIG.F2.0F.W0 2C /r */
sewardjc4530ae2012-05-21 10:18:49 +000022278 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
22279 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
22280 goto decode_success;
22281 }
22282 /* VCVTTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2C /r */
22283 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
22284 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
22285 goto decode_success;
22286 }
sewardj80804d12012-05-22 10:48:13 +000022287 /* VCVTTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2C /r */
22288 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
22289 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
22290 goto decode_success;
22291 }
sewardj8ef22422012-05-24 16:29:18 +000022292      /* VCVTTSS2SI xmm1/m32, r64 = VEX.LIG.F3.0F.W1 2C /r */
22293 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
22294 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
22295 goto decode_success;
22296 }
sewardjc4530ae2012-05-21 10:18:49 +000022297 break;
22298
sewardjadf357c2012-06-24 13:44:17 +000022299 case 0x2D:
22300      /* VCVTSD2SI xmm1/m64, r32 = VEX.LIG.F2.0F.W0 2D /r */
22301 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
22302 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
22303 goto decode_success;
22304 }
22305 /* VCVTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2D /r */
22306 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
22307 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
22308 goto decode_success;
22309 }
22310 /* VCVTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2D /r */
22311 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
22312 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
22313 goto decode_success;
22314 }
22315      /* VCVTSS2SI xmm1/m32, r64 = VEX.LIG.F3.0F.W1 2D /r */
22316 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
22317 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
22318 goto decode_success;
22319 }
22320 break;
22321
sewardjc4530ae2012-05-21 10:18:49 +000022322 case 0x2E:
sewardj4ed05e02012-06-18 15:01:30 +000022323 case 0x2F:
sewardjc4530ae2012-05-21 10:18:49 +000022324 /* VUCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2E /r */
sewardj4ed05e02012-06-18 15:01:30 +000022325 /* VCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2F /r */
sewardjc4530ae2012-05-21 10:18:49 +000022326 if (have66noF2noF3(pfx)) {
22327 delta = dis_COMISD( vbi, pfx, delta, True/*isAvx*/, opc );
22328 goto decode_success;
22329 }
22330 /* VUCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2E /r */
sewardj4ed05e02012-06-18 15:01:30 +000022331 /* VCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2F /r */
sewardjc4530ae2012-05-21 10:18:49 +000022332 if (haveNo66noF2noF3(pfx)) {
22333 delta = dis_COMISS( vbi, pfx, delta, True/*isAvx*/, opc );
22334 goto decode_success;
22335 }
22336 break;
22337
sewardj8eb7ae82012-06-24 14:00:27 +000022338 case 0x50:
22339 /* VMOVMSKPD xmm2, r32 = VEX.128.66.0F.WIG 50 /r */
22340 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22341 delta = dis_MOVMSKPD_128( vbi, pfx, delta, True/*isAvx*/ );
22342 goto decode_success;
22343 }
22344 /* VMOVMSKPD ymm2, r32 = VEX.256.66.0F.WIG 50 /r */
22345 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22346 delta = dis_MOVMSKPD_256( vbi, pfx, delta );
22347 goto decode_success;
22348 }
22349 /* VMOVMSKPS xmm2, r32 = VEX.128.0F.WIG 50 /r */
22350 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22351 delta = dis_MOVMSKPS_128( vbi, pfx, delta, True/*isAvx*/ );
22352 goto decode_success;
22353 }
22354 /* VMOVMSKPS ymm2, r32 = VEX.256.0F.WIG 50 /r */
22355 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22356 delta = dis_MOVMSKPS_256( vbi, pfx, delta );
22357 goto decode_success;
22358 }
22359 break;
22360
sewardjc4530ae2012-05-21 10:18:49 +000022361 case 0x51:
sewardj66becf32012-06-18 23:15:16 +000022362 /* VSQRTSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 51 /r */
22363 if (haveF3no66noF2(pfx)) {
22364 delta = dis_AVX128_E_V_to_G_lo32_unary(
22365 uses_vvvv, vbi, pfx, delta, "vsqrtss", Iop_Sqrt32F0x4 );
22366 goto decode_success;
22367 }
22368 /* VSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 51 /r */
22369 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22370 delta = dis_AVX128_E_to_G_unary_all(
22371 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx4 );
22372 goto decode_success;
22373 }
22374 /* VSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 51 /r */
22375 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22376 delta = dis_AVX256_E_to_G_unary_all(
22377 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx8 );
22378 goto decode_success;
22379 }
sewardjc4530ae2012-05-21 10:18:49 +000022380 /* VSQRTSD xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F2.0F.WIG 51 /r */
22381 if (haveF2no66noF3(pfx)) {
22382 delta = dis_AVX128_E_V_to_G_lo64_unary(
22383 uses_vvvv, vbi, pfx, delta, "vsqrtsd", Iop_Sqrt64F0x2 );
22384 goto decode_success;
sewardj66becf32012-06-18 23:15:16 +000022385 }
22386 /* VSQRTPD xmm2/m128(E), xmm1(G) = VEX.NDS.128.66.0F.WIG 51 /r */
22387 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22388 delta = dis_AVX128_E_to_G_unary_all(
22389 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx2 );
22390 goto decode_success;
22391 }
22392 /* VSQRTPD ymm2/m256(E), ymm1(G) = VEX.NDS.256.66.0F.WIG 51 /r */
22393 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22394 delta = dis_AVX256_E_to_G_unary_all(
22395 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx4 );
22396 goto decode_success;
22397 }
22398 break;
22399
22400 case 0x52:
22401      /* VRSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 52 /r */
22402 if (haveF3no66noF2(pfx)) {
22403 delta = dis_AVX128_E_V_to_G_lo32_unary(
22404 uses_vvvv, vbi, pfx, delta, "vrsqrtss", Iop_RSqrt32F0x4 );
22405 goto decode_success;
22406 }
22407 /* VRSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 52 /r */
22408 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22409 delta = dis_AVX128_E_to_G_unary_all(
22410 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrt32Fx4 );
22411 goto decode_success;
22412 }
22413 /* VRSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 52 /r */
22414 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22415 delta = dis_AVX256_E_to_G_unary_all(
22416 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrt32Fx8 );
22417 goto decode_success;
22418 }
22419 break;
sewardjc4530ae2012-05-21 10:18:49 +000022420
sewardj82096922012-06-24 14:57:59 +000022421 case 0x53:
22422      /* VRCPSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 53 /r */
22423 if (haveF3no66noF2(pfx)) {
22424 delta = dis_AVX128_E_V_to_G_lo32_unary(
22425 uses_vvvv, vbi, pfx, delta, "vrcpss", Iop_Recip32F0x4 );
22426 goto decode_success;
22427 }
22428 /* VRCPPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 53 /r */
22429 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22430 delta = dis_AVX128_E_to_G_unary_all(
22431 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_Recip32Fx4 );
22432 goto decode_success;
22433 }
22434 /* VRCPPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 53 /r */
22435 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22436 delta = dis_AVX256_E_to_G_unary_all(
22437 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_Recip32Fx8 );
22438 goto decode_success;
22439 }
22440 break;
22441
sewardjc4530ae2012-05-21 10:18:49 +000022442 case 0x54:
sewardj251b59e2012-05-25 13:51:07 +000022443 /* VANDPD r/m, rV, r ::: r = rV & r/m */
sewardjc4530ae2012-05-21 10:18:49 +000022444 /* VANDPD = VEX.NDS.128.66.0F.WIG 54 /r */
22445 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22446 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
22447 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128 );
22448 goto decode_success;
22449 }
sewardj4b1cc832012-06-13 11:10:20 +000022450 /* VANDPD r/m, rV, r ::: r = rV & r/m */
22451 /* VANDPD = VEX.NDS.256.66.0F.WIG 54 /r */
22452 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22453 delta = dis_AVX256_E_V_to_G(
22454 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256 );
22455 goto decode_success;
22456 }
sewardjc4530ae2012-05-21 10:18:49 +000022457 /* VANDPS = VEX.NDS.128.0F.WIG 54 /r */
22458 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22459 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
22460 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128 );
22461 goto decode_success;
22462 }
sewardj2a2bda92012-06-14 23:32:02 +000022463 /* VANDPS = VEX.NDS.256.0F.WIG 54 /r */
22464 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22465 delta = dis_AVX256_E_V_to_G(
22466 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256 );
22467 goto decode_success;
22468 }
sewardjc4530ae2012-05-21 10:18:49 +000022469 break;
22470
22471 case 0x55:
sewardj251b59e2012-05-25 13:51:07 +000022472 /* VANDNPD r/m, rV, r ::: r = (not rV) & r/m */
sewardjc4530ae2012-05-21 10:18:49 +000022473 /* VANDNPD = VEX.NDS.128.66.0F.WIG 55 /r */
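      /* The generic helper complements its left (vvvv) operand when
         invertLeftArg is True, which appears to be how the ANDN forms
         here get (not rV) & r/m out of plain Iop_AndV128/Iop_AndV256. */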
22474 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22475 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22476                    uses_vvvv, vbi, pfx, delta, "vandnpd", Iop_AndV128,
sewardj44565e82012-05-22 09:14:15 +000022477 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
sewardjc4530ae2012-05-21 10:18:49 +000022478 goto decode_success;
22479 }
sewardj2a2bda92012-06-14 23:32:02 +000022480 /* VANDNPD = VEX.NDS.256.66.0F.WIG 55 /r */
22481 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22482 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
22483                    uses_vvvv, vbi, pfx, delta, "vandnpd", Iop_AndV256,
22484 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
22485 goto decode_success;
22486 }
sewardjc4530ae2012-05-21 10:18:49 +000022487 /* VANDNPS = VEX.NDS.128.0F.WIG 55 /r */
22488 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22489 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22490                    uses_vvvv, vbi, pfx, delta, "vandnps", Iop_AndV128,
sewardj44565e82012-05-22 09:14:15 +000022491 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
sewardjc4530ae2012-05-21 10:18:49 +000022492 goto decode_success;
22493 }
sewardj2a2bda92012-06-14 23:32:02 +000022494 /* VANDNPS = VEX.NDS.256.0F.WIG 55 /r */
22495 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22496 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
22497                    uses_vvvv, vbi, pfx, delta, "vandnps", Iop_AndV256,
22498 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
22499 goto decode_success;
22500 }
sewardjc4530ae2012-05-21 10:18:49 +000022501 break;
22502
22503 case 0x56:
sewardj251b59e2012-05-25 13:51:07 +000022504 /* VORPD r/m, rV, r ::: r = rV | r/m */
sewardjc4530ae2012-05-21 10:18:49 +000022505 /* VORPD = VEX.NDS.128.66.0F.WIG 56 /r */
22506 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22507 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
22508 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV128 );
22509 goto decode_success;
22510 }
sewardj2a2bda92012-06-14 23:32:02 +000022511 /* VORPD r/m, rV, r ::: r = rV | r/m */
22512 /* VORPD = VEX.NDS.256.66.0F.WIG 56 /r */
22513 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22514 delta = dis_AVX256_E_V_to_G(
22515 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV256 );
22516 goto decode_success;
22517 }
sewardj251b59e2012-05-25 13:51:07 +000022518 /* VORPS r/m, rV, r ::: r = rV | r/m */
sewardjc4530ae2012-05-21 10:18:49 +000022519 /* VORPS = VEX.NDS.128.0F.WIG 56 /r */
22520 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22521 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
22522 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV128 );
22523 goto decode_success;
22524 }
sewardj2a2bda92012-06-14 23:32:02 +000022525 /* VORPS r/m, rV, r ::: r = rV | r/m */
22526 /* VORPS = VEX.NDS.256.0F.WIG 56 /r */
22527 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22528 delta = dis_AVX256_E_V_to_G(
22529 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV256 );
22530 goto decode_success;
22531 }
sewardjc4530ae2012-05-21 10:18:49 +000022532 break;
22533
22534 case 0x57:
sewardj251b59e2012-05-25 13:51:07 +000022535 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
sewardjc4530ae2012-05-21 10:18:49 +000022536 /* VXORPD = VEX.NDS.128.66.0F.WIG 57 /r */
22537 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22538 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
22539 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV128 );
22540 goto decode_success;
22541 }
sewardj4b1cc832012-06-13 11:10:20 +000022542 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
22543 /* VXORPD = VEX.NDS.256.66.0F.WIG 57 /r */
22544 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22545 delta = dis_AVX256_E_V_to_G(
22546 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV256 );
22547 goto decode_success;
22548 }
sewardj251b59e2012-05-25 13:51:07 +000022549 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
sewardjc4530ae2012-05-21 10:18:49 +000022550 /* VXORPS = VEX.NDS.128.0F.WIG 57 /r */
22551 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22552 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
22553 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV128 );
22554 goto decode_success;
22555 }
sewardj2a2bda92012-06-14 23:32:02 +000022556 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
22557 /* VXORPS = VEX.NDS.256.0F.WIG 57 /r */
22558 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22559 delta = dis_AVX256_E_V_to_G(
22560 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV256 );
22561 goto decode_success;
22562 }
sewardjc4530ae2012-05-21 10:18:49 +000022563 break;
22564
22565 case 0x58:
22566 /* VADDSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 58 /r */
22567 if (haveF2no66noF3(pfx)) {
22568 delta = dis_AVX128_E_V_to_G_lo64(
22569 uses_vvvv, vbi, pfx, delta, "vaddsd", Iop_Add64F0x2 );
22570 goto decode_success;
22571 }
22572 /* VADDSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 58 /r */
22573 if (haveF3no66noF2(pfx)) {
22574 delta = dis_AVX128_E_V_to_G_lo32(
22575 uses_vvvv, vbi, pfx, delta, "vaddss", Iop_Add32F0x4 );
22576 goto decode_success;
22577 }
sewardj251b59e2012-05-25 13:51:07 +000022578 /* VADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 58 /r */
22579 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22580 delta = dis_AVX128_E_V_to_G(
22581 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx4 );
22582 goto decode_success;
22583 }
sewardj56c30312012-06-12 08:45:39 +000022584 /* VADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 58 /r */
22585 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22586 delta = dis_AVX256_E_V_to_G(
22587 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx8 );
22588 goto decode_success;
22589 }
22590 /* VADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 58 /r */
22591 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22592 delta = dis_AVX128_E_V_to_G(
22593 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx2 );
22594 goto decode_success;
22595 }
22596 /* VADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 58 /r */
22597 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22598 delta = dis_AVX256_E_V_to_G(
22599 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx4 );
22600 goto decode_success;
22601 }
sewardjc4530ae2012-05-21 10:18:49 +000022602 break;
22603
22604 case 0x59:
22605 /* VMULSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 59 /r */
22606 if (haveF2no66noF3(pfx)) {
22607 delta = dis_AVX128_E_V_to_G_lo64(
22608 uses_vvvv, vbi, pfx, delta, "vmulsd", Iop_Mul64F0x2 );
22609 goto decode_success;
22610 }
22611 /* VMULSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 59 /r */
22612 if (haveF3no66noF2(pfx)) {
22613 delta = dis_AVX128_E_V_to_G_lo32(
22614 uses_vvvv, vbi, pfx, delta, "vmulss", Iop_Mul32F0x4 );
22615 goto decode_success;
22616 }
sewardj251b59e2012-05-25 13:51:07 +000022617 /* VMULPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 59 /r */
22618 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22619 delta = dis_AVX128_E_V_to_G(
22620 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx4 );
22621 goto decode_success;
22622 }
sewardj56c30312012-06-12 08:45:39 +000022623 /* VMULPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 59 /r */
22624 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22625 delta = dis_AVX256_E_V_to_G(
22626 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx8 );
22627 goto decode_success;
22628 }
22629 /* VMULPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 59 /r */
22630 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22631 delta = dis_AVX128_E_V_to_G(
22632 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx2 );
22633 goto decode_success;
22634 }
22635 /* VMULPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 59 /r */
22636 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22637 delta = dis_AVX256_E_V_to_G(
22638 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx4 );
22639 goto decode_success;
22640 }
sewardjc4530ae2012-05-21 10:18:49 +000022641 break;
22642
22643 case 0x5A:
22644 /* VCVTPS2PD xmm2/m64, xmm1 = VEX.128.0F.WIG 5A /r */
sewardj72df0682012-05-23 23:54:30 +000022645 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
sewardj66becf32012-06-18 23:15:16 +000022646 delta = dis_CVTPS2PD_128( vbi, pfx, delta, True/*isAvx*/ );
22647 goto decode_success;
22648 }
22649 /* VCVTPS2PD xmm2/m128, ymm1 = VEX.256.0F.WIG 5A /r */
22650 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22651 delta = dis_CVTPS2PD_256( vbi, pfx, delta );
sewardjc4530ae2012-05-21 10:18:49 +000022652 goto decode_success;
22653 }
22654 /* VCVTPD2PS xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5A /r */
sewardj72df0682012-05-23 23:54:30 +000022655 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
sewardj6fcd43e2012-06-14 08:51:35 +000022656 delta = dis_CVTPD2PS_128( vbi, pfx, delta, True/*isAvx*/ );
22657 goto decode_success;
22658 }
22659 /* VCVTPD2PS ymm2/m256, xmm1 = VEX.256.66.0F.WIG 5A /r */
22660 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22661 delta = dis_CVTPD2PS_256( vbi, pfx, delta );
sewardjc4530ae2012-05-21 10:18:49 +000022662 goto decode_success;
22663 }
sewardj72df0682012-05-23 23:54:30 +000022664 /* VCVTSD2SS xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5A /r */
22665 if (haveF2no66noF3(pfx)) {
22666 UChar modrm = getUChar(delta);
22667 UInt rV = getVexNvvvv(pfx);
22668 UInt rD = gregOfRexRM(pfx, modrm);
22669 IRTemp f64lo = newTemp(Ity_F64);
22670 IRTemp rmode = newTemp(Ity_I32);
22671 assign( rmode, get_sse_roundingmode() );
22672 if (epartIsReg(modrm)) {
22673 UInt rS = eregOfRexRM(pfx,modrm);
22674 assign(f64lo, getXMMRegLane64F(rS, 0));
22675 delta += 1;
22676 DIP("vcvtsd2ss %s,%s,%s\n",
22677 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
22678 } else {
22679 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22680 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)) );
22681 delta += alen;
22682 DIP("vcvtsd2ss %s,%s,%s\n",
22683 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
22684 }
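         /* The narrowed result replaces only the low 32 bits; lanes 1..3
            are carried over from rV and the upper ymm lane is zeroed, as
            for the other scalar AVX operations. */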
22685 putXMMRegLane32F( rD, 0,
22686 binop( Iop_F64toF32, mkexpr(rmode),
22687 mkexpr(f64lo)) );
22688 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
22689 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
22690 putYMMRegLane128( rD, 1, mkV128(0) );
22691 *uses_vvvv = True;
22692 goto decode_success;
22693 }
22694 /* VCVTSS2SD xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5A /r */
22695 if (haveF3no66noF2(pfx)) {
22696 UChar modrm = getUChar(delta);
22697 UInt rV = getVexNvvvv(pfx);
22698 UInt rD = gregOfRexRM(pfx, modrm);
22699 IRTemp f32lo = newTemp(Ity_F32);
22700 if (epartIsReg(modrm)) {
22701 UInt rS = eregOfRexRM(pfx,modrm);
22702 assign(f32lo, getXMMRegLane32F(rS, 0));
22703 delta += 1;
22704 DIP("vcvtss2sd %s,%s,%s\n",
22705 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
22706 } else {
22707 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22708 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)) );
22709 delta += alen;
22710 DIP("vcvtss2sd %s,%s,%s\n",
22711 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
22712 }
22713 putXMMRegLane64F( rD, 0,
22714 unop( Iop_F32toF64, mkexpr(f32lo)) );
22715 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
22716 putYMMRegLane128( rD, 1, mkV128(0) );
22717 *uses_vvvv = True;
22718 goto decode_success;
22719 }
sewardjc4530ae2012-05-21 10:18:49 +000022720 break;
22721
sewardj251b59e2012-05-25 13:51:07 +000022722 case 0x5B:
22723 /* VCVTPS2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5B /r */
22724 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
sewardj66becf32012-06-18 23:15:16 +000022725 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
22726 True/*isAvx*/, False/*!r2zero*/ );
22727 goto decode_success;
22728 }
22729 /* VCVTPS2DQ ymm2/m256, ymm1 = VEX.256.66.0F.WIG 5B /r */
22730 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22731 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
22732 False/*!r2zero*/ );
22733 goto decode_success;
22734 }
22735 /* VCVTTPS2DQ xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 5B /r */
22736 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
22737 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
22738 True/*isAvx*/, True/*r2zero*/ );
22739 goto decode_success;
22740 }
22741 /* VCVTTPS2DQ ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 5B /r */
22742 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
22743 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
22744 True/*r2zero*/ );
22745 goto decode_success;
22746 }
22747 /* VCVTDQ2PS xmm2/m128, xmm1 = VEX.128.0F.WIG 5B /r */
22748 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22749 delta = dis_CVTDQ2PS_128 ( vbi, pfx, delta, True/*isAvx*/ );
22750 goto decode_success;
22751 }
22752 /* VCVTDQ2PS ymm2/m256, ymm1 = VEX.256.0F.WIG 5B /r */
22753 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22754 delta = dis_CVTDQ2PS_256 ( vbi, pfx, delta );
sewardj251b59e2012-05-25 13:51:07 +000022755 goto decode_success;
22756 }
22757 break;
22758
sewardjc4530ae2012-05-21 10:18:49 +000022759 case 0x5C:
22760 /* VSUBSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5C /r */
22761 if (haveF2no66noF3(pfx)) {
22762 delta = dis_AVX128_E_V_to_G_lo64(
22763 uses_vvvv, vbi, pfx, delta, "vsubsd", Iop_Sub64F0x2 );
22764 goto decode_success;
22765 }
22766 /* VSUBSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5C /r */
22767 if (haveF3no66noF2(pfx)) {
22768 delta = dis_AVX128_E_V_to_G_lo32(
22769 uses_vvvv, vbi, pfx, delta, "vsubss", Iop_Sub32F0x4 );
22770 goto decode_success;
22771 }
sewardj251b59e2012-05-25 13:51:07 +000022772 /* VSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5C /r */
22773 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22774 delta = dis_AVX128_E_V_to_G(
22775 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx4 );
22776 goto decode_success;
22777 }
sewardj56c30312012-06-12 08:45:39 +000022778 /* VSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5C /r */
22779 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22780 delta = dis_AVX256_E_V_to_G(
22781 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx8 );
22782 goto decode_success;
22783 }
22784 /* VSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5C /r */
22785 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22786 delta = dis_AVX128_E_V_to_G(
22787 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx2 );
22788 goto decode_success;
22789 }
22790 /* VSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5C /r */
22791 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22792 delta = dis_AVX256_E_V_to_G(
22793 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx4 );
22794 goto decode_success;
22795 }
sewardjc4530ae2012-05-21 10:18:49 +000022796 break;
22797
22798 case 0x5D:
22799 /* VMINSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5D /r */
22800 if (haveF2no66noF3(pfx)) {
22801 delta = dis_AVX128_E_V_to_G_lo64(
22802 uses_vvvv, vbi, pfx, delta, "vminsd", Iop_Min64F0x2 );
22803 goto decode_success;
22804 }
22805 /* VMINSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5D /r */
22806 if (haveF3no66noF2(pfx)) {
22807 delta = dis_AVX128_E_V_to_G_lo32(
22808 uses_vvvv, vbi, pfx, delta, "vminss", Iop_Min32F0x4 );
22809 goto decode_success;
22810 }
sewardj251b59e2012-05-25 13:51:07 +000022811 /* VMINPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5D /r */
22812 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22813 delta = dis_AVX128_E_V_to_G(
22814 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx4 );
22815 goto decode_success;
22816 }
sewardj8eb7ae82012-06-24 14:00:27 +000022817 /* VMINPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5D /r */
22818 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22819 delta = dis_AVX256_E_V_to_G(
22820 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx8 );
22821 goto decode_success;
22822 }
22823 /* VMINPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5D /r */
22824 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22825 delta = dis_AVX128_E_V_to_G(
22826 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx2 );
22827 goto decode_success;
22828 }
22829 /* VMINPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5D /r */
22830 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22831 delta = dis_AVX256_E_V_to_G(
22832 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx4 );
22833 goto decode_success;
22834 }
sewardjc4530ae2012-05-21 10:18:49 +000022835 break;
22836
22837 case 0x5E:
22838 /* VDIVSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5E /r */
22839 if (haveF2no66noF3(pfx)) {
22840 delta = dis_AVX128_E_V_to_G_lo64(
22841 uses_vvvv, vbi, pfx, delta, "vdivsd", Iop_Div64F0x2 );
22842 goto decode_success;
22843 }
22844 /* VDIVSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5E /r */
22845 if (haveF3no66noF2(pfx)) {
22846 delta = dis_AVX128_E_V_to_G_lo32(
22847 uses_vvvv, vbi, pfx, delta, "vdivss", Iop_Div32F0x4 );
22848 goto decode_success;
22849 }
sewardj2a2bda92012-06-14 23:32:02 +000022850 /* VDIVPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5E /r */
22851 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22852 delta = dis_AVX128_E_V_to_G(
22853 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx4 );
22854 goto decode_success;
22855 }
sewardj56c30312012-06-12 08:45:39 +000022856 /* VDIVPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5E /r */
22857 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22858 delta = dis_AVX256_E_V_to_G(
22859 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx8 );
22860 goto decode_success;
22861 }
sewardj4b1cc832012-06-13 11:10:20 +000022862 /* VDIVPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5E /r */
22863 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22864 delta = dis_AVX128_E_V_to_G(
22865 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx2 );
22866 goto decode_success;
22867 }
sewardj56c30312012-06-12 08:45:39 +000022868 /* VDIVPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5E /r */
22869 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22870 delta = dis_AVX256_E_V_to_G(
22871 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx4 );
22872 goto decode_success;
22873 }
sewardjc4530ae2012-05-21 10:18:49 +000022874 break;
22875
22876 case 0x5F:
22877 /* VMAXSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5F /r */
22878 if (haveF2no66noF3(pfx)) {
22879 delta = dis_AVX128_E_V_to_G_lo64(
22880 uses_vvvv, vbi, pfx, delta, "vmaxsd", Iop_Max64F0x2 );
22881 goto decode_success;
22882 }
22883 /* VMAXSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5F /r */
22884 if (haveF3no66noF2(pfx)) {
22885 delta = dis_AVX128_E_V_to_G_lo32(
22886 uses_vvvv, vbi, pfx, delta, "vmaxss", Iop_Max32F0x4 );
22887 goto decode_success;
22888 }
sewardj251b59e2012-05-25 13:51:07 +000022889 /* VMAXPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5F /r */
22890 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22891 delta = dis_AVX128_E_V_to_G(
22892 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx4 );
22893 goto decode_success;
22894 }
sewardj8eb7ae82012-06-24 14:00:27 +000022895 /* VMAXPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5F /r */
22896 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22897 delta = dis_AVX256_E_V_to_G(
22898 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx8 );
22899 goto decode_success;
22900 }
22901 /* VMAXPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5F /r */
22902 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22903 delta = dis_AVX128_E_V_to_G(
22904 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx2 );
22905 goto decode_success;
22906 }
22907 /* VMAXPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5F /r */
22908 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
22909 delta = dis_AVX256_E_V_to_G(
22910 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx4 );
22911 goto decode_success;
22912 }
sewardjc4530ae2012-05-21 10:18:49 +000022913 break;
22914
sewardj44565e82012-05-22 09:14:15 +000022915 case 0x60:
sewardj251b59e2012-05-25 13:51:07 +000022916 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
sewardj6faf7cc2012-05-25 15:53:01 +000022917 /* VPUNPCKLBW = VEX.NDS.128.66.0F.WIG 60 /r */
sewardj44565e82012-05-22 09:14:15 +000022918 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22919 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22920 uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
22921 Iop_InterleaveLO8x16, NULL,
22922 False/*!invertLeftArg*/, True/*swapArgs*/ );
22923 goto decode_success;
22924 }
22925 break;
22926
sewardj251b59e2012-05-25 13:51:07 +000022927 case 0x61:
22928 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
sewardj6faf7cc2012-05-25 15:53:01 +000022929 /* VPUNPCKLWD = VEX.NDS.128.66.0F.WIG 61 /r */
sewardj251b59e2012-05-25 13:51:07 +000022930 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22931 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22932 uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
22933 Iop_InterleaveLO16x8, NULL,
22934 False/*!invertLeftArg*/, True/*swapArgs*/ );
22935 goto decode_success;
22936 }
22937 break;
22938
sewardj6faf7cc2012-05-25 15:53:01 +000022939 case 0x62:
22940 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
22941 /* VPUNPCKLDQ = VEX.NDS.128.66.0F.WIG 62 /r */
22942 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22943 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22944 uses_vvvv, vbi, pfx, delta, "vpunpckldq",
22945 Iop_InterleaveLO32x4, NULL,
22946 False/*!invertLeftArg*/, True/*swapArgs*/ );
22947 goto decode_success;
22948 }
22949 break;
22950
sewardj8516a1f2012-06-24 14:26:30 +000022951 case 0x63:
22952 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
22953 /* VPACKSSWB = VEX.NDS.128.66.0F.WIG 63 /r */
22954 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22955 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22956 uses_vvvv, vbi, pfx, delta, "vpacksswb",
22957 Iop_QNarrowBin16Sto8Sx16, NULL,
22958 False/*!invertLeftArg*/, True/*swapArgs*/ );
22959 goto decode_success;
22960 }
22961 break;
22962
sewardj89378162012-06-24 12:12:20 +000022963 case 0x64:
22964 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
22965 /* VPCMPGTB = VEX.NDS.128.66.0F.WIG 64 /r */
22966 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22967 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
22968 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx16 );
22969 goto decode_success;
22970 }
22971 break;
22972
22973 case 0x65:
22974 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
22975 /* VPCMPGTW = VEX.NDS.128.66.0F.WIG 65 /r */
22976 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22977 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
22978 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx8 );
22979 goto decode_success;
22980 }
22981 break;
22982
sewardj4ed05e02012-06-18 15:01:30 +000022983 case 0x66:
22984 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
22985 /* VPCMPGTD = VEX.NDS.128.66.0F.WIG 66 /r */
22986 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22987 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
22988 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx4 );
22989 goto decode_success;
22990 }
22991 break;
22992
sewardj6c4e45c2012-05-24 00:09:27 +000022993 case 0x67:
22994 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
sewardj6faf7cc2012-05-25 15:53:01 +000022995 /* VPACKUSWB = VEX.NDS.128.66.0F.WIG 67 /r */
sewardj6c4e45c2012-05-24 00:09:27 +000022996 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
22997 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22998 uses_vvvv, vbi, pfx, delta, "vpackuswb",
22999 Iop_QNarrowBin16Sto8Ux16, NULL,
23000 False/*!invertLeftArg*/, True/*swapArgs*/ );
23001 goto decode_success;
23002 }
23003 break;
23004
sewardj44565e82012-05-22 09:14:15 +000023005 case 0x68:
sewardj251b59e2012-05-25 13:51:07 +000023006 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
sewardj44565e82012-05-22 09:14:15 +000023007 /* VPUNPCKHBW = VEX.NDS.128.66.0F.WIG 68 /r */
23008 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23009 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
23010 uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
23011 Iop_InterleaveHI8x16, NULL,
23012 False/*!invertLeftArg*/, True/*swapArgs*/ );
23013 goto decode_success;
23014 }
23015 break;
23016
sewardj251b59e2012-05-25 13:51:07 +000023017 case 0x69:
23018 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
23019 /* VPUNPCKHWD = VEX.NDS.128.66.0F.WIG 69 /r */
23020 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23021 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
23022 uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
23023 Iop_InterleaveHI16x8, NULL,
23024 False/*!invertLeftArg*/, True/*swapArgs*/ );
23025 goto decode_success;
23026 }
23027 break;
23028
sewardj6fcd43e2012-06-14 08:51:35 +000023029 case 0x6A:
23030 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
23031 /* VPUNPCKHDQ = VEX.NDS.128.66.0F.WIG 6A /r */
23032 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23033 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
23034 uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
23035 Iop_InterleaveHI32x4, NULL,
23036 False/*!invertLeftArg*/, True/*swapArgs*/ );
23037 goto decode_success;
23038 }
23039 break;
23040
sewardj6faf7cc2012-05-25 15:53:01 +000023041 case 0x6B:
23042 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
23043 /* VPACKSSDW = VEX.NDS.128.66.0F.WIG 6B /r */
23044 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23045 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
23046 uses_vvvv, vbi, pfx, delta, "vpackssdw",
23047 Iop_QNarrowBin32Sto16Sx8, NULL,
23048 False/*!invertLeftArg*/, True/*swapArgs*/ );
23049 goto decode_success;
23050 }
23051 break;
23052
23053 case 0x6C:
23054 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
23055 /* VPUNPCKLQDQ = VEX.NDS.128.66.0F.WIG 6C /r */
23056 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23057 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
23058 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq",
23059 Iop_InterleaveLO64x2, NULL,
23060 False/*!invertLeftArg*/, True/*swapArgs*/ );
23061 goto decode_success;
23062 }
23063 break;
23064
sewardjfe0c5e72012-06-15 15:48:07 +000023065 case 0x6D:
23066 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
23067 /* VPUNPCKHQDQ = VEX.NDS.128.66.0F.WIG 6D /r */
23068 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23069 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
23070 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq",
23071 Iop_InterleaveHI64x2, NULL,
23072 False/*!invertLeftArg*/, True/*swapArgs*/ );
23073 goto decode_success;
23074 }
23075 break;
23076
sewardjc4530ae2012-05-21 10:18:49 +000023077 case 0x6E:
23078 /* VMOVD r32/m32, xmm1 = VEX.128.66.0F.W0 6E */
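         /* In both the register and memory forms the value is written
            with putYMMRegLoAndZU, i.e. placed in the low lane with bits
            255:128 zeroed, as VEX.128 encoded destinations require. */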
23079 if (have66noF2noF3(pfx)
23080 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
23081 vassert(sz == 2); /* even tho we are transferring 4, not 2. */
23082 UChar modrm = getUChar(delta);
23083 if (epartIsReg(modrm)) {
23084 delta += 1;
23085 putYMMRegLoAndZU(
23086 gregOfRexRM(pfx,modrm),
23087 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
23088 );
23089 DIP("vmovd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
23090 nameXMMReg(gregOfRexRM(pfx,modrm)));
23091 } else {
23092 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
23093 delta += alen;
23094 putYMMRegLoAndZU(
23095 gregOfRexRM(pfx,modrm),
23096 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)))
23097 );
23098 DIP("vmovd %s, %s\n", dis_buf,
23099 nameXMMReg(gregOfRexRM(pfx,modrm)));
23100 }
23101 goto decode_success;
23102 }
sewardj6eaf00c2012-05-23 11:33:56 +000023103 /* VMOVQ r64/m64, xmm1 = VEX.128.66.0F.W1 6E */
23104 if (have66noF2noF3(pfx)
23105 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
23106 vassert(sz == 2); /* even tho we are transferring 8, not 2. */
23107 UChar modrm = getUChar(delta);
23108 if (epartIsReg(modrm)) {
23109 delta += 1;
23110 putYMMRegLoAndZU(
23111 gregOfRexRM(pfx,modrm),
23112 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
23113 );
23114 DIP("vmovq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
23115 nameXMMReg(gregOfRexRM(pfx,modrm)));
23116 } else {
23117 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
23118 delta += alen;
23119 putYMMRegLoAndZU(
23120 gregOfRexRM(pfx,modrm),
23121 unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)))
23122 );
23123 DIP("vmovq %s, %s\n", dis_buf,
23124 nameXMMReg(gregOfRexRM(pfx,modrm)));
23125 }
23126 goto decode_success;
23127 }
sewardjc4530ae2012-05-21 10:18:49 +000023128 break;
23129
23130 case 0x6F:
23131 /* VMOVDQA ymm2/m256, ymm1 = VEX.256.66.0F.WIG 6F */
23132 /* VMOVDQU ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 6F */
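         /* The A (aligned) and U (unaligned) forms move data
            identically; the only difference is that the A form checks
            the effective address and raises SIGSEGV via
            gen_SEGV_if_not_32_aligned if it is not 32-aligned. */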
sewardj66becf32012-06-18 23:15:16 +000023133 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
sewardjc4530ae2012-05-21 10:18:49 +000023134 && 1==getVexL(pfx)/*256*/) {
23135 UChar modrm = getUChar(delta);
23136 UInt rD = gregOfRexRM(pfx, modrm);
23137 IRTemp tD = newTemp(Ity_V256);
23138 Bool isA = have66noF2noF3(pfx);
23139 UChar ch = isA ? 'a' : 'u';
23140 if (epartIsReg(modrm)) {
23141 UInt rS = eregOfRexRM(pfx, modrm);
23142 delta += 1;
23143 assign(tD, getYMMReg(rS));
23144 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
23145 } else {
23146 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
23147 delta += alen;
23148 if (isA)
23149 gen_SEGV_if_not_32_aligned(addr);
23150 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
23151 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameYMMReg(rD));
23152 }
23153 putYMMReg(rD, mkexpr(tD));
23154 goto decode_success;
23155 }
23156 /* VMOVDQA xmm2/m128, xmm1 = VEX.128.66.0F.WIG 6F */
23157 /* VMOVDQU xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 6F */
23158 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
23159 && 0==getVexL(pfx)/*128*/) {
23160 UChar modrm = getUChar(delta);
23161 UInt rD = gregOfRexRM(pfx, modrm);
23162 IRTemp tD = newTemp(Ity_V128);
23163 Bool isA = have66noF2noF3(pfx);
23164 UChar ch = isA ? 'a' : 'u';
23165 if (epartIsReg(modrm)) {
23166 UInt rS = eregOfRexRM(pfx, modrm);
23167 delta += 1;
23168 assign(tD, getXMMReg(rS));
23169 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
23170 } else {
23171 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
23172 delta += alen;
23173 if (isA)
23174 gen_SEGV_if_not_16_aligned(addr);
23175 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
23176 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameXMMReg(rD));
23177 }
23178 putYMMRegLoAndZU(rD, mkexpr(tD));
23179 goto decode_success;
23180 }
23181 break;
23182
23183 case 0x70:
23184 /* VPSHUFD imm8, xmm2/m128, xmm1 = VEX.128.66.0F.WIG 70 /r ib */
23185 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23186 delta = dis_PSHUFD_32x4( vbi, pfx, delta, True/*writesYmm*/);
23187 goto decode_success;
23188 }
sewardj251b59e2012-05-25 13:51:07 +000023189 /* VPSHUFLW imm8, xmm2/m128, xmm1 = VEX.128.F2.0F.WIG 70 /r ib */
23190 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23191 delta = dis_PSHUFxW_128( vbi, pfx, delta,
23192 True/*isAvx*/, False/*!xIsH*/ );
23193 goto decode_success;
23194 }
23195 /* VPSHUFHW imm8, xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 70 /r ib */
23196 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
23197 delta = dis_PSHUFxW_128( vbi, pfx, delta,
23198 True/*isAvx*/, True/*xIsH*/ );
23199 goto decode_success;
23200 }
sewardjc4530ae2012-05-21 10:18:49 +000023201 break;
23202
sewardj6faf7cc2012-05-25 15:53:01 +000023203 case 0x71:
23204 /* VPSRLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /2 ib */
sewardjfe0c5e72012-06-15 15:48:07 +000023205 /* VPSRAW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /4 ib */
sewardj15ad1942012-06-20 10:21:05 +000023206 /* VPSLLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /6 ib */
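      /* All three share opcode 71; the operation is selected by the
         /2, /4 or /6 value in the reg field of the modrm byte, tested
         via gregLO3ofRM below.  Opcodes 72 and 73 use the same
         scheme. */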
sewardj6faf7cc2012-05-25 15:53:01 +000023207 if (have66noF2noF3(pfx)
23208 && 0==getVexL(pfx)/*128*/
23209 && epartIsReg(getUChar(delta))) {
23210 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
23211 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
23212 "vpsrlw", Iop_ShrN16x8 );
23213 *uses_vvvv = True;
23214 goto decode_success;
23215 }
sewardjfe0c5e72012-06-15 15:48:07 +000023216 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
23217 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
23218 "vpsraw", Iop_SarN16x8 );
23219 *uses_vvvv = True;
23220 goto decode_success;
23221 }
sewardj15ad1942012-06-20 10:21:05 +000023222 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
23223 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
23224 "vpsllw", Iop_ShlN16x8 );
23225 *uses_vvvv = True;
23226 goto decode_success;
23227 }
sewardj6faf7cc2012-05-25 15:53:01 +000023228 /* else fall through */
23229 }
23230 break;
23231
sewardjc4530ae2012-05-21 10:18:49 +000023232 case 0x72:
sewardj251b59e2012-05-25 13:51:07 +000023233 /* VPSRLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /2 ib */
sewardj15ad1942012-06-20 10:21:05 +000023234 /* VPSRAD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /4 ib */
23235 /* VPSLLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /6 ib */
sewardjc4530ae2012-05-21 10:18:49 +000023236 if (have66noF2noF3(pfx)
23237 && 0==getVexL(pfx)/*128*/
sewardj251b59e2012-05-25 13:51:07 +000023238 && epartIsReg(getUChar(delta))) {
sewardj251b59e2012-05-25 13:51:07 +000023239 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
23240 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
23241 "vpsrld", Iop_ShrN32x4 );
23242 *uses_vvvv = True;
23243 goto decode_success;
23244 }
sewardj15ad1942012-06-20 10:21:05 +000023245 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
23246 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
23247 "vpsrad", Iop_SarN32x4 );
23248 *uses_vvvv = True;
23249 goto decode_success;
23250 }
23251 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
23252 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
23253 "vpslld", Iop_ShlN32x4 );
23254 *uses_vvvv = True;
23255 goto decode_success;
23256 }
sewardj251b59e2012-05-25 13:51:07 +000023257 /* else fall through */
sewardjc4530ae2012-05-21 10:18:49 +000023258 }
23259 break;
23260
23261 case 0x73:
sewardj251b59e2012-05-25 13:51:07 +000023262 /* VPSRLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /3 ib */
23263 /* VPSLLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /7 ib */
sewardj56c30312012-06-12 08:45:39 +000023264 /* VPSRLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /2 ib */
sewardje8a7eb72012-06-12 14:59:17 +000023265 /* VPSLLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /6 ib */
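      /* VPSRLDQ/VPSLLDQ shift the whole 128-bit value by imm8 bytes
         and are handled via math_PSRLDQ/math_PSLLDQ; VPSRLQ/VPSLLQ
         shift each 64-bit lane by imm8 bits and go through the generic
         dis_AVX128_shiftE_to_V_imm path. */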
sewardjc4530ae2012-05-21 10:18:49 +000023266 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
sewardj251b59e2012-05-25 13:51:07 +000023267 && epartIsReg(getUChar(delta))) {
23268 Int rS = eregOfRexRM(pfx,getUChar(delta));
23269 Int rD = getVexNvvvv(pfx);
sewardjc4530ae2012-05-21 10:18:49 +000023270 IRTemp vecS = newTemp(Ity_V128);
sewardj251b59e2012-05-25 13:51:07 +000023271 if (gregLO3ofRM(getUChar(delta)) == 3) {
23272 Int imm = (Int)getUChar(delta+1);
23273 DIP("vpsrldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
23274 delta += 2;
23275 assign( vecS, getXMMReg(rS) );
23276 putYMMRegLoAndZU(rD, mkexpr(math_PSRLDQ( vecS, imm )));
23277 *uses_vvvv = True;
23278 goto decode_success;
23279 }
23280 if (gregLO3ofRM(getUChar(delta)) == 7) {
23281 Int imm = (Int)getUChar(delta+1);
23282 DIP("vpslldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
23283 delta += 2;
23284 assign( vecS, getXMMReg(rS) );
23285 putYMMRegLoAndZU(rD, mkexpr(math_PSLLDQ( vecS, imm )));
23286 *uses_vvvv = True;
23287 goto decode_success;
23288 }
sewardj56c30312012-06-12 08:45:39 +000023289 if (gregLO3ofRM(getUChar(delta)) == 2) {
23290 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
23291 "vpsrlq", Iop_ShrN64x2 );
23292 *uses_vvvv = True;
23293 goto decode_success;
23294 }
sewardje8a7eb72012-06-12 14:59:17 +000023295 if (gregLO3ofRM(getUChar(delta)) == 6) {
23296 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
23297 "vpsllq", Iop_ShlN64x2 );
23298 *uses_vvvv = True;
23299 goto decode_success;
23300 }
sewardj251b59e2012-05-25 13:51:07 +000023301 /* else fall through */
sewardjc4530ae2012-05-21 10:18:49 +000023302 }
23303 break;
23304
sewardj8ef22422012-05-24 16:29:18 +000023305 case 0x74:
sewardj251b59e2012-05-25 13:51:07 +000023306 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
sewardj8ef22422012-05-24 16:29:18 +000023307 /* VPCMPEQB = VEX.NDS.128.66.0F.WIG 74 /r */
23308 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23309 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
23310 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x16 );
23311 goto decode_success;
23312 }
23313 break;
23314
sewardj6fcd43e2012-06-14 08:51:35 +000023315 case 0x75:
23316 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
23317 /* VPCMPEQW = VEX.NDS.128.66.0F.WIG 75 /r */
23318 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23319 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
23320 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x8 );
23321 goto decode_success;
23322 }
23323 break;
23324
sewardjc4530ae2012-05-21 10:18:49 +000023325 case 0x76:
sewardj251b59e2012-05-25 13:51:07 +000023326 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
sewardjc4530ae2012-05-21 10:18:49 +000023327 /* VPCMPEQD = VEX.NDS.128.66.0F.WIG 76 /r */
23328 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23329 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
23330 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x4 );
23331 goto decode_success;
23332 }
23333 break;
23334
23335 case 0x77:
23336 /* VZEROUPPER = VEX.128.0F.WIG 77 */
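      /* Zeroes bits 255:128 of all sixteen ymm registers while leaving
         the low (xmm) halves untouched -- hence the loop writing only
         lane 1 of each register.  VZEROALL below clears the whole of
         each register. */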
23337 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23338 Int i;
23339 IRTemp zero128 = newTemp(Ity_V128);
23340 assign(zero128, mkV128(0));
23341 for (i = 0; i < 16; i++) {
23342 putYMMRegLane128(i, 1, mkexpr(zero128));
23343 }
23344 DIP("vzeroupper\n");
23345 goto decode_success;
23346 }
sewardj66becf32012-06-18 23:15:16 +000023347 /* VZEROALL = VEX.256.0F.WIG 77 */
23348 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23349 Int i;
23350 IRTemp zero128 = newTemp(Ity_V128);
23351 assign(zero128, mkV128(0));
23352 for (i = 0; i < 16; i++) {
23353 putYMMRegLoAndZU(i, mkexpr(zero128));
23354 }
23355 DIP("vzeroall\n");
23356 goto decode_success;
23357 }
sewardjc4530ae2012-05-21 10:18:49 +000023358 break;
23359
sewardjadf357c2012-06-24 13:44:17 +000023360 case 0x7C:
23361 case 0x7D:
23362 /* VHADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7C /r */
23363 /* VHSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7D /r */
23364 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23365 IRTemp sV = newTemp(Ity_V128);
23366 IRTemp dV = newTemp(Ity_V128);
23367 Bool isAdd = opc == 0x7C;
23368 HChar* str = isAdd ? "add" : "sub";
23369 UChar modrm = getUChar(delta);
23370 UInt rG = gregOfRexRM(pfx,modrm);
23371 UInt rV = getVexNvvvv(pfx);
23372 if (epartIsReg(modrm)) {
23373 UInt rE = eregOfRexRM(pfx,modrm);
23374 assign( sV, getXMMReg(rE) );
23375 DIP("vh%sps %s,%s,%s\n", str, nameXMMReg(rE),
23376 nameXMMReg(rV), nameXMMReg(rG));
23377 delta += 1;
23378 } else {
23379 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23380 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
23381 DIP("vh%sps %s,%s,%s\n", str, dis_buf,
23382 nameXMMReg(rV), nameXMMReg(rG));
23383 delta += alen;
23384 }
23385 assign( dV, getXMMReg(rV) );
23386 putYMMRegLoAndZU( rG, mkexpr( math_HADDPS_128 ( dV, sV, isAdd ) ) );
23387 *uses_vvvv = True;
23388 goto decode_success;
23389 }
23390 /* VHADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7C /r */
23391 /* VHSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7D /r */
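      /* There is no 256-bit horizontal-add helper: the sources are
         split into 128-bit halves with breakupV256toV128s, the 128-bit
         math helper is applied to each half, and the results rejoined
         with Iop_V128HLtoV256.  This matches the AVX semantics, since
         the horizontal ops operate within each 128-bit lane. */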
23392 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23393 IRTemp sV = newTemp(Ity_V256);
23394 IRTemp dV = newTemp(Ity_V256);
23395 IRTemp s1, s0, d1, d0;
23396 Bool isAdd = opc == 0x7C;
23397 HChar* str = isAdd ? "add" : "sub";
23398 UChar modrm = getUChar(delta);
23399 UInt rG = gregOfRexRM(pfx,modrm);
23400 UInt rV = getVexNvvvv(pfx);
23401 s1 = s0 = d1 = d0 = IRTemp_INVALID;
23402 if (epartIsReg(modrm)) {
23403 UInt rE = eregOfRexRM(pfx,modrm);
23404 assign( sV, getYMMReg(rE) );
23405 DIP("vh%sps %s,%s,%s\n", str, nameYMMReg(rE),
23406 nameYMMReg(rV), nameYMMReg(rG));
23407 delta += 1;
23408 } else {
23409 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23410 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
23411 DIP("vh%sps %s,%s,%s\n", str, dis_buf,
23412 nameYMMReg(rV), nameYMMReg(rG));
23413 delta += alen;
23414 }
23415 assign( dV, getYMMReg(rV) );
23416 breakupV256toV128s( dV, &d1, &d0 );
23417 breakupV256toV128s( sV, &s1, &s0 );
23418 putYMMReg( rG, binop(Iop_V128HLtoV256,
23419 mkexpr( math_HADDPS_128 ( d1, s1, isAdd ) ),
23420 mkexpr( math_HADDPS_128 ( d0, s0, isAdd ) ) ) );
23421 *uses_vvvv = True;
23422 goto decode_success;
23423 }
23424 /* VHADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7C /r */
23425 /* VHSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7D /r */
23426 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23427 IRTemp sV = newTemp(Ity_V128);
23428 IRTemp dV = newTemp(Ity_V128);
23429 Bool isAdd = opc == 0x7C;
23430 HChar* str = isAdd ? "add" : "sub";
23431 UChar modrm = getUChar(delta);
23432 UInt rG = gregOfRexRM(pfx,modrm);
23433 UInt rV = getVexNvvvv(pfx);
23434 if (epartIsReg(modrm)) {
23435 UInt rE = eregOfRexRM(pfx,modrm);
23436 assign( sV, getXMMReg(rE) );
23437 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE),
23438 nameXMMReg(rV), nameXMMReg(rG));
23439 delta += 1;
23440 } else {
23441 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23442 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
23443 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
23444 nameXMMReg(rV), nameXMMReg(rG));
23445 delta += alen;
23446 }
23447 assign( dV, getXMMReg(rV) );
23448 putYMMRegLoAndZU( rG, mkexpr( math_HADDPD_128 ( dV, sV, isAdd ) ) );
23449 *uses_vvvv = True;
23450 goto decode_success;
23451 }
23452 /* VHADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7C /r */
23453 /* VHSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7D /r */
23454 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23455 IRTemp sV = newTemp(Ity_V256);
23456 IRTemp dV = newTemp(Ity_V256);
23457 IRTemp s1, s0, d1, d0;
23458 Bool isAdd = opc == 0x7C;
23459 HChar* str = isAdd ? "add" : "sub";
23460 UChar modrm = getUChar(delta);
23461 UInt rG = gregOfRexRM(pfx,modrm);
23462 UInt rV = getVexNvvvv(pfx);
23463 s1 = s0 = d1 = d0 = IRTemp_INVALID;
23464 if (epartIsReg(modrm)) {
23465 UInt rE = eregOfRexRM(pfx,modrm);
23466 assign( sV, getYMMReg(rE) );
23467 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE),
23468 nameYMMReg(rV), nameYMMReg(rG));
23469 delta += 1;
23470 } else {
23471 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23472 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
23473 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
23474 nameYMMReg(rV), nameYMMReg(rG));
23475 delta += alen;
23476 }
23477 assign( dV, getYMMReg(rV) );
23478 breakupV256toV128s( dV, &d1, &d0 );
23479 breakupV256toV128s( sV, &s1, &s0 );
23480 putYMMReg( rG, binop(Iop_V128HLtoV256,
23481 mkexpr( math_HADDPD_128 ( d1, s1, isAdd ) ),
23482 mkexpr( math_HADDPD_128 ( d0, s0, isAdd ) ) ) );
23483 *uses_vvvv = True;
23484 goto decode_success;
23485 }
23486 break;
23487
sewardjc4530ae2012-05-21 10:18:49 +000023488 case 0x7E:
23489 /* Note the Intel docs don't make sense for this. I think they
23490 are wrong. They seem to imply it is a store when in fact I
23491 think it is a load. Also it's unclear whether this is W0, W1
23492 or WIG. */
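      /* What is implemented below: the F3 form is treated as a load
         (xmm2/m64 -> low half of xmm1, upper bits zeroed), while the
         66 W1 and W0 forms are stores from the low part of xmm1 to a
         GPR or to memory. */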
sewardj6be43242012-05-22 23:12:13 +000023493 /* VMOVQ xmm2/m64, xmm1 = VEX.128.F3.0F.W0 7E /r */
sewardjc4530ae2012-05-21 10:18:49 +000023494 if (haveF3no66noF2(pfx)
23495 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
23496 vassert(sz == 4); /* even tho we are transferring 8, not 4. */
23497 UChar modrm = getUChar(delta);
23498 UInt rG = gregOfRexRM(pfx,modrm);
23499 if (epartIsReg(modrm)) {
23500 UInt rE = eregOfRexRM(pfx,modrm);
23501 putXMMRegLane64( rG, 0, getXMMRegLane64( rE, 0 ));
23502 DIP("vmovq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
23503 delta += 1;
23504 } else {
23505 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23506 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
23507 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
23508 delta += alen;
23509 }
23510 /* zero bits 255:64 */
23511 putXMMRegLane64( rG, 1, mkU64(0) );
23512 putYMMRegLane128( rG, 1, mkV128(0) );
23513 goto decode_success;
23514 }
sewardj6be43242012-05-22 23:12:13 +000023515 /* VMOVQ xmm1, r64/m64 = VEX.128.66.0F.W1 7E /r */
23516 /* Moves from G to E, so is a store-form insn */
sewardj251b59e2012-05-25 13:51:07 +000023517 /* Intel docs list this in the VMOVD entry for some reason. */
sewardj6be43242012-05-22 23:12:13 +000023518 if (have66noF2noF3(pfx)
sewardj82096922012-06-24 14:57:59 +000023519 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
sewardj6be43242012-05-22 23:12:13 +000023520 UChar modrm = getUChar(delta);
23521 UInt rG = gregOfRexRM(pfx,modrm);
sewardj82096922012-06-24 14:57:59 +000023522 if (epartIsReg(modrm)) {
23523 UInt rE = eregOfRexRM(pfx,modrm);
23524 DIP("vmovq %s,%s\n", nameXMMReg(rG), nameIReg64(rE));
23525 putIReg64(rE, getXMMRegLane64(rG, 0));
23526 delta += 1;
23527 } else {
23528 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23529 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0) );
23530 DIP("vmovq %s,%s\n", nameXMMReg(rG), dis_buf);
23531 delta += alen;
23532 }
sewardj6be43242012-05-22 23:12:13 +000023533 goto decode_success;
23534 }
sewardj6faf7cc2012-05-25 15:53:01 +000023535 /* VMOVD xmm1, m32/r32 = VEX.128.66.0F.W0 7E /r */
sewardj72df0682012-05-23 23:54:30 +000023536 /* Moves from G to E, so is a store-form insn */
23537 if (have66noF2noF3(pfx)
sewardj251b59e2012-05-25 13:51:07 +000023538 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
sewardj72df0682012-05-23 23:54:30 +000023539 UChar modrm = getUChar(delta);
23540 UInt rG = gregOfRexRM(pfx,modrm);
sewardj251b59e2012-05-25 13:51:07 +000023541 if (epartIsReg(modrm)) {
23542 UInt rE = eregOfRexRM(pfx,modrm);
23543 DIP("vmovd %s,%s\n", nameXMMReg(rG), nameIReg32(rE));
23544 putIReg32(rE, getXMMRegLane32(rG, 0));
23545 delta += 1;
23546 } else {
23547 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23548 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0) );
23549 DIP("vmovd %s,%s\n", nameXMMReg(rG), dis_buf);
23550 delta += alen;
23551 }
sewardj72df0682012-05-23 23:54:30 +000023552 goto decode_success;
23553 }
sewardjc4530ae2012-05-21 10:18:49 +000023554 break;
23555
23556 case 0x7F:
23557 /* VMOVDQA ymm1, ymm2/m256 = VEX.256.66.0F.WIG 7F */
sewardj66becf32012-06-18 23:15:16 +000023558 /* VMOVDQU ymm1, ymm2/m256 = VEX.256.F3.0F.WIG 7F */
23559 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
23560 && 1==getVexL(pfx)/*256*/) {
sewardjc4530ae2012-05-21 10:18:49 +000023561 UChar modrm = getUChar(delta);
23562 UInt rS = gregOfRexRM(pfx, modrm);
23563 IRTemp tS = newTemp(Ity_V256);
sewardj66becf32012-06-18 23:15:16 +000023564 Bool isA = have66noF2noF3(pfx);
23565 UChar ch = isA ? 'a' : 'u';
sewardjc4530ae2012-05-21 10:18:49 +000023566 assign(tS, getYMMReg(rS));
23567 if (epartIsReg(modrm)) {
23568 UInt rD = eregOfRexRM(pfx, modrm);
23569 delta += 1;
23570 putYMMReg(rD, mkexpr(tS));
sewardj66becf32012-06-18 23:15:16 +000023571 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
sewardjc4530ae2012-05-21 10:18:49 +000023572 } else {
23573 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
23574 delta += alen;
sewardj66becf32012-06-18 23:15:16 +000023575 if (isA)
23576 gen_SEGV_if_not_32_aligned(addr);
sewardjc4530ae2012-05-21 10:18:49 +000023577 storeLE(mkexpr(addr), mkexpr(tS));
sewardj66becf32012-06-18 23:15:16 +000023578 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), dis_buf);
sewardjc4530ae2012-05-21 10:18:49 +000023579 }
23580 goto decode_success;
23581 }
23582 /* VMOVDQA xmm1, xmm2/m128 = VEX.128.66.0F.WIG 7F */
23583 /* VMOVDQU xmm1, xmm2/m128 = VEX.128.F3.0F.WIG 7F */
23584 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
23585 && 0==getVexL(pfx)/*128*/) {
23586 UChar modrm = getUChar(delta);
23587 UInt rS = gregOfRexRM(pfx, modrm);
23588 IRTemp tS = newTemp(Ity_V128);
23589 Bool isA = have66noF2noF3(pfx);
23590 UChar ch = isA ? 'a' : 'u';
23591 assign(tS, getXMMReg(rS));
23592 if (epartIsReg(modrm)) {
23593 UInt rD = eregOfRexRM(pfx, modrm);
23594 delta += 1;
23595 putYMMRegLoAndZU(rD, mkexpr(tS));
23596 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
23597 } else {
23598 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
23599 delta += alen;
23600 if (isA)
23601 gen_SEGV_if_not_16_aligned(addr);
23602 storeLE(mkexpr(addr), mkexpr(tS));
23603 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), dis_buf);
23604 }
23605 goto decode_success;
23606 }
23607 break;
23608
sewardjfe0c5e72012-06-15 15:48:07 +000023609 case 0xAE:
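      /* Opcode AE with no SIMD prefix covers the MXCSR accessors: the
         /3 vs /2 value in the modrm reg field selects VSTMXCSR (store)
         vs VLDMXCSR (load).  Only the memory forms exist. */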
23610 /* VSTMXCSR m32 = VEX.LZ.0F.WIG AE /3 */
23611 if (haveNo66noF2noF3(pfx)
23612 && 0==getVexL(pfx)/*LZ*/
23613 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
23614 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
23615 && sz == 4) {
23616 delta = dis_STMXCSR(vbi, pfx, delta, True/*isAvx*/);
23617 goto decode_success;
23618 }
23619 /* VLDMXCSR m32 = VEX.LZ.0F.WIG AE /2 */
23620 if (haveNo66noF2noF3(pfx)
23621 && 0==getVexL(pfx)/*LZ*/
23622 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
23623 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
23624 && sz == 4) {
23625 delta = dis_LDMXCSR(vbi, pfx, delta, True/*isAvx*/);
23626 goto decode_success;
23627 }
23628 break;
23629
sewardjc4530ae2012-05-21 10:18:49 +000023630 case 0xC2:
23631 /* VCMPSD xmm3/m64(E=argL), xmm2(V=argR), xmm1(G) */
23632 /* = VEX.NDS.LIG.F2.0F.WIG C2 /r ib */
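      /* Note: the cmp helpers signal a failed decode (presumably an
         imm8 predicate they do not handle) by returning with delta
         unchanged, hence the delta > delta0 tests below. */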
23633 if (haveF2no66noF3(pfx)) {
23634 Long delta0 = delta;
23635 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
23636 "vcmpsd", False/*!all_lanes*/,
23637 8/*sz*/);
23638 if (delta > delta0) goto decode_success;
23639 /* else fall through -- decoding has failed */
23640 }
23641 /* VCMPSS xmm3/m32(E=argL), xmm2(V=argR), xmm1(G) */
23642 /* = VEX.NDS.LIG.F3.0F.WIG C2 /r ib */
23643 if (haveF3no66noF2(pfx)) {
23644 Long delta0 = delta;
23645 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
23646 "vcmpss", False/*!all_lanes*/,
23647 4/*sz*/);
23648 if (delta > delta0) goto decode_success;
23649 /* else fall through -- decoding has failed */
23650 }
sewardj89378162012-06-24 12:12:20 +000023651 /* VCMPPD xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
sewardj4b1cc832012-06-13 11:10:20 +000023652 /* = VEX.NDS.128.66.0F.WIG C2 /r ib */
23653 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23654 Long delta0 = delta;
23655 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
23656 "vcmppd", True/*all_lanes*/,
23657 8/*sz*/);
23658 if (delta > delta0) goto decode_success;
23659 /* else fall through -- decoding has failed */
23660 }
sewardj89378162012-06-24 12:12:20 +000023661 /* VCMPPD ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
23662 /* = VEX.NDS.256.66.0F.WIG C2 /r ib */
23663 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23664 Long delta0 = delta;
23665 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
23666 "vcmppd", 8/*sz*/);
23667 if (delta > delta0) goto decode_success;
23668 /* else fall through -- decoding has failed */
23669 }
23670 /* VCMPPS xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
23671 /* = VEX.NDS.128.0F.WIG C2 /r ib */
23672 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23673 Long delta0 = delta;
23674 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
23675 "vcmpps", True/*all_lanes*/,
23676 4/*sz*/);
23677 if (delta > delta0) goto decode_success;
23678 /* else fall through -- decoding has failed */
23679 }
23680 /* VCMPPS ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
23681 /* = VEX.NDS.256.0F.WIG C2 /r ib */
23682 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23683 Long delta0 = delta;
23684 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
23685 "vcmpps", 4/*sz*/);
23686 if (delta > delta0) goto decode_success;
23687 /* else fall through -- decoding has failed */
23688 }
sewardjc4530ae2012-05-21 10:18:49 +000023689 break;
23690
sewardj21459cb2012-06-18 14:05:52 +000023691 case 0xC4:
23692 /* VPINSRW r32/m16, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG C4 /r ib */
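      /* imm8 & 7 selects which of the eight 16-bit lanes of the vvvv
         source (rV) is replaced by the new value; the other lanes are
         passed through unchanged by math_PINSRW_128. */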
23693 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23694 UChar modrm = getUChar(delta);
23695 UInt rG = gregOfRexRM(pfx, modrm);
23696 UInt rV = getVexNvvvv(pfx);
23697 Int imm8;
sewardj4ed05e02012-06-18 15:01:30 +000023698 IRTemp new16 = newTemp(Ity_I16);
sewardj21459cb2012-06-18 14:05:52 +000023699
23700 if ( epartIsReg( modrm ) ) {
23701 imm8 = (Int)(getUChar(delta+1) & 7);
sewardj4ed05e02012-06-18 15:01:30 +000023702 assign( new16, unop(Iop_32to16,
23703 getIReg32(eregOfRexRM(pfx,modrm))) );
sewardj21459cb2012-06-18 14:05:52 +000023704 delta += 1+1;
23705 DIP( "vpinsrw $%d,%s,%s\n", imm8,
23706 nameIReg32( eregOfRexRM(pfx, modrm) ), nameXMMReg(rG) );
23707 } else {
23708 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
23709 imm8 = (Int)(getUChar(delta+alen) & 7);
sewardj4ed05e02012-06-18 15:01:30 +000023710 assign( new16, loadLE( Ity_I16, mkexpr(addr) ));
sewardj21459cb2012-06-18 14:05:52 +000023711 delta += alen+1;
23712 DIP( "vpinsrw $%d,%s,%s\n",
23713 imm8, dis_buf, nameXMMReg(rG) );
23714 }
23715
sewardj4ed05e02012-06-18 15:01:30 +000023716 IRTemp src_vec = newTemp(Ity_V128);
23717 assign(src_vec, getXMMReg( rV ));
23718 IRTemp res_vec = math_PINSRW_128( src_vec, new16, imm8 );
23719 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
sewardj21459cb2012-06-18 14:05:52 +000023720 *uses_vvvv = True;
23721 goto decode_success;
23722 }
sewardj4ed05e02012-06-18 15:01:30 +000023723 break;
sewardj21459cb2012-06-18 14:05:52 +000023724
sewardje8a7eb72012-06-12 14:59:17 +000023725 case 0xC5:
23726 /* VPEXTRW imm8, xmm1, reg32 = VEX.128.66.0F.W0 C5 /r ib */
23727 if (have66noF2noF3(pfx)
23728 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
23729 Long delta0 = delta;
23730 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
23731 True/*isAvx*/ );
23732 if (delta > delta0) goto decode_success;
23733 /* else fall through -- decoding has failed */
23734 }
23735 break;
23736
sewardj251b59e2012-05-25 13:51:07 +000023737 case 0xC6:
23738 /* VSHUFPS imm8, xmm3/m128, xmm2, xmm1 */
23739 /* = VEX.NDS.128.0F.WIG C6 /r ib */
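      /* For the PS forms, imm8 carries four 2-bit element selectors;
         the actual shuffling is done by math_SHUFPS_128 (and, for the
         256-bit form, math_SHUFPS_256, which applies the selection
         within each 128-bit lane). */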
23740 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23741 Int imm8 = 0;
23742 IRTemp eV = newTemp(Ity_V128);
23743 IRTemp vV = newTemp(Ity_V128);
23744 UInt modrm = getUChar(delta);
23745 UInt rG = gregOfRexRM(pfx,modrm);
23746 UInt rV = getVexNvvvv(pfx);
23747 assign( vV, getXMMReg(rV) );
23748 if (epartIsReg(modrm)) {
23749 UInt rE = eregOfRexRM(pfx,modrm);
23750 assign( eV, getXMMReg(rE) );
23751 imm8 = (Int)getUChar(delta+1);
23752 delta += 1+1;
23753 DIP("vshufps $%d,%s,%s,%s\n",
23754 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23755 } else {
23756 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
23757 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
23758 imm8 = (Int)getUChar(delta+alen);
23759 delta += 1+alen;
23760 DIP("vshufps $%d,%s,%s,%s\n",
23761 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23762 }
sewardj4b1cc832012-06-13 11:10:20 +000023763 IRTemp res = math_SHUFPS_128( eV, vV, imm8 );
23764 putYMMRegLoAndZU( rG, mkexpr(res) );
23765 *uses_vvvv = True;
23766 goto decode_success;
23767 }
23768 /* VSHUFPS imm8, ymm3/m256, ymm2, ymm1 */
23769 /* = VEX.NDS.256.0F.WIG C6 /r ib */
23770 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23771 Int imm8 = 0;
23772 IRTemp eV = newTemp(Ity_V256);
23773 IRTemp vV = newTemp(Ity_V256);
23774 UInt modrm = getUChar(delta);
23775 UInt rG = gregOfRexRM(pfx,modrm);
23776 UInt rV = getVexNvvvv(pfx);
23777 assign( vV, getYMMReg(rV) );
23778 if (epartIsReg(modrm)) {
23779 UInt rE = eregOfRexRM(pfx,modrm);
23780 assign( eV, getYMMReg(rE) );
23781 imm8 = (Int)getUChar(delta+1);
23782 delta += 1+1;
23783 DIP("vshufps $%d,%s,%s,%s\n",
23784 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
23785 } else {
23786 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
23787 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
23788 imm8 = (Int)getUChar(delta+alen);
23789 delta += 1+alen;
23790 DIP("vshufps $%d,%s,%s,%s\n",
23791 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
23792 }
23793 IRTemp res = math_SHUFPS_256( eV, vV, imm8 );
23794 putYMMReg( rG, mkexpr(res) );
sewardj251b59e2012-05-25 13:51:07 +000023795 *uses_vvvv = True;
23796 goto decode_success;
23797 }
sewardj21459cb2012-06-18 14:05:52 +000023798 /* VSHUFPD imm8, xmm3/m128, xmm2, xmm1 */
23799 /* = VEX.NDS.128.66.0F.WIG C6 /r ib */
23800 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23801 Int imm8 = 0;
23802 IRTemp eV = newTemp(Ity_V128);
23803 IRTemp vV = newTemp(Ity_V128);
23804 UInt modrm = getUChar(delta);
23805 UInt rG = gregOfRexRM(pfx,modrm);
23806 UInt rV = getVexNvvvv(pfx);
23807 assign( vV, getXMMReg(rV) );
23808 if (epartIsReg(modrm)) {
23809 UInt rE = eregOfRexRM(pfx,modrm);
23810 assign( eV, getXMMReg(rE) );
sewardj47933bc2012-06-18 22:09:33 +000023811 imm8 = (Int)getUChar(delta+1);
sewardj21459cb2012-06-18 14:05:52 +000023812 delta += 1+1;
23813 DIP("vshufpd $%d,%s,%s,%s\n",
23814 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23815 } else {
23816 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
23817 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
sewardj47933bc2012-06-18 22:09:33 +000023818 imm8 = (Int)getUChar(delta+alen);
sewardj21459cb2012-06-18 14:05:52 +000023819 delta += 1+alen;
23820 DIP("vshufpd $%d,%s,%s,%s\n",
23821 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23822 }
23823 IRTemp res = math_SHUFPD_128( eV, vV, imm8 );
23824 putYMMRegLoAndZU( rG, mkexpr(res) );
23825 *uses_vvvv = True;
23826 goto decode_success;
23827 }
23828 /* VSHUFPD imm8, ymm3/m256, ymm2, ymm1 */
23829 /* = VEX.NDS.256.66.0F.WIG C6 /r ib */
23830 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23831 Int imm8 = 0;
23832 IRTemp eV = newTemp(Ity_V256);
23833 IRTemp vV = newTemp(Ity_V256);
23834 UInt modrm = getUChar(delta);
23835 UInt rG = gregOfRexRM(pfx,modrm);
23836 UInt rV = getVexNvvvv(pfx);
23837 assign( vV, getYMMReg(rV) );
23838 if (epartIsReg(modrm)) {
23839 UInt rE = eregOfRexRM(pfx,modrm);
23840 assign( eV, getYMMReg(rE) );
sewardj47933bc2012-06-18 22:09:33 +000023841 imm8 = (Int)getUChar(delta+1);
sewardj21459cb2012-06-18 14:05:52 +000023842 delta += 1+1;
23843 DIP("vshufpd $%d,%s,%s,%s\n",
23844 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
23845 } else {
23846 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
23847 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
sewardj47933bc2012-06-18 22:09:33 +000023848 imm8 = (Int)getUChar(delta+alen);
sewardj21459cb2012-06-18 14:05:52 +000023849 delta += 1+alen;
23850 DIP("vshufpd $%d,%s,%s,%s\n",
23851 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
23852 }
23853 IRTemp res = math_SHUFPD_256( eV, vV, imm8 );
23854 putYMMReg( rG, mkexpr(res) );
23855 *uses_vvvv = True;
23856 goto decode_success;
23857 }
sewardj251b59e2012-05-25 13:51:07 +000023858 break;
23859
sewardj89378162012-06-24 12:12:20 +000023860 case 0xD0:
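      /* ADDSUB alternates the operation across lanes: even-numbered
         lanes are subtracted, odd-numbered lanes are added.  The
         math_ADDSUBPD/PS helpers build that pattern. */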
23861 /* VADDSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D0 /r */
23862 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23863 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
23864 uses_vvvv, vbi, pfx, delta,
23865 "vaddsubpd", math_ADDSUBPD_128 );
23866 goto decode_success;
23867 }
23868 /* VADDSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D0 /r */
23869 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23870 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
23871 uses_vvvv, vbi, pfx, delta,
23872 "vaddsubpd", math_ADDSUBPD_256 );
23873 goto decode_success;
23874 }
23875 /* VADDSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG D0 /r */
23876 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23877 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
23878 uses_vvvv, vbi, pfx, delta,
23879 "vaddsubps", math_ADDSUBPS_128 );
23880 goto decode_success;
23881 }
23882 /* VADDSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG D0 /r */
23883 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23884 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
23885 uses_vvvv, vbi, pfx, delta,
23886 "vaddsubps", math_ADDSUBPS_256 );
23887 goto decode_success;
23888 }
23889 break;
23890
sewardj4c0a7ac2012-06-21 09:08:19 +000023891 case 0xD1:
23892 /* VPSRLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D1 /r */
23893 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23894 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
23895 "vpsrlw", Iop_ShrN16x8 );
23896 *uses_vvvv = True;
23897 goto decode_success;
23899 }
23900 break;
23901
23902 case 0xD2:
23903 /* VPSRLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D2 /r */
23904 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23905 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
23906 "vpsrld", Iop_ShrN32x4 );
23907 *uses_vvvv = True;
23908 goto decode_success;
23909 }
23910 break;
23911
23912 case 0xD3:
23913 /* VPSRLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D3 /r */
23914 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23915 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
23916 "vpsrlq", Iop_ShrN64x2 );
23917 *uses_vvvv = True;
23918 goto decode_success;
23919 }
23920 break;
23921
sewardj98d02cc2012-06-02 11:55:25 +000023922 case 0xD4:
23923 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
23924 /* VPADDQ = VEX.NDS.128.66.0F.WIG D4 /r */
23925 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23926 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
23927 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x2 );
23928 goto decode_success;
23929 }
23930 break;
23931
sewardj251b59e2012-05-25 13:51:07 +000023932 case 0xD5:
23933 /* VPMULLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D5 /r */
23934 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23935 delta = dis_AVX128_E_V_to_G(
23936 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x8 );
23937 goto decode_success;
23938 }
23939 break;
23940
sewardjc4530ae2012-05-21 10:18:49 +000023941 case 0xD6:
23942 /* VMOVQ xmm1, xmm2/m64 = VEX.128.66.0F.WIG D6 /r */
23943 /* Moves 64 bits from G (lo half xmm) to E (mem or lo half
23944 xmm).  L==0(128); W should be ignored (WIG), although the
23945 check below also insists on W==0. */
sewardj6eaf00c2012-05-23 11:33:56 +000023946 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
sewardjc4530ae2012-05-21 10:18:49 +000023947 && 0==getRexW(pfx)/*this might be redundant, dunno*/) {
23948 UChar modrm = getUChar(delta);
23949 UInt rG = gregOfRexRM(pfx,modrm);
23950 if (epartIsReg(modrm)) {
23951 /* fall through, awaiting test case */
23952 /* dst: lo half copied, hi half zeroed */
23953 } else {
23954 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23955 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0 ));
23956 DIP("vmovq %s,%s\n", nameXMMReg(rG), dis_buf );
23957 delta += alen;
23958 goto decode_success;
23959 }
23960 }
23961 break;
23962
sewardj8ef22422012-05-24 16:29:18 +000023963 case 0xD7:
23964 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB xmm1, r32 */
23965 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23966 delta = dis_PMOVMSKB_128( vbi, pfx, delta, True/*isAvx*/ );
23967 goto decode_success;
23968 }
23969 break;
23970
sewardj251b59e2012-05-25 13:51:07 +000023971 case 0xD8:
23972 /* VPSUBUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D8 /r */
23973 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23974 delta = dis_AVX128_E_V_to_G(
23975 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux16 );
23976 goto decode_success;
23977 }
23978 break;
23979
sewardj6fcd43e2012-06-14 08:51:35 +000023980 case 0xD9:
23981 /* VPSUBUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D9 /r */
23982 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23983 delta = dis_AVX128_E_V_to_G(
23984 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux8 );
23985 goto decode_success;
23986 }
23987 break;
23988
sewardje8a7eb72012-06-12 14:59:17 +000023989 case 0xDA:
23990 /* VPMINUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DA /r */
23991 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23992 delta = dis_AVX128_E_V_to_G(
23993 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux16 );
23994 goto decode_success;
23995 }
23996 break;
23997
sewardj8ef22422012-05-24 16:29:18 +000023998 case 0xDB:
sewardj251b59e2012-05-25 13:51:07 +000023999 /* VPAND r/m, rV, r ::: r = rV & r/m */
sewardj8ef22422012-05-24 16:29:18 +000024000 /* VEX.NDS.128.66.0F.WIG DB /r = VPAND xmm3/m128, xmm2, xmm1 */
24001 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24002 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24003 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV128 );
24004 goto decode_success;
24005 }
24006 break;
24007
sewardj251b59e2012-05-25 13:51:07 +000024008 case 0xDC:
24009 /* VPADDUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DC /r */
24010 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24011 delta = dis_AVX128_E_V_to_G(
24012 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux16 );
24013 goto decode_success;
24014 }
24015 break;
24016
24017 case 0xDD:
24018 /* VPADDUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DD /r */
24019 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24020 delta = dis_AVX128_E_V_to_G(
24021 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux8 );
24022 goto decode_success;
24023 }
24024 break;
24025
sewardje8a7eb72012-06-12 14:59:17 +000024026 case 0xDE:
24027 /* VPMAXUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DE /r */
24028 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24029 delta = dis_AVX128_E_V_to_G(
24030 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux16 );
24031 goto decode_success;
24032 }
24033 break;
24034
24035 case 0xDF:
24036 /* VPANDN r/m, rV, r ::: r = ~rV & r/m (the NOT is applied to the first source, rV) */
24037 /* VEX.NDS.128.66.0F.WIG DF /r = VPANDN xmm3/m128, xmm2, xmm1 */
24038 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24039 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
24040 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV128,
24041 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
24042 goto decode_success;
24043 }
24044 break;
24045
sewardj8516a1f2012-06-24 14:26:30 +000024046 case 0xE0:
24047 /* VPAVGB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E0 /r */
24048 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24049 delta = dis_AVX128_E_V_to_G(
24050 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux16 );
24051 goto decode_success;
24052 }
24053 break;
24054
sewardj4c0a7ac2012-06-21 09:08:19 +000024055 case 0xE1:
24056 /* VPSRAW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E1 /r */
24057 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24058 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
24059 "vpsraw", Iop_SarN16x8 );
24060 *uses_vvvv = True;
24061 goto decode_success;
24062 }
24063 break;
24064
24065 case 0xE2:
24066 /* VPSRAD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E2 /r */
24067 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24068 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
24069 "vpsrad", Iop_SarN32x4 );
24070 *uses_vvvv = True;
24071 goto decode_success;
24072 }
24073 break;
24074
sewardj8516a1f2012-06-24 14:26:30 +000024075 case 0xE3:
24076 /* VPAVGW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E3 /r */
24077 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24078 delta = dis_AVX128_E_V_to_G(
24079 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux8 );
24080 goto decode_success;
24081 }
24082 break;
24083
sewardj251b59e2012-05-25 13:51:07 +000024084 case 0xE4:
24085 /* VPMULHUW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E4 /r */
24086 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24087 delta = dis_AVX128_E_V_to_G(
24088 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux8 );
24089 goto decode_success;
24090 }
24091 break;
24092
sewardjfe0c5e72012-06-15 15:48:07 +000024093 case 0xE5:
24094 /* VPMULHW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E5 /r */
24095 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24096 delta = dis_AVX128_E_V_to_G(
24097 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx8 );
24098 goto decode_success;
24099 }
24100 break;
24101
sewardj4b1cc832012-06-13 11:10:20 +000024102 case 0xE6:
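      /* Opcode E6 decodes to three different conversions depending on
         the SIMD prefix: F3 = VCVTDQ2PD, 66 = VCVTTPD2DQ (truncating,
         r2zero), F2 = VCVTPD2DQ (current rounding mode). */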
24103 /* VCVTDQ2PD xmm2/m64, xmm1 = VEX.128.F3.0F.WIG E6 /r */
24104 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
24105 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, True/*isAvx*/);
24106 goto decode_success;
24107 }
sewardj6fcd43e2012-06-14 08:51:35 +000024108 /* VCVTDQ2PD xmm2/m128, ymm1 = VEX.256.F3.0F.WIG E6 /r */
24109 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
24110 delta = dis_CVTDQ2PD_256(vbi, pfx, delta);
24111 goto decode_success;
24112 }
sewardj66becf32012-06-18 23:15:16 +000024113 /* VCVTTPD2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG E6 /r */
24114 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24115 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
24116 True/*r2zero*/);
24117 goto decode_success;
24118 }
24119 /* VCVTTPD2DQ ymm2/m256, xmm1 = VEX.256.66.0F.WIG E6 /r */
24120 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24121 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, True/*r2zero*/);
24122 goto decode_success;
24123 }
24124 /* VCVTPD2DQ xmm2/m128, xmm1 = VEX.128.F2.0F.WIG E6 /r */
24125 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24126 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
24127 False/*!r2zero*/);
24128 goto decode_success;
24129 }
24130 /* VCVTPD2DQ ymm2/m256, xmm1 = VEX.256.F2.0F.WIG E6 /r */
24131 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24132 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, False/*!r2zero*/);
24133 goto decode_success;
24134 }
sewardj4b1cc832012-06-13 11:10:20 +000024135 break;
24136
sewardj6eaf00c2012-05-23 11:33:56 +000024137 case 0xE7:
sewardj8eb7ae82012-06-24 14:00:27 +000024138 /* VMOVNTDQ xmm1, m128 = VEX.128.66.0F.WIG E7 /r */
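      /* Non-temporal store: the memory operand must be 16-aligned
         (32-aligned for the 256-bit form), enforced via
         gen_SEGV_if_not_*_aligned.  There is no register-destination
         form, so the reg case falls through. */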
sewardj6eaf00c2012-05-23 11:33:56 +000024139 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24140 UChar modrm = getUChar(delta);
24141 UInt rG = gregOfRexRM(pfx,modrm);
24142 if (!epartIsReg(modrm)) {
24143 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24144 gen_SEGV_if_not_16_aligned( addr );
24145 storeLE( mkexpr(addr), getXMMReg(rG) );
24146 DIP("vmovntdq %s,%s\n", nameXMMReg(rG), dis_buf);
24147 delta += alen;
24148 goto decode_success;
24149 }
24150 /* else fall through */
24151 }
sewardj8eb7ae82012-06-24 14:00:27 +000024152 /* VMOVNTDQ ymm1, m256 = VEX.256.66.0F.WIG E7 /r */
24153 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24154 UChar modrm = getUChar(delta);
24155 UInt rG = gregOfRexRM(pfx,modrm);
24156 if (!epartIsReg(modrm)) {
24157 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24158 gen_SEGV_if_not_32_aligned( addr );
24159 storeLE( mkexpr(addr), getYMMReg(rG) );
24160 DIP("vmovntdq %s,%s\n", nameYMMReg(rG), dis_buf);
24161 delta += alen;
24162 goto decode_success;
24163 }
24164 /* else fall through */
24165 }
sewardj6eaf00c2012-05-23 11:33:56 +000024166 break;
24167
sewardj4f228902012-06-21 09:17:58 +000024168 case 0xE8:
24169 /* VPSUBSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E8 /r */
24170 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24171 delta = dis_AVX128_E_V_to_G(
24172 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx16 );
24173 goto decode_success;
24174 }
sewardj8516a1f2012-06-24 14:26:30 +000024175 break;
sewardj4f228902012-06-21 09:17:58 +000024176
24177 case 0xE9:
24178 /* VPSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E9 /r */
24179 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24180 delta = dis_AVX128_E_V_to_G(
24181 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx8 );
24182 goto decode_success;
24183 }
24184 break;
24185
sewardje8a7eb72012-06-12 14:59:17 +000024186 case 0xEA:
24187 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
24188 /* VPMINSW = VEX.NDS.128.66.0F.WIG EA /r */
24189 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24190 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24191 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx8 );
24192 goto decode_success;
24193 }
24194 break;
24195
sewardjc4530ae2012-05-21 10:18:49 +000024196 case 0xEB:
sewardj251b59e2012-05-25 13:51:07 +000024197 /* VPOR r/m, rV, r ::: r = rV | r/m */
sewardjc4530ae2012-05-21 10:18:49 +000024198 /* VPOR = VEX.NDS.128.66.0F.WIG EB /r */
24199 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24200 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24201 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV128 );
24202 goto decode_success;
24203 }
24204 break;
24205
sewardj8516a1f2012-06-24 14:26:30 +000024206 case 0xEC:
24207 /* VPADDSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG EC /r */
24208 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24209 delta = dis_AVX128_E_V_to_G(
24210 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx16 );
24211 goto decode_success;
24212 }
24213 break;
24214
24215 case 0xED:
24216 /* VPADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG ED /r */
24217 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24218 delta = dis_AVX128_E_V_to_G(
24219 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx8 );
24220 goto decode_success;
24221 }
24222 break;
24223
sewardje8a7eb72012-06-12 14:59:17 +000024224 case 0xEE:
24225 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
24226 /* VPMAXSW = VEX.NDS.128.66.0F.WIG EE /r */
24227 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24228 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24229 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx8 );
24230 goto decode_success;
24231 }
24232 break;
24233
sewardjc4530ae2012-05-21 10:18:49 +000024234 case 0xEF:
sewardj251b59e2012-05-25 13:51:07 +000024235 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
sewardjc4530ae2012-05-21 10:18:49 +000024236 /* VPXOR = VEX.NDS.128.66.0F.WIG EF /r */
24237 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24238 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24239 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV128 );
24240 goto decode_success;
24241 }
24242 break;
24243
sewardjadf357c2012-06-24 13:44:17 +000024244 case 0xF0:
24245 /* VLDDQU m256, ymm1 = VEX.256.F2.0F.WIG F0 /r */
24246 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24247 UChar modrm = getUChar(delta);
24248 UInt rD = gregOfRexRM(pfx, modrm);
24249 IRTemp tD = newTemp(Ity_V256);
24250 if (epartIsReg(modrm)) break;
24251 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
24252 delta += alen;
24253 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
24254 DIP("vlddqu %s,%s\n", dis_buf, nameYMMReg(rD));
24255 putYMMReg(rD, mkexpr(tD));
24256 goto decode_success;
24257 }
24258 /* VLDDQU m128, xmm1 = VEX.128.F2.0F.WIG F0 /r */
24259 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24260 UChar modrm = getUChar(delta);
24261 UInt rD = gregOfRexRM(pfx, modrm);
24262 IRTemp tD = newTemp(Ity_V128);
24263 if (epartIsReg(modrm)) break;
24264 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
24265 delta += alen;
24266 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
24267 DIP("vlddqu %s,%s\n", dis_buf, nameXMMReg(rD));
24268 putYMMRegLoAndZU(rD, mkexpr(tD));
24269 goto decode_success;
24270 }
24271 break;
24272
sewardj4c0a7ac2012-06-21 09:08:19 +000024273 case 0xF1:
24274 /* VPSLLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F1 /r */
24275 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24276 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
24277 "vpsllw", Iop_ShlN16x8 );
24278 *uses_vvvv = True;
24279 goto decode_success;
24280
24281 }
24282 break;
24283
24284 case 0xF2:
24285 /* VPSLLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F2 /r */
24286 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24287 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
24288 "vpslld", Iop_ShlN32x4 );
24289 *uses_vvvv = True;
24290 goto decode_success;
24291 }
24292 break;
24293
24294 case 0xF3:
24295 /* VPSLLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F3 /r */
24296 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24297 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
24298 "vpsllq", Iop_ShlN64x2 );
24299 *uses_vvvv = True;
24300 goto decode_success;
24301 }
24302 break;
24303
sewardje8a7eb72012-06-12 14:59:17 +000024304 case 0xF4:
24305 /* VPMULUDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F4 /r */
24306 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24307 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
24308 uses_vvvv, vbi, pfx, delta,
24309 "vpmuludq", math_PMULUDQ_128 );
sewardj89378162012-06-24 12:12:20 +000024310 goto decode_success;
24311 }
24312 break;
24313
24314 case 0xF5:
24315 /* VPMADDWD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F5 /r */
24316 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24317 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
24318 uses_vvvv, vbi, pfx, delta,
24319 "vpmaddwd", math_PMADDWD_128 );
24320 goto decode_success;
sewardje8a7eb72012-06-12 14:59:17 +000024321 }
24322 break;
24323
sewardj82096922012-06-24 14:57:59 +000024324 case 0xF6:
24325 /* VPSADBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F6 /r */
24326 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24327 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
24328 uses_vvvv, vbi, pfx, delta,
24329 "vpsadbw", math_PSADBW_128 );
24330 goto decode_success;
24331 }
24332 break;
24333
sewardj8eb7ae82012-06-24 14:00:27 +000024334 case 0xF7:
24335 /* VMASKMOVDQU xmm2, xmm1 = VEX.128.66.0F.WIG F7 /r */
24336 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
24337 && epartIsReg(getUChar(delta))) {
24338 delta = dis_MASKMOVDQU( vbi, pfx, delta, True/*isAvx*/ );
24339 goto decode_success;
24340 }
24341 break;
24342
sewardjc4530ae2012-05-21 10:18:49 +000024343 case 0xF8:
sewardj251b59e2012-05-25 13:51:07 +000024344 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
sewardj98d02cc2012-06-02 11:55:25 +000024345 /* VPSUBB = VEX.NDS.128.66.0F.WIG F8 /r */
sewardjc4530ae2012-05-21 10:18:49 +000024346 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24347 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24348 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x16 );
24349 goto decode_success;
24350 }
24351 break;
24352
sewardj98d02cc2012-06-02 11:55:25 +000024353 case 0xF9:
24354 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
24355 /* VPSUBW = VEX.NDS.128.66.0F.WIG F9 /r */
24356 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24357 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24358 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x8 );
24359 goto decode_success;
24360 }
24361 break;
24362
sewardjc4530ae2012-05-21 10:18:49 +000024363 case 0xFA:
sewardj251b59e2012-05-25 13:51:07 +000024364 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
sewardj98d02cc2012-06-02 11:55:25 +000024365 /* VPSUBD = VEX.NDS.128.66.0F.WIG FA /r */
sewardjc4530ae2012-05-21 10:18:49 +000024366 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24367 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24368 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x4 );
24369 goto decode_success;
24370 }
24371 break;
24372
sewardj98d02cc2012-06-02 11:55:25 +000024373 case 0xFB:
24374 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
24375 /* VPSUBQ = VEX.NDS.128.66.0F.WIG FB /r */
24376 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24377 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24378 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x2 );
24379 goto decode_success;
24380 }
24381 break;
24382
sewardj6fcd43e2012-06-14 08:51:35 +000024383 case 0xFC:
24384 /* VPADDB r/m, rV, r ::: r = rV + r/m */
24385 /* VPADDB = VEX.NDS.128.66.0F.WIG FC /r */
24386 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24387 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24388 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x16 );
24389 goto decode_success;
24390 }
24391 break;
24392
sewardj6faf7cc2012-05-25 15:53:01 +000024393 case 0xFD:
24394 /* VPADDW r/m, rV, r ::: r = rV + r/m */
24395 /* VPADDW = VEX.NDS.128.66.0F.WIG FD /r */
24396 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24397 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24398 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x8 );
24399 goto decode_success;
24400 }
24401 break;
24402
sewardjc4530ae2012-05-21 10:18:49 +000024403 case 0xFE:
sewardj251b59e2012-05-25 13:51:07 +000024404 /* VPADDD r/m, rV, r ::: r = rV + r/m */
sewardjc4530ae2012-05-21 10:18:49 +000024405 /* VPADDD = VEX.NDS.128.66.0F.WIG FE /r */
24406 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24407 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24408 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x4 );
24409 goto decode_success;
24410 }
24411 break;
24412
24413 default:
24414 break;
24415
24416 }
24417
24418 //decode_failure:
24419 return deltaIN;
24420
24421 decode_success:
24422 return delta;
24423}
24424
24425
24426/*------------------------------------------------------------*/
24427/*--- ---*/
24428/*--- Top-level post-escape decoders: dis_ESC_0F38__VEX ---*/
24429/*--- ---*/
24430/*------------------------------------------------------------*/
24431
sewardjd8bca7e2012-06-20 11:46:19 +000024432static IRTemp math_PERMILPS_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
24433{
24434 /* In the control vector, zero out all but the bottom two bits of
24435 each 32-bit lane. */
24436 IRExpr* cv1 = binop(Iop_ShrN32x4,
24437 binop(Iop_ShlN32x4, mkexpr(ctrlV), mkU8(30)),
24438 mkU8(30));
24439 /* And use the resulting cleaned-up control vector as steering
24440 in a Perm operation. */
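   /* Informal scalar model of the transform (illustration only; the
      decoder uses the IR built below), where lane32[i] denotes the
      i'th 32-bit lane:

         for (i = 0; i < 4; i++)
            res.lane32[i] = data.lane32[ ctrl.lane32[i] & 3 ];
   */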
24441 IRTemp res = newTemp(Ity_V128);
24442 assign(res, binop(Iop_Perm32x4, mkexpr(dataV), cv1));
24443 return res;
24444}
24445
24446static IRTemp math_PERMILPS_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
24447{
24448 IRTemp dHi, dLo, cHi, cLo;
24449 dHi = dLo = cHi = cLo = IRTemp_INVALID;
24450 breakupV256toV128s( dataV, &dHi, &dLo );
24451 breakupV256toV128s( ctrlV, &cHi, &cLo );
24452 IRTemp rHi = math_PERMILPS_VAR_128( dHi, cHi );
24453 IRTemp rLo = math_PERMILPS_VAR_128( dLo, cLo );
24454 IRTemp res = newTemp(Ity_V256);
24455 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
24456 return res;
24457}
24458
24459static IRTemp math_PERMILPD_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
24460{
24461 /* No cleverness here .. */
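   /* Informal note: only bit 1 of each 64-bit control lane matters.
      If it is set, the high data half is copied into that result
      lane, otherwise the low half; Mux0X below yields its second
      operand when the 8-bit condition is zero and its third
      otherwise.  Roughly:
         res.lane64[i] = (ctrl.lane64[i] & 2) ? data.lane64[1]
                                              : data.lane64[0];   */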
24462 IRTemp dHi, dLo, cHi, cLo;
24463 dHi = dLo = cHi = cLo = IRTemp_INVALID;
24464 breakupV128to64s( dataV, &dHi, &dLo );
24465 breakupV128to64s( ctrlV, &cHi, &cLo );
24466 IRExpr* rHi
24467 = IRExpr_Mux0X( unop(Iop_64to8,
24468 binop(Iop_And64, mkexpr(cHi), mkU64(2))),
24469 mkexpr(dLo), mkexpr(dHi) );
24470 IRExpr* rLo
24471 = IRExpr_Mux0X( unop(Iop_64to8,
24472 binop(Iop_And64, mkexpr(cLo), mkU64(2))),
24473 mkexpr(dLo), mkexpr(dHi) );
24474 IRTemp res = newTemp(Ity_V128);
24475 assign(res, binop(Iop_64HLtoV128, rHi, rLo));
24476 return res;
24477}
24478
24479static IRTemp math_PERMILPD_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
24480{
24481 IRTemp dHi, dLo, cHi, cLo;
24482 dHi = dLo = cHi = cLo = IRTemp_INVALID;
24483 breakupV256toV128s( dataV, &dHi, &dLo );
24484 breakupV256toV128s( ctrlV, &cHi, &cLo );
24485 IRTemp rHi = math_PERMILPD_VAR_128( dHi, cHi );
24486 IRTemp rLo = math_PERMILPD_VAR_128( dLo, cLo );
24487 IRTemp res = newTemp(Ity_V256);
24488 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
24489 return res;
24490}
24491
sewardjc4530ae2012-05-21 10:18:49 +000024492__attribute__((noinline))
24493static
24494Long dis_ESC_0F38__VEX (
24495 /*MB_OUT*/DisResult* dres,
24496 /*OUT*/ Bool* uses_vvvv,
24497 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
24498 Bool resteerCisOk,
24499 void* callback_opaque,
24500 VexArchInfo* archinfo,
24501 VexAbiInfo* vbi,
24502 Prefix pfx, Int sz, Long deltaIN
24503 )
24504{
sewardj4b1cc832012-06-13 11:10:20 +000024505 IRTemp addr = IRTemp_INVALID;
24506 Int alen = 0;
24507 HChar dis_buf[50];
sewardjc4530ae2012-05-21 10:18:49 +000024508 Long delta = deltaIN;
24509 UChar opc = getUChar(delta);
24510 delta++;
24511 *uses_vvvv = False;
24512
24513 switch (opc) {
24514
24515 case 0x00:
sewardj251b59e2012-05-25 13:51:07 +000024516 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
sewardjc4530ae2012-05-21 10:18:49 +000024517 /* VPSHUFB = VEX.NDS.128.66.0F38.WIG 00 /r */
24518 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24519 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
24520 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_XMM );
24521 goto decode_success;
24522 }
24523 break;
24524
sewardj8516a1f2012-06-24 14:26:30 +000024525 case 0x01:
24526 case 0x02:
24527 case 0x03:
24528 /* VPHADDW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 01 /r */
24529 /* VPHADDD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 02 /r */
24530 /* VPHADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 03 /r */
24531 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24532 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
24533 *uses_vvvv = True;
24534 goto decode_success;
24535 }
24536 break;
24537
24538 case 0x04:
24539 /* VPMADDUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 04 /r */
24540 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24541 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
24542 uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
24543 math_PMADDUBSW_128 );
24544 goto decode_success;
24545 }
24546 break;
24547
24548 case 0x05:
24549 case 0x06:
24550 case 0x07:
24551 /* VPHSUBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 05 /r */
24552 /* VPHSUBD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 06 /r */
24553 /* VPHSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 07 /r */
24554 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24555 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
24556 *uses_vvvv = True;
24557 goto decode_success;
24558 }
24559 break;
24560
sewardj82096922012-06-24 14:57:59 +000024561 case 0x08:
24562 case 0x09:
24563 case 0x0A:
24564 /* VPSIGNB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 08 /r */
24565 /* VPSIGNW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 09 /r */
24566 /* VPSIGND xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0A /r */
24567 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24568 IRTemp sV = newTemp(Ity_V128);
24569 IRTemp dV = newTemp(Ity_V128);
24570 IRTemp sHi, sLo, dHi, dLo;
24571 sHi = sLo = dHi = dLo = IRTemp_INVALID;
24572 UChar ch = '?';
24573 Int laneszB = 0;
24574 UChar modrm = getUChar(delta);
24575 UInt rG = gregOfRexRM(pfx,modrm);
24576 UInt rV = getVexNvvvv(pfx);
24577
24578 switch (opc) {
24579 case 0x08: laneszB = 1; ch = 'b'; break;
24580 case 0x09: laneszB = 2; ch = 'w'; break;
24581 case 0x0A: laneszB = 4; ch = 'd'; break;
24582 default: vassert(0);
24583 }
24584
24585 assign( dV, getXMMReg(rV) );
24586
24587 if (epartIsReg(modrm)) {
24588 UInt rE = eregOfRexRM(pfx,modrm);
24589 assign( sV, getXMMReg(rE) );
24590 delta += 1;
24591 DIP("vpsign%c %s,%s,%s\n", ch, nameXMMReg(rE),
24592 nameXMMReg(rV), nameXMMReg(rG));
24593 } else {
24594 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24595 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
24596 delta += alen;
24597 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
24598 nameXMMReg(rV), nameXMMReg(rG));
24599 }
24600
24601 breakupV128to64s( dV, &dHi, &dLo );
24602 breakupV128to64s( sV, &sHi, &sLo );
24603
24604 putYMMRegLoAndZU(
24605 rG,
24606 binop(Iop_64HLtoV128,
24607 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
24608 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
24609 )
24610 );
24611 *uses_vvvv = True;
24612 goto decode_success;
24613 }
24614 break;
24615
24616 case 0x0B:
24617 /* VPMULHRSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0B /r */
24618 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24619 IRTemp sV = newTemp(Ity_V128);
24620 IRTemp dV = newTemp(Ity_V128);
24621 IRTemp sHi, sLo, dHi, dLo;
24622 sHi = sLo = dHi = dLo = IRTemp_INVALID;
24623 UChar modrm = getUChar(delta);
24624 UInt rG = gregOfRexRM(pfx,modrm);
24625 UInt rV = getVexNvvvv(pfx);
24626
24627 assign( dV, getXMMReg(rV) );
24628
24629 if (epartIsReg(modrm)) {
24630 UInt rE = eregOfRexRM(pfx,modrm);
24631 assign( sV, getXMMReg(rE) );
24632 delta += 1;
24633 DIP("vpmulhrsw %s,%s,%s\n", nameXMMReg(rE),
24634 nameXMMReg(rV), nameXMMReg(rG));
24635 } else {
24636 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24637 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
24638 delta += alen;
24639 DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
24640 nameXMMReg(rV), nameXMMReg(rG));
24641 }
24642
24643 breakupV128to64s( dV, &dHi, &dLo );
24644 breakupV128to64s( sV, &sHi, &sLo );
24645
24646 putYMMRegLoAndZU(
24647 rG,
24648 binop(Iop_64HLtoV128,
24649 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
24650 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
24651 )
24652 );
24653 *uses_vvvv = True;
24654 goto decode_success;
24655 }
24656 break;
24657
sewardjd8bca7e2012-06-20 11:46:19 +000024658 case 0x0C:
24659 /* VPERMILPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0C /r */
24660 if (have66noF2noF3(pfx)
24661 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
24662 UChar modrm = getUChar(delta);
24663 UInt rG = gregOfRexRM(pfx, modrm);
24664 UInt rV = getVexNvvvv(pfx);
24665 IRTemp ctrlV = newTemp(Ity_V128);
24666 if (epartIsReg(modrm)) {
24667 UInt rE = eregOfRexRM(pfx, modrm);
24668 delta += 1;
24669 DIP("vpermilps %s,%s,%s\n",
24670 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24671 assign(ctrlV, getXMMReg(rE));
24672 } else {
sewardj148e5942012-06-21 08:34:19 +000024673 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardjd8bca7e2012-06-20 11:46:19 +000024674 delta += alen;
24675 DIP("vpermilps %s,%s,%s\n",
24676 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
24677 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
24678 }
24679 IRTemp dataV = newTemp(Ity_V128);
24680 assign(dataV, getXMMReg(rV));
24681 IRTemp resV = math_PERMILPS_VAR_128(dataV, ctrlV);
24682 putYMMRegLoAndZU(rG, mkexpr(resV));
24683 *uses_vvvv = True;
24684 goto decode_success;
24685 }
24686 /* VPERMILPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0C /r */
24687 if (have66noF2noF3(pfx)
24688 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
24689 UChar modrm = getUChar(delta);
24690 UInt rG = gregOfRexRM(pfx, modrm);
24691 UInt rV = getVexNvvvv(pfx);
24692 IRTemp ctrlV = newTemp(Ity_V256);
24693 if (epartIsReg(modrm)) {
24694 UInt rE = eregOfRexRM(pfx, modrm);
24695 delta += 1;
24696 DIP("vpermilps %s,%s,%s\n",
24697 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
24698 assign(ctrlV, getYMMReg(rE));
24699 } else {
sewardj148e5942012-06-21 08:34:19 +000024700 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardjd8bca7e2012-06-20 11:46:19 +000024701 delta += alen;
24702 DIP("vpermilps %s,%s,%s\n",
24703 dis_buf, nameYMMReg(rV), nameYMMReg(rG));
24704 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
24705 }
24706 IRTemp dataV = newTemp(Ity_V256);
24707 assign(dataV, getYMMReg(rV));
24708 IRTemp resV = math_PERMILPS_VAR_256(dataV, ctrlV);
24709 putYMMReg(rG, mkexpr(resV));
24710 *uses_vvvv = True;
24711 goto decode_success;
24712 }
24713 break;
24714
24715 case 0x0D:
24716 /* VPERMILPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0D /r */
24717 if (have66noF2noF3(pfx)
24718 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
24719 UChar modrm = getUChar(delta);
24720 UInt rG = gregOfRexRM(pfx, modrm);
24721 UInt rV = getVexNvvvv(pfx);
24722 IRTemp ctrlV = newTemp(Ity_V128);
24723 if (epartIsReg(modrm)) {
24724 UInt rE = eregOfRexRM(pfx, modrm);
24725 delta += 1;
24726 DIP("vpermilpd %s,%s,%s\n",
24727 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24728 assign(ctrlV, getXMMReg(rE));
24729 } else {
sewardj148e5942012-06-21 08:34:19 +000024730 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardjd8bca7e2012-06-20 11:46:19 +000024731 delta += alen;
24732 DIP("vpermilpd %s,%s,%s\n",
24733 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
24734 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
24735 }
24736 IRTemp dataV = newTemp(Ity_V128);
24737 assign(dataV, getXMMReg(rV));
24738 IRTemp resV = math_PERMILPD_VAR_128(dataV, ctrlV);
24739 putYMMRegLoAndZU(rG, mkexpr(resV));
24740 *uses_vvvv = True;
24741 goto decode_success;
24742 }
24743 /* VPERMILPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0D /r */
24744 if (have66noF2noF3(pfx)
24745 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
24746 UChar modrm = getUChar(delta);
24747 UInt rG = gregOfRexRM(pfx, modrm);
24748 UInt rV = getVexNvvvv(pfx);
24749 IRTemp ctrlV = newTemp(Ity_V256);
24750 if (epartIsReg(modrm)) {
24751 UInt rE = eregOfRexRM(pfx, modrm);
24752 delta += 1;
24753 DIP("vpermilpd %s,%s,%s\n",
24754 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
24755 assign(ctrlV, getYMMReg(rE));
24756 } else {
sewardj148e5942012-06-21 08:34:19 +000024757 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardjd8bca7e2012-06-20 11:46:19 +000024758 delta += alen;
24759 DIP("vpermilpd %s,%s,%s\n",
24760 dis_buf, nameYMMReg(rV), nameYMMReg(rG));
24761 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
24762 }
24763 IRTemp dataV = newTemp(Ity_V256);
24764 assign(dataV, getYMMReg(rV));
24765 IRTemp resV = math_PERMILPD_VAR_256(dataV, ctrlV);
24766 putYMMReg(rG, mkexpr(resV));
24767 *uses_vvvv = True;
24768 goto decode_success;
24769 }
24770 break;
24771
sewardjed1884d2012-06-21 08:53:48 +000024772 case 0x0E:
24773 /* VTESTPS xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0E /r */
24774 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24775 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 32 );
24776 goto decode_success;
24777 }
24778 /* VTESTPS ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0E /r */
24779 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24780 delta = dis_xTESTy_256( vbi, pfx, delta, 32 );
24781 goto decode_success;
24782 }
24783 break;
24784
24785 case 0x0F:
24786 /* VTESTPD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0F /r */
24787 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24788 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 64 );
24789 goto decode_success;
24790 }
24791 /* VTESTPD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0F /r */
24792 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24793 delta = dis_xTESTy_256( vbi, pfx, delta, 64 );
24794 goto decode_success;
24795 }
24796 break;
24797
24798 case 0x17:
24799 /* VPTEST xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 17 /r */
24800 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24801 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 0 );
24802 goto decode_success;
24803 }
24804 /* VPTEST ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 17 /r */
24805 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24806 delta = dis_xTESTy_256( vbi, pfx, delta, 0 );
24807 goto decode_success;
24808 }
24809 break;
24810
sewardj6fcd43e2012-06-14 08:51:35 +000024811 case 0x18:
sewardj151cd3e2012-06-18 13:56:55 +000024812 /* VBROADCASTSS m32, xmm1 = VEX.128.66.0F38.WIG 18 /r */
sewardj6fcd43e2012-06-14 08:51:35 +000024813 if (have66noF2noF3(pfx)
sewardj151cd3e2012-06-18 13:56:55 +000024814 && 0==getVexL(pfx)/*128*/
sewardj6fcd43e2012-06-14 08:51:35 +000024815 && !epartIsReg(getUChar(delta))) {
24816 UChar modrm = getUChar(delta);
24817 UInt rG = gregOfRexRM(pfx, modrm);
24818 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
24819 delta += alen;
24820 DIP("vbroadcastss %s,%s\n", dis_buf, nameXMMReg(rG));
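         /* Informal note: the 32-bit value loaded below is replicated
            into all four 32-bit lanes of xmm1; putYMMRegLoAndZU then
            zeroes the upper half of the ymm register. */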
24821 IRTemp t32 = newTemp(Ity_I32);
24822 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
24823 IRTemp t64 = newTemp(Ity_I64);
24824 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
24825 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
24826 putYMMRegLoAndZU(rG, res);
24827 goto decode_success;
sewardj82096922012-06-24 14:57:59 +000024828 }
sewardj151cd3e2012-06-18 13:56:55 +000024829 /* VBROADCASTSS m32, ymm1 = VEX.256.66.0F38.WIG 18 /r */
24830 if (have66noF2noF3(pfx)
24831 && 1==getVexL(pfx)/*256*/
24832 && !epartIsReg(getUChar(delta))) {
24833 UChar modrm = getUChar(delta);
24834 UInt rG = gregOfRexRM(pfx, modrm);
24835 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
24836 delta += alen;
24837 DIP("vbroadcastss %s,%s\n", dis_buf, nameYMMReg(rG));
24838 IRTemp t32 = newTemp(Ity_I32);
24839 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
24840 IRTemp t64 = newTemp(Ity_I64);
24841 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
24842 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
24843 mkexpr(t64), mkexpr(t64));
24844 putYMMReg(rG, res);
24845 goto decode_success;
sewardj82096922012-06-24 14:57:59 +000024846 }
24847 break;
sewardj6fcd43e2012-06-14 08:51:35 +000024848
sewardj4b1cc832012-06-13 11:10:20 +000024849 case 0x19:
sewardj151cd3e2012-06-18 13:56:55 +000024850 /* VBROADCASTSD m64, ymm1 = VEX.256.66.0F38.WIG 19 /r */
sewardj4b1cc832012-06-13 11:10:20 +000024851 if (have66noF2noF3(pfx)
sewardj151cd3e2012-06-18 13:56:55 +000024852 && 1==getVexL(pfx)/*256*/
sewardj4b1cc832012-06-13 11:10:20 +000024853 && !epartIsReg(getUChar(delta))) {
24854 UChar modrm = getUChar(delta);
24855 UInt rG = gregOfRexRM(pfx, modrm);
24856 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
24857 delta += alen;
24858 DIP("vbroadcastsd %s,%s\n", dis_buf, nameYMMReg(rG));
24859 IRTemp t64 = newTemp(Ity_I64);
24860 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
24861 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
24862 mkexpr(t64), mkexpr(t64));
24863 putYMMReg(rG, res);
24864 goto decode_success;
sewardj82096922012-06-24 14:57:59 +000024865 }
24866 break;
24867
24868 case 0x1A:
24869 /* VBROADCASTF128 m128, ymm1 = VEX.256.66.0F38.WIG 1A /r */
24870 if (have66noF2noF3(pfx)
24871 && 1==getVexL(pfx)/*256*/
24872 && !epartIsReg(getUChar(delta))) {
24873 UChar modrm = getUChar(delta);
24874 UInt rG = gregOfRexRM(pfx, modrm);
24875 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
24876 delta += alen;
24877 DIP("vbroadcastf128 %s,%s\n", dis_buf, nameYMMReg(rG));
24878 IRTemp t128 = newTemp(Ity_V128);
24879 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
24880 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
24881 goto decode_success;
24882 }
24883 break;
sewardj4b1cc832012-06-13 11:10:20 +000024884
sewardj8516a1f2012-06-24 14:26:30 +000024885 case 0x1C:
24886 /* VPABSB xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1C /r */
24887 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24888 delta = dis_AVX128_E_to_G_unary(
24889 uses_vvvv, vbi, pfx, delta,
24890 "vpabsb", math_PABS_XMM_pap1 );
24891 goto decode_success;
24892 }
24893 break;
24894
24895 case 0x1D:
24896 /* VPABSW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1D /r */
24897 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24898 delta = dis_AVX128_E_to_G_unary(
24899 uses_vvvv, vbi, pfx, delta,
24900 "vpabsw", math_PABS_XMM_pap2 );
24901 goto decode_success;
24902 }
24903 break;
24904
sewardj97f72452012-05-23 05:56:53 +000024905 case 0x1E:
24906 /* VPABSD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1E /r */
24907 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24908 delta = dis_AVX128_E_to_G_unary(
24909 uses_vvvv, vbi, pfx, delta,
24910 "vpabsd", math_PABS_XMM_pap4 );
24911 goto decode_success;
24912 }
24913 break;
24914
sewardj6fcd43e2012-06-14 08:51:35 +000024915 case 0x20:
24916 /* VPMOVSXBW xmm2/m64, xmm1 */
24917 /* VPMOVSXBW = VEX.128.66.0F38.WIG 20 /r */
24918 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24919 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
24920 True/*isAvx*/, False/*!xIsZ*/ );
24921 goto decode_success;
24922 }
24923 break;
24924
sewardj4ed05e02012-06-18 15:01:30 +000024925 case 0x21:
24926 /* VPMOVSXBD xmm2/m32, xmm1 */
24927 /* VPMOVSXBD = VEX.128.66.0F38.WIG 21 /r */
24928 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24929 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
24930 True/*isAvx*/, False/*!xIsZ*/ );
24931 goto decode_success;
24932 }
24933 break;
24934
sewardj8516a1f2012-06-24 14:26:30 +000024935 case 0x22:
24936 /* VPMOVSXBQ xmm2/m16, xmm1 */
24937 /* VPMOVSXBQ = VEX.128.66.0F38.WIG 22 /r */
24938 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24939 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, True/*isAvx*/ );
24940 goto decode_success;
24941 }
24942 break;
24943
sewardj6fcd43e2012-06-14 08:51:35 +000024944 case 0x23:
24945 /* VPMOVSXWD xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 23 /r */
24946 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24947 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
24948 True/*isAvx*/, False/*!xIsZ*/ );
24949 goto decode_success;
24950 }
24951 break;
24952
sewardj8516a1f2012-06-24 14:26:30 +000024953 case 0x24:
24954 /* VPMOVSXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 24 /r */
24955 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24956 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, True/*isAvx*/ );
24957 goto decode_success;
24958 }
24959 break;
24960
sewardj6fcd43e2012-06-14 08:51:35 +000024961 case 0x25:
24962 /* VPMOVSXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 25 /r */
24963 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24964 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
24965 True/*isAvx*/, False/*!xIsZ*/ );
24966 goto decode_success;
24967 }
24968 break;
24969
sewardj89378162012-06-24 12:12:20 +000024970 case 0x28:
24971 /* VPMULDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 28 /r */
24972 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24973 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
24974 uses_vvvv, vbi, pfx, delta,
24975 "vpmuldq", math_PMULDQ_128 );
24976 goto decode_success;
24977 }
24978 break;
24979
sewardj56c30312012-06-12 08:45:39 +000024980 case 0x29:
24981 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
24982 /* VPCMPEQQ = VEX.NDS.128.66.0F38.WIG 29 /r */
24983 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24984 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24985 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x2 );
24986 goto decode_success;
24987 }
24988 break;
24989
sewardj8eb7ae82012-06-24 14:00:27 +000024990 case 0x2A:
24991 /* VMOVNTDQA m128, xmm1 = VEX.128.66.0F38.WIG 2A /r */
24992 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
24993 && !epartIsReg(getUChar(delta))) {
24994 UChar modrm = getUChar(delta);
24995 UInt rD = gregOfRexRM(pfx, modrm);
24996 IRTemp tD = newTemp(Ity_V128);
24997 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
24998 delta += alen;
24999 gen_SEGV_if_not_16_aligned(addr);
25000 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
25001 DIP("vmovntdqa %s,%s\n", dis_buf, nameXMMReg(rD));
25002 putYMMRegLoAndZU(rD, mkexpr(tD));
25003 goto decode_success;
25004 }
25005 break;
25006
sewardj8516a1f2012-06-24 14:26:30 +000025007 case 0x2B:
25008 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
25009 /* VPACKUSDW = VEX.NDS.128.66.0F38.WIG 2B /r */
25010 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25011 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25012 uses_vvvv, vbi, pfx, delta, "vpackusdw",
25013 Iop_QNarrowBin32Sto16Ux8, NULL,
25014 False/*!invertLeftArg*/, True/*swapArgs*/ );
25015 goto decode_success;
25016 }
25017 break;
25018
sewardjc4530ae2012-05-21 10:18:49 +000025019 case 0x30:
25020 /* VPMOVZXBW xmm2/m64, xmm1 */
25021 /* VPMOVZXBW = VEX.128.66.0F38.WIG 30 /r */
25022 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
sewardj6fcd43e2012-06-14 08:51:35 +000025023 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
25024 True/*isAvx*/, True/*xIsZ*/ );
sewardjc4530ae2012-05-21 10:18:49 +000025025 goto decode_success;
25026 }
25027 break;
25028
sewardj4ed05e02012-06-18 15:01:30 +000025029 case 0x31:
25030 /* VPMOVZXBD xmm2/m32, xmm1 */
25031 /* VPMOVZXBD = VEX.128.66.0F38.WIG 31 /r */
25032 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25033 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
25034 True/*isAvx*/, True/*xIsZ*/ );
25035 goto decode_success;
25036 }
25037 break;
25038
sewardj8516a1f2012-06-24 14:26:30 +000025039 case 0x32:
25040 /* VPMOVZXBQ xmm2/m16, xmm1 */
25041 /* VPMOVZXBQ = VEX.128.66.0F38.WIG 32 /r */
25042 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25043 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, True/*isAvx*/ );
25044 goto decode_success;
25045 }
25046 break;
25047
sewardjc4530ae2012-05-21 10:18:49 +000025048 case 0x33:
25049 /* VPMOVZXWD xmm2/m64, xmm1 */
25050 /* VPMOVZXWD = VEX.128.66.0F38.WIG 33 /r */
25051 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
sewardj6fcd43e2012-06-14 08:51:35 +000025052 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
25053 True/*isAvx*/, True/*xIsZ*/ );
sewardjc4530ae2012-05-21 10:18:49 +000025054 goto decode_success;
25055 }
25056 break;
25057
sewardj8516a1f2012-06-24 14:26:30 +000025058 case 0x34:
25059 /* VPMOVZXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 34 /r */
25060 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25061 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, True/*isAvx*/ );
25062 goto decode_success;
25063 }
25064 break;
25065
25066 case 0x35:
25067 /* VPMOVZXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 35 /r */
25068 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25069 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
25070 True/*isAvx*/, True/*xIsZ*/ );
25071 goto decode_success;
25072 }
25073 break;
25074
sewardj56c30312012-06-12 08:45:39 +000025075 case 0x37:
25076 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
25077 /* VPCMPGTQ = VEX.NDS.128.66.0F38.WIG 37 /r */
25078 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25079 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25080 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx2 );
25081 goto decode_success;
25082 }
25083 break;
25084
sewardje8a7eb72012-06-12 14:59:17 +000025085 case 0x38:
25086 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
25087 /* VPMINSB = VEX.NDS.128.66.0F38.WIG 38 /r */
25088 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25089 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25090 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx16 );
25091 goto decode_success;
25092 }
25093 break;
25094
sewardjc4530ae2012-05-21 10:18:49 +000025095 case 0x39:
sewardj251b59e2012-05-25 13:51:07 +000025096 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
sewardjc4530ae2012-05-21 10:18:49 +000025097 /* VPMINSD = VEX.NDS.128.66.0F38.WIG 39 /r */
25098 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25099 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25100 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx4 );
25101 goto decode_success;
25102 }
25103 break;
25104
sewardje8a7eb72012-06-12 14:59:17 +000025105 case 0x3A:
25106 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
25107 /* VPMINUW = VEX.NDS.128.66.0F38.WIG 3A /r */
25108 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25109 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25110 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux8 );
25111 goto decode_success;
25112 }
25113 break;
25114
25115 case 0x3B:
25116 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
25117 /* VPMINUD = VEX.NDS.128.66.0F38.WIG 3B /r */
25118 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25119 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25120 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux4 );
25121 goto decode_success;
25122 }
25123 break;
25124
25125 case 0x3C:
25126 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
25127 /* VPMAXSB = VEX.NDS.128.66.0F38.WIG 3C /r */
25128 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25129 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25130 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx16 );
25131 goto decode_success;
25132 }
25133 break;
25134
sewardjc4530ae2012-05-21 10:18:49 +000025135 case 0x3D:
sewardj251b59e2012-05-25 13:51:07 +000025136 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
sewardjc4530ae2012-05-21 10:18:49 +000025137 /* VPMAXSD = VEX.NDS.128.66.0F38.WIG 3D /r */
25138 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25139 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25140 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx4 );
25141 goto decode_success;
25142 }
25143 break;
25144
sewardje8a7eb72012-06-12 14:59:17 +000025145 case 0x3E:
25146 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
25147 /* VPMAXUW = VEX.NDS.128.66.0F38.WIG 3E /r */
25148 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25149 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25150 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux8 );
25151 goto decode_success;
25152 }
25153 break;
25154
25155 case 0x3F:
25156 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
25157 /* VPMAXUD = VEX.NDS.128.66.0F38.WIG 3F /r */
25158 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25159 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25160 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux4 );
25161 goto decode_success;
25162 }
25163 break;
25164
25165 case 0x40:
25166 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
25167 /* VPMULLD = VEX.NDS.128.66.0F38.WIG 40 /r */
25168 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25169 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25170 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x4 );
25171 goto decode_success;
25172 }
25173 break;
25174
25175 case 0x41:
25176 /* VPHMINPOSUW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 41 /r */
25177 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25178 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, True/*isAvx*/ );
25179 goto decode_success;
25180 }
25181 break;
25182
sewardj1407a362012-06-24 15:11:38 +000025183 case 0xDB:
25184 case 0xDC:
25185 case 0xDD:
25186 case 0xDE:
25187 case 0xDF:
25188 /* VAESIMC xmm2/m128, xmm1 = VEX.128.66.0F38.WIG DB /r */
25189 /* VAESENC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DC /r */
25190 /* VAESENCLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DD /r */
25191 /* VAESDEC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DE /r */
25192 /* VAESDECLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DF /r */
25193 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25194 delta = dis_AESx( vbi, pfx, delta, True/*isAvx*/, opc );
25195 if (opc != 0xDB) *uses_vvvv = True;
25196 goto decode_success;
25197 }
25198 break;
25199
sewardjc4530ae2012-05-21 10:18:49 +000025200 default:
25201 break;
25202
25203 }
25204
25205 //decode_failure:
25206 return deltaIN;
25207
25208 decode_success:
25209 return delta;
25210}
25211
25212
25213/*------------------------------------------------------------*/
25214/*--- ---*/
25215/*--- Top-level post-escape decoders: dis_ESC_0F3A__VEX ---*/
25216/*--- ---*/
25217/*------------------------------------------------------------*/
25218
sewardjfe0c5e72012-06-15 15:48:07 +000025219static IRTemp math_VPERMILPS_128 ( IRTemp sV, UInt imm8 )
25220{
25221 vassert(imm8 < 256);
25222 IRTemp s3, s2, s1, s0;
25223 s3 = s2 = s1 = s0 = IRTemp_INVALID;
25224 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
25225# define SEL(_nn) (((_nn)==0) ? s0 : ((_nn)==1) ? s1 \
25226 : ((_nn)==2) ? s2 : s3)
25227 IRTemp res = newTemp(Ity_V128);
25228 assign(res, mkV128from32s( SEL((imm8 >> 6) & 3),
25229 SEL((imm8 >> 4) & 3),
25230 SEL((imm8 >> 2) & 3),
25231 SEL((imm8 >> 0) & 3) ));
25232# undef SEL
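   /* Example (illustration only): imm8 == 0x1B (binary 00 01 10 11)
      selects s0 for lane 3, s1 for lane 2, s2 for lane 1 and s3 for
      lane 0, i.e. it reverses the four 32-bit lanes. */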
25233 return res;
25234}
25235
sewardjc4530ae2012-05-21 10:18:49 +000025236__attribute__((noinline))
25237static
25238Long dis_ESC_0F3A__VEX (
25239 /*MB_OUT*/DisResult* dres,
25240 /*OUT*/ Bool* uses_vvvv,
25241 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
25242 Bool resteerCisOk,
25243 void* callback_opaque,
25244 VexArchInfo* archinfo,
25245 VexAbiInfo* vbi,
25246 Prefix pfx, Int sz, Long deltaIN
25247 )
25248{
25249 IRTemp addr = IRTemp_INVALID;
25250 Int alen = 0;
25251 HChar dis_buf[50];
25252 Long delta = deltaIN;
25253 UChar opc = getUChar(delta);
25254 delta++;
25255 *uses_vvvv = False;
25256
25257 switch (opc) {
25258
sewardjfe0c5e72012-06-15 15:48:07 +000025259 case 0x04:
sewardj21459cb2012-06-18 14:05:52 +000025260 /* VPERMILPS imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 04 /r ib */
25261 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
sewardjfe0c5e72012-06-15 15:48:07 +000025262 UChar modrm = getUChar(delta);
25263 UInt imm8 = 0;
25264 UInt rG = gregOfRexRM(pfx, modrm);
25265 IRTemp sV = newTemp(Ity_V256);
25266 if (epartIsReg(modrm)) {
25267 UInt rE = eregOfRexRM(pfx, modrm);
25268 delta += 1;
25269 imm8 = getUChar(delta);
25270 DIP("vpermilps $%u,%s,%s\n",
25271 imm8, nameYMMReg(rE), nameYMMReg(rG));
25272 assign(sV, getYMMReg(rE));
25273 } else {
25274 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25275 delta += alen;
25276 imm8 = getUChar(delta);
25277 DIP("vpermilps $%u,%s,%s\n",
25278 imm8, dis_buf, nameYMMReg(rG));
25279 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
25280 }
25281 delta++;
25282 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
25283 breakupV256toV128s( sV, &sVhi, &sVlo );
25284 IRTemp dVhi = math_VPERMILPS_128( sVhi, imm8 );
25285 IRTemp dVlo = math_VPERMILPS_128( sVlo, imm8 );
25286 IRExpr* res = binop(Iop_V128HLtoV256, mkexpr(dVhi), mkexpr(dVlo));
25287 putYMMReg(rG, res);
25288 goto decode_success;
25289 }
sewardj21459cb2012-06-18 14:05:52 +000025290 /* VPERMILPS imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 04 /r ib */
25291 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25292 UChar modrm = getUChar(delta);
25293 UInt imm8 = 0;
25294 UInt rG = gregOfRexRM(pfx, modrm);
25295 IRTemp sV = newTemp(Ity_V128);
25296 if (epartIsReg(modrm)) {
25297 UInt rE = eregOfRexRM(pfx, modrm);
25298 delta += 1;
25299 imm8 = getUChar(delta);
25300 DIP("vpermilps $%u,%s,%s\n",
25301 imm8, nameXMMReg(rE), nameXMMReg(rG));
25302 assign(sV, getXMMReg(rE));
25303 } else {
25304 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25305 delta += alen;
25306 imm8 = getUChar(delta);
25307 DIP("vpermilps $%u,%s,%s\n",
25308 imm8, dis_buf, nameXMMReg(rG));
25309 assign(sV, loadLE(Ity_V128, mkexpr(addr)));
25310 }
25311 delta++;
25312 putYMMRegLoAndZU(rG, mkexpr ( math_VPERMILPS_128 ( sV, imm8 ) ) );
25313 goto decode_success;
25314 }
sewardjfe0c5e72012-06-15 15:48:07 +000025315 break;
25316
sewardje8a7eb72012-06-12 14:59:17 +000025317 case 0x05:
sewardj21459cb2012-06-18 14:05:52 +000025318 /* VPERMILPD imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 05 /r ib */
25319 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
sewardjfe0c5e72012-06-15 15:48:07 +000025320 UChar modrm = getUChar(delta);
25321 UInt imm8 = 0;
25322 UInt rG = gregOfRexRM(pfx, modrm);
25323 IRTemp sV = newTemp(Ity_V128);
25324 if (epartIsReg(modrm)) {
25325 UInt rE = eregOfRexRM(pfx, modrm);
25326 delta += 1;
25327 imm8 = getUChar(delta);
25328 DIP("vpermilpd $%u,%s,%s\n",
25329 imm8, nameXMMReg(rE), nameXMMReg(rG));
25330 assign(sV, getXMMReg(rE));
25331 } else {
25332 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25333 delta += alen;
25334 imm8 = getUChar(delta);
25335 DIP("vpermilpd $%u,%s,%s\n",
25336 imm8, dis_buf, nameXMMReg(rG));
25337 assign(sV, loadLE(Ity_V128, mkexpr(addr)));
25338 }
25339 delta++;
25340 IRTemp s1 = newTemp(Ity_I64);
25341 IRTemp s0 = newTemp(Ity_I64);
25342 assign(s1, unop(Iop_V128HIto64, mkexpr(sV)));
25343 assign(s0, unop(Iop_V128to64, mkexpr(sV)));
25344 IRTemp dV = newTemp(Ity_V128);
25345 assign(dV, binop(Iop_64HLtoV128,
25346 mkexpr((imm8 & (1<<1)) ? s1 : s0),
25347 mkexpr((imm8 & (1<<0)) ? s1 : s0)));
25348 putYMMRegLoAndZU(rG, mkexpr(dV));
25349 goto decode_success;
25350 }
sewardj21459cb2012-06-18 14:05:52 +000025351 /* VPERMILPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 05 /r ib */
25352 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
sewardje8a7eb72012-06-12 14:59:17 +000025353 UChar modrm = getUChar(delta);
25354 UInt imm8 = 0;
25355 UInt rG = gregOfRexRM(pfx, modrm);
25356 IRTemp sV = newTemp(Ity_V256);
25357 if (epartIsReg(modrm)) {
25358 UInt rE = eregOfRexRM(pfx, modrm);
25359 delta += 1;
25360 imm8 = getUChar(delta);
25361 DIP("vpermilpd $%u,%s,%s\n",
25362 imm8, nameYMMReg(rE), nameYMMReg(rG));
25363 assign(sV, getYMMReg(rE));
25364 } else {
25365 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25366 delta += alen;
25367 imm8 = getUChar(delta);
25368 DIP("vpermilpd $%u,%s,%s\n",
25369 imm8, dis_buf, nameYMMReg(rG));
25370 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
25371 }
25372 delta++;
sewardj4b1cc832012-06-13 11:10:20 +000025373 IRTemp s3, s2, s1, s0;
25374 s3 = s2 = s1 = s0 = IRTemp_INVALID;
25375 breakupV256to64s(sV, &s3, &s2, &s1, &s0);
sewardje8a7eb72012-06-12 14:59:17 +000025376 IRTemp dV = newTemp(Ity_V256);
25377 assign(dV, IRExpr_Qop(Iop_64x4toV256,
25378 mkexpr((imm8 & (1<<3)) ? s3 : s2),
25379 mkexpr((imm8 & (1<<2)) ? s3 : s2),
25380 mkexpr((imm8 & (1<<1)) ? s1 : s0),
25381 mkexpr((imm8 & (1<<0)) ? s1 : s0)));
25382 putYMMReg(rG, mkexpr(dV));
25383 goto decode_success;
25384 }
sewardje8a7eb72012-06-12 14:59:17 +000025385 break;
25386
25387 case 0x06:
25388 /* VPERM2F128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 06 /r ib */
25389 if (have66noF2noF3(pfx)
25390 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
25391 UChar modrm = getUChar(delta);
25392 UInt imm8 = 0;
25393 UInt rG = gregOfRexRM(pfx, modrm);
25394 UInt rV = getVexNvvvv(pfx);
25395 IRTemp s00 = newTemp(Ity_V128);
25396 IRTemp s01 = newTemp(Ity_V128);
25397 IRTemp s10 = newTemp(Ity_V128);
25398 IRTemp s11 = newTemp(Ity_V128);
25399 assign(s00, getYMMRegLane128(rV, 0));
25400 assign(s01, getYMMRegLane128(rV, 1));
25401 if (epartIsReg(modrm)) {
25402 UInt rE = eregOfRexRM(pfx, modrm);
25403 delta += 1;
25404 imm8 = getUChar(delta);
25405 DIP("vperm2f128 $%u,%s,%s,%s\n",
25406 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
25407 assign(s10, getYMMRegLane128(rE, 0));
25408 assign(s11, getYMMRegLane128(rE, 1));
25409 } else {
25410 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25411 delta += alen;
25412 imm8 = getUChar(delta);
25413 DIP("vperm2f128 $%u,%s,%s,%s\n",
25414 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
25415 assign(s10, loadLE(Ity_V128, binop(Iop_Add64,
25416 mkexpr(addr), mkU64(0))));
25417 assign(s11, loadLE(Ity_V128, binop(Iop_Add64,
25418 mkexpr(addr), mkU64(16))));
25419 }
25420 delta++;
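      /* imm8 layout (informal summary of the selection done below):
         bits 1:0 choose the source of the result's low 128-bit lane
         and bits 5:4 the source of its high lane, where 0 = rV low,
         1 = rV high, 2 = rE/mem low, 3 = rE/mem high; bits 3 and 7,
         when set, zero the corresponding result lane instead. */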
25421# define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
25422 : ((_nn)==2) ? s10 : s11)
25423 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3)));
25424 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3)));
25425# undef SEL
25426 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0));
25427 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0));
25428 *uses_vvvv = True;
25429 goto decode_success;
25430 }
25431 break;
25432
sewardj4f228902012-06-21 09:17:58 +000025433 case 0x08:
sewardja9651762012-06-24 11:09:37 +000025434 /* VROUNDPS imm8, xmm2/m128, xmm1 */
sewardj4f228902012-06-21 09:17:58 +000025435 /* VROUNDPS = VEX.128.66.0F3A.WIG 08 /r ib */
25436 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25437 UChar modrm = getUChar(delta);
25438 UInt rG = gregOfRexRM(pfx, modrm);
25439 IRTemp src = newTemp(Ity_V128);
25440 IRTemp s0 = IRTemp_INVALID;
25441 IRTemp s1 = IRTemp_INVALID;
25442 IRTemp s2 = IRTemp_INVALID;
25443 IRTemp s3 = IRTemp_INVALID;
25444 IRTemp rm = newTemp(Ity_I32);
25445 Int imm = 0;
25446
25447 modrm = getUChar(delta);
25448
25449 if (epartIsReg(modrm)) {
25450 UInt rE = eregOfRexRM(pfx, modrm);
25451 assign( src, getXMMReg( rE ) );
25452 imm = getUChar(delta+1);
25453 if (imm & ~15) break;
25454 delta += 1+1;
25455 DIP( "vroundps $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) );
25456 } else {
25457 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25458 assign( src, loadLE(Ity_V128, mkexpr(addr) ) );
25459 imm = getUChar(delta+alen);
25460 if (imm & ~15) break;
25461 delta += alen+1;
25462 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) );
25463 }
25464
25465 /* (imm & 3) contains an Intel-encoded rounding mode. Because
25466 that encoding is the same as the encoding for IRRoundingMode,
25467 we can use that value directly in the IR as a rounding
25468 mode. */
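         /* For reference (informal, not used by the code below):
            value 0 = round to nearest even, 1 = round down (toward
            -infinity), 2 = round up (toward +infinity), 3 = round
            toward zero; if bit 2 of imm is set, the dynamic rounding
            mode from MXCSR is used instead. */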
25469 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
25470
25471 breakupV128to32s( src, &s3, &s2, &s1, &s0 );
25472 putYMMRegLane128( rG, 1, mkV128(0) );
25473# define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
25474 unop(Iop_ReinterpI32asF32, mkexpr(s)))
25475 putYMMRegLane32F( rG, 3, CVT(s3) );
25476 putYMMRegLane32F( rG, 2, CVT(s2) );
25477 putYMMRegLane32F( rG, 1, CVT(s1) );
25478 putYMMRegLane32F( rG, 0, CVT(s0) );
25479# undef CVT
25480 goto decode_success;
25481 }
sewardja9651762012-06-24 11:09:37 +000025482 /* VROUNDPS imm8, ymm2/m256, ymm1 */
sewardj4f228902012-06-21 09:17:58 +000025483 /* VROUNDPS = VEX.256.66.0F3A.WIG 08 /r ib */
25484 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25485 UChar modrm = getUChar(delta);
25486 UInt rG = gregOfRexRM(pfx, modrm);
25487 IRTemp src = newTemp(Ity_V256);
25488 IRTemp s0 = IRTemp_INVALID;
25489 IRTemp s1 = IRTemp_INVALID;
25490 IRTemp s2 = IRTemp_INVALID;
25491 IRTemp s3 = IRTemp_INVALID;
25492 IRTemp s4 = IRTemp_INVALID;
25493 IRTemp s5 = IRTemp_INVALID;
25494 IRTemp s6 = IRTemp_INVALID;
25495 IRTemp s7 = IRTemp_INVALID;
25496 IRTemp rm = newTemp(Ity_I32);
25497 Int imm = 0;
25498
25499 modrm = getUChar(delta);
25500
25501 if (epartIsReg(modrm)) {
25502 UInt rE = eregOfRexRM(pfx, modrm);
25503 assign( src, getYMMReg( rE ) );
25504 imm = getUChar(delta+1);
25505 if (imm & ~15) break;
25506 delta += 1+1;
25507 DIP( "vroundps $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) );
25508 } else {
25509 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25510 assign( src, loadLE(Ity_V256, mkexpr(addr) ) );
25511 imm = getUChar(delta+alen);
25512 if (imm & ~15) break;
25513 delta += alen+1;
25514 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) );
25515 }
25516
25517 /* (imm & 3) contains an Intel-encoded rounding mode. Because
25518 that encoding is the same as the encoding for IRRoundingMode,
25519 we can use that value directly in the IR as a rounding
25520 mode. */
25521 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
25522
25523 breakupV256to32s( src, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
25524# define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
25525 unop(Iop_ReinterpI32asF32, mkexpr(s)))
25526 putYMMRegLane32F( rG, 7, CVT(s7) );
25527 putYMMRegLane32F( rG, 6, CVT(s6) );
25528 putYMMRegLane32F( rG, 5, CVT(s5) );
25529 putYMMRegLane32F( rG, 4, CVT(s4) );
25530 putYMMRegLane32F( rG, 3, CVT(s3) );
25531 putYMMRegLane32F( rG, 2, CVT(s2) );
25532 putYMMRegLane32F( rG, 1, CVT(s1) );
25533 putYMMRegLane32F( rG, 0, CVT(s0) );
25534# undef CVT
25535 goto decode_success;
25536 }
25537
25538 case 0x09:
sewardja9651762012-06-24 11:09:37 +000025539 /* VROUNDPD imm8, xmm2/m128, xmm1 */
sewardj4f228902012-06-21 09:17:58 +000025540 /* VROUNDPD = VEX.128.66.0F3A.WIG 09 /r ib */
25541 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25542 UChar modrm = getUChar(delta);
25543 UInt rG = gregOfRexRM(pfx, modrm);
25544 IRTemp src = newTemp(Ity_V128);
25545 IRTemp s0 = IRTemp_INVALID;
25546 IRTemp s1 = IRTemp_INVALID;
25547 IRTemp rm = newTemp(Ity_I32);
25548 Int imm = 0;
25549
25550 modrm = getUChar(delta);
25551
25552 if (epartIsReg(modrm)) {
25553 UInt rE = eregOfRexRM(pfx, modrm);
25554 assign( src, getXMMReg( rE ) );
25555 imm = getUChar(delta+1);
25556 if (imm & ~15) break;
25557 delta += 1+1;
25558 DIP( "vroundpd $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) );
25559 } else {
25560 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25561 assign( src, loadLE(Ity_V128, mkexpr(addr) ) );
25562 imm = getUChar(delta+alen);
25563 if (imm & ~15) break;
25564 delta += alen+1;
25565 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) );
25566 }
25567
25568 /* (imm & 3) contains an Intel-encoded rounding mode. Because
25569 that encoding is the same as the encoding for IRRoundingMode,
25570 we can use that value directly in the IR as a rounding
25571 mode. */
25572 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
25573
25574 breakupV128to64s( src, &s1, &s0 );
25575 putYMMRegLane128( rG, 1, mkV128(0) );
25576# define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
25577 unop(Iop_ReinterpI64asF64, mkexpr(s)))
25578 putYMMRegLane64F( rG, 1, CVT(s1) );
25579 putYMMRegLane64F( rG, 0, CVT(s0) );
25580# undef CVT
25581 goto decode_success;
25582 }
sewardja9651762012-06-24 11:09:37 +000025583 /* VROUNDPD imm8, ymm2/m256, ymm1 */
sewardj4f228902012-06-21 09:17:58 +000025584 /* VROUNDPD = VEX.256.66.0F3A.WIG 09 /r ib */
25585 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25586 UChar modrm = getUChar(delta);
25587 UInt rG = gregOfRexRM(pfx, modrm);
25588 IRTemp src = newTemp(Ity_V256);
25589 IRTemp s0 = IRTemp_INVALID;
25590 IRTemp s1 = IRTemp_INVALID;
25591 IRTemp s2 = IRTemp_INVALID;
25592 IRTemp s3 = IRTemp_INVALID;
25593 IRTemp rm = newTemp(Ity_I32);
25594 Int imm = 0;
25595
25596 modrm = getUChar(delta);
25597
25598 if (epartIsReg(modrm)) {
25599 UInt rE = eregOfRexRM(pfx, modrm);
25600 assign( src, getYMMReg( rE ) );
25601 imm = getUChar(delta+1);
25602 if (imm & ~15) break;
25603 delta += 1+1;
25604 DIP( "vroundpd $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) );
25605 } else {
25606 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25607 assign( src, loadLE(Ity_V256, mkexpr(addr) ) );
25608 imm = getUChar(delta+alen);
25609 if (imm & ~15) break;
25610 delta += alen+1;
25611 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) );
25612 }
25613
25614 /* (imm & 3) contains an Intel-encoded rounding mode. Because
25615 that encoding is the same as the encoding for IRRoundingMode,
25616 we can use that value directly in the IR as a rounding
25617 mode. */
25618 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
25619
25620 breakupV256to64s( src, &s3, &s2, &s1, &s0 );
25621# define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
25622 unop(Iop_ReinterpI64asF64, mkexpr(s)))
25623 putYMMRegLane64F( rG, 3, CVT(s3) );
25624 putYMMRegLane64F( rG, 2, CVT(s2) );
25625 putYMMRegLane64F( rG, 1, CVT(s1) );
25626 putYMMRegLane64F( rG, 0, CVT(s0) );
25627# undef CVT
25628 goto decode_success;
25629 }
25630
25631 case 0x0A:
25632 case 0x0B:
25633 /* VROUNDSS imm8, xmm3/m32, xmm2, xmm1 */
25634 /* VROUNDSS = VEX.NDS.128.66.0F3A.WIG 0A ib */
25635 /* VROUNDSD imm8, xmm3/m64, xmm2, xmm1 */
25636 /* VROUNDSD = VEX.NDS.128.66.0F3A.WIG 0B ib */
25637 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25638 UChar modrm = getUChar(delta);
25639 UInt rG = gregOfRexRM(pfx, modrm);
25640 UInt rV = getVexNvvvv(pfx);
25641 Bool isD = opc == 0x0B;
25642 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
25643 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
25644 Int imm = 0;
25645
25646 if (epartIsReg(modrm)) {
25647 UInt rE = eregOfRexRM(pfx, modrm);
25648 assign( src,
25649 isD ? getXMMRegLane64F(rE, 0) : getXMMRegLane32F(rE, 0) );
25650 imm = getUChar(delta+1);
25651 if (imm & ~15) break;
25652 delta += 1+1;
25653 DIP( "vrounds%c $%d,%s,%s,%s\n",
25654 isD ? 'd' : 's',
25655 imm, nameXMMReg( rE ), nameXMMReg( rV ), nameXMMReg( rG ) );
25656 } else {
25657 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25658 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
25659 imm = getUChar(delta+alen);
25660 if (imm & ~15) break;
25661 delta += alen+1;
25662 DIP( "vrounds%c $%d,%s,%s,%s\n",
25663 isD ? 'd' : 's',
25664 imm, dis_buf, nameXMMReg( rV ), nameXMMReg( rG ) );
25665 }
25666
25667 /* (imm & 3) contains an Intel-encoded rounding mode. Because
25668 that encoding is the same as the encoding for IRRoundingMode,
25669 we can use that value directly in the IR as a rounding
25670 mode. */
25671 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
25672 (imm & 4) ? get_sse_roundingmode()
25673 : mkU32(imm & 3),
25674 mkexpr(src)) );
25675
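         /* Result-merging note (informal): only the low scalar of the
            destination is replaced; the remaining lanes of the low
            128 bits are copied from rV, and the upper 128 bits of the
            ymm register are zeroed, per the VEX.128 write semantics. */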
25676 if (isD)
25677 putXMMRegLane64F( rG, 0, mkexpr(res) );
25678 else {
25679 putXMMRegLane32F( rG, 0, mkexpr(res) );
25680 putXMMRegLane32F( rG, 1, getXMMRegLane32F( rV, 1 ) );
25681 }
25682 putXMMRegLane64F( rG, 1, getXMMRegLane64F( rV, 1 ) );
25683 putYMMRegLane128( rG, 1, mkV128(0) );
25684 *uses_vvvv = True;
25685 goto decode_success;
25686 }
25687 break;
25688
sewardj21459cb2012-06-18 14:05:52 +000025689 case 0x0C:
25690 /* VBLENDPS imm8, ymm3/m256, ymm2, ymm1 */
25691 /* VBLENDPS = VEX.NDS.256.66.0F3A.WIG 0C /r ib */
25692 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25693 UChar modrm = getUChar(delta);
25694 UInt imm8;
25695 UInt rG = gregOfRexRM(pfx, modrm);
25696 UInt rV = getVexNvvvv(pfx);
25697 IRTemp sV = newTemp(Ity_V256);
25698 IRTemp sE = newTemp(Ity_V256);
25699 assign ( sV, getYMMReg(rV) );
25700 if (epartIsReg(modrm)) {
25701 UInt rE = eregOfRexRM(pfx, modrm);
25702 delta += 1;
25703 imm8 = getUChar(delta);
25704 DIP("vblendps $%u,%s,%s,%s\n",
25705 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
25706 assign(sE, getYMMReg(rE));
25707 } else {
25708 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25709 delta += alen;
25710 imm8 = getUChar(delta);
25711 DIP("vblendps $%u,%s,%s,%s\n",
25712 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
25713 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
25714 }
25715 delta++;
25716 putYMMReg( rG,
25717 mkexpr( math_BLENDPS_256( sE, sV, imm8) ) );
25718 *uses_vvvv = True;
25719 goto decode_success;
25720 }
25721 /* VBLENDPS imm8, xmm3/m128, xmm2, xmm1 */
25722 /* VBLENDPS = VEX.NDS.128.66.0F3A.WIG 0C /r ib */
25723 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25724 UChar modrm = getUChar(delta);
25725 UInt imm8;
25726 UInt rG = gregOfRexRM(pfx, modrm);
25727 UInt rV = getVexNvvvv(pfx);
25728 IRTemp sV = newTemp(Ity_V128);
25729 IRTemp sE = newTemp(Ity_V128);
25730 assign ( sV, getXMMReg(rV) );
25731 if (epartIsReg(modrm)) {
25732 UInt rE = eregOfRexRM(pfx, modrm);
25733 delta += 1;
25734 imm8 = getUChar(delta);
25735 DIP("vblendps $%u,%s,%s,%s\n",
25736 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
25737 assign(sE, getXMMReg(rE));
25738 } else {
25739 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25740 delta += alen;
25741 imm8 = getUChar(delta);
25742 DIP("vblendps $%u,%s,%s,%s\n",
25743 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
25744 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
25745 }
25746 delta++;
25747 putYMMRegLoAndZU( rG,
25748 mkexpr( math_BLENDPS_128( sE, sV, imm8) ) );
25749 *uses_vvvv = True;
25750 goto decode_success;
25751 }
25752 break;
25753
25754 case 0x0D:
25755 /* VBLENDPD imm8, ymm3/m256, ymm2, ymm1 */
25756 /* VBLENDPD = VEX.NDS.256.66.0F3A.WIG 0D /r ib */
25757 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25758 UChar modrm = getUChar(delta);
25759 UInt imm8;
25760 UInt rG = gregOfRexRM(pfx, modrm);
25761 UInt rV = getVexNvvvv(pfx);
25762 IRTemp sV = newTemp(Ity_V256);
25763 IRTemp sE = newTemp(Ity_V256);
25764 assign ( sV, getYMMReg(rV) );
25765 if (epartIsReg(modrm)) {
25766 UInt rE = eregOfRexRM(pfx, modrm);
25767 delta += 1;
25768 imm8 = getUChar(delta);
25769 DIP("vblendpd $%u,%s,%s,%s\n",
25770 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
25771 assign(sE, getYMMReg(rE));
25772 } else {
25773 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25774 delta += alen;
25775 imm8 = getUChar(delta);
25776 DIP("vblendpd $%u,%s,%s,%s\n",
25777 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
25778 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
25779 }
25780 delta++;
25781 putYMMReg( rG,
25782 mkexpr( math_BLENDPD_256( sE, sV, imm8) ) );
25783 *uses_vvvv = True;
25784 goto decode_success;
25785 }
25786 /* VBLENDPD imm8, xmm3/m128, xmm2, xmm1 */
25787 /* VBLENDPD = VEX.NDS.128.66.0F3A.WIG 0D /r ib */
25788 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25789 UChar modrm = getUChar(delta);
25790 UInt imm8;
25791 UInt rG = gregOfRexRM(pfx, modrm);
25792 UInt rV = getVexNvvvv(pfx);
25793 IRTemp sV = newTemp(Ity_V128);
25794 IRTemp sE = newTemp(Ity_V128);
25795 assign ( sV, getXMMReg(rV) );
25796 if (epartIsReg(modrm)) {
25797 UInt rE = eregOfRexRM(pfx, modrm);
25798 delta += 1;
25799 imm8 = getUChar(delta);
25800 DIP("vblendpd $%u,%s,%s,%s\n",
25801 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
25802 assign(sE, getXMMReg(rE));
25803 } else {
25804 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25805 delta += alen;
25806 imm8 = getUChar(delta);
25807 DIP("vblendpd $%u,%s,%s,%s\n",
25808 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
25809 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
25810 }
25811 delta++;
25812 putYMMRegLoAndZU( rG,
25813 mkexpr( math_BLENDPD_128( sE, sV, imm8) ) );
25814 *uses_vvvv = True;
25815 goto decode_success;
25816 }
25817 break;
25818
25819 case 0x0E:
25820 /* VPBLENDW imm8, xmm3/m128, xmm2, xmm1 */
25821 /* VPBLENDW = VEX.NDS.128.66.0F3A.WIG 0E /r ib */
25822 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25823 UChar modrm = getUChar(delta);
25824 UInt imm8;
25825 UInt rG = gregOfRexRM(pfx, modrm);
25826 UInt rV = getVexNvvvv(pfx);
25827 IRTemp sV = newTemp(Ity_V128);
25828 IRTemp sE = newTemp(Ity_V128);
25829 assign ( sV, getXMMReg(rV) );
25830 if (epartIsReg(modrm)) {
25831 UInt rE = eregOfRexRM(pfx, modrm);
25832 delta += 1;
25833 imm8 = getUChar(delta);
25834 DIP("vpblendw $%u,%s,%s,%s\n",
25835 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
25836 assign(sE, getXMMReg(rE));
25837 } else {
25838 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25839 delta += alen;
25840 imm8 = getUChar(delta);
25841 DIP("vpblendw $%u,%s,%s,%s\n",
25842 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
25843 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
25844 }
25845 delta++;
25846 putYMMRegLoAndZU( rG,
25847 mkexpr( math_PBLENDW_128( sE, sV, imm8) ) );
25848 *uses_vvvv = True;
25849 goto decode_success;
25850 }
25851 break;
25852
25853 case 0x0F:
25854 /* VPALIGNR imm8, xmm3/m128, xmm2, xmm1 */
25855 /* VPALIGNR = VEX.NDS.128.66.0F3A.WIG 0F /r ib */
25856 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25857 UChar modrm = getUChar(delta);
25858 UInt rG = gregOfRexRM(pfx, modrm);
25859 UInt rV = getVexNvvvv(pfx);
25860 IRTemp sV = newTemp(Ity_V128);
25861 IRTemp dV = newTemp(Ity_V128);
25862 UInt imm8;
25863
25864 assign( dV, getXMMReg(rV) );
25865
25866 if ( epartIsReg( modrm ) ) {
25867 UInt rE = eregOfRexRM(pfx, modrm);
25868 assign( sV, getXMMReg(rE) );
25869 imm8 = getUChar(delta+1);
25870 delta += 1+1;
25871 DIP("vpalignr $%d,%s,%s,%s\n", imm8, nameXMMReg(rE),
25872 nameXMMReg(rV), nameXMMReg(rG));
25873 } else {
25874 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25875 gen_SEGV_if_not_16_aligned( addr );
25876 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
25877 imm8 = getUChar(delta+alen);
25878 delta += alen+1;
25879 DIP("vpalignr $%d,%s,%s,%s\n", imm8, dis_buf,
25880 nameXMMReg(rV), nameXMMReg(rG));
25881 }
25882
25883 IRTemp res = math_PALIGNR_XMM( sV, dV, imm8 );
25884 putYMMRegLoAndZU( rG, mkexpr(res) );
25885 *uses_vvvv = True;
25886 goto decode_success;
25887 }
25888 break;
25889
25890 case 0x14:
25891 /* VPEXTRB imm8, xmm2, reg/m8 = VEX.128.66.0F3A.W0 14 /r ib */
25892 if (have66noF2noF3(pfx)
25893 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
25894 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
25895 goto decode_success;
25896 }
25897 break;
25898
25899 case 0x15:
25900 /* VPEXTRW imm8, reg/m16, xmm2 */
25901 /* VPEXTRW = VEX.128.66.0F3A.W0 15 /r ib */
25902 if (have66noF2noF3(pfx)
25903 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
25904 delta = dis_PEXTRW( vbi, pfx, delta, True/*isAvx*/ );
25905 goto decode_success;
25906 }
25907 break;
25908
25909 case 0x16:
25910 /* VPEXTRD imm8, r32/m32, xmm2 */
25911 /* VPEXTRD = VEX.128.66.0F3A.W0 16 /r ib */
25912 if (have66noF2noF3(pfx)
25913 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
25914 delta = dis_PEXTRD( vbi, pfx, delta, True/*isAvx*/ );
25915 goto decode_success;
25916 }
25917 /* VPEXTRQ = VEX.128.66.0F3A.W1 16 /r ib */
25918 if (have66noF2noF3(pfx)
25919 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
25920 delta = dis_PEXTRQ( vbi, pfx, delta, True/*isAvx*/ );
25921 goto decode_success;
25922 }
25923 break;
25924
25925 case 0x17:
25926 /* VEXTRACTPS imm8, xmm1, r32/m32 = VEX.128.66.0F3A.WIG 17 /r ib */
25927 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25928 delta = dis_EXTRACTPS( vbi, pfx, delta, True/*isAvx*/ );
25929 goto decode_success;
25930 }
25931 break;
25932
25933 case 0x18:
25934 /* VINSERTF128 r/m, rV, rD
sewardj251b59e2012-05-25 13:51:07 +000025935 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
sewardjc4530ae2012-05-21 10:18:49 +000025936 /* VINSERTF128 = VEX.NDS.256.66.0F3A.W0 18 /r ib */
25937 if (have66noF2noF3(pfx)
25938 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
25939 UChar modrm = getUChar(delta);
25940 UInt ib = 0;
sewardje8a7eb72012-06-12 14:59:17 +000025941 UInt rG = gregOfRexRM(pfx, modrm);
sewardjc4530ae2012-05-21 10:18:49 +000025942 UInt rV = getVexNvvvv(pfx);
25943 IRTemp t128 = newTemp(Ity_V128);
25944 if (epartIsReg(modrm)) {
sewardje8a7eb72012-06-12 14:59:17 +000025945 UInt rE = eregOfRexRM(pfx, modrm);
sewardjc4530ae2012-05-21 10:18:49 +000025946 delta += 1;
sewardje8a7eb72012-06-12 14:59:17 +000025947 assign(t128, getXMMReg(rE));
sewardjc4530ae2012-05-21 10:18:49 +000025948 ib = getUChar(delta);
25949 DIP("vinsertf128 $%u,%s,%s,%s\n",
sewardje8a7eb72012-06-12 14:59:17 +000025950 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
sewardjc4530ae2012-05-21 10:18:49 +000025951 } else {
25952 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25953 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
25954 delta += alen;
25955 ib = getUChar(delta);
25956 DIP("vinsertf128 $%u,%s,%s,%s\n",
sewardje8a7eb72012-06-12 14:59:17 +000025957 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
sewardjc4530ae2012-05-21 10:18:49 +000025958 }
25959 delta++;
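         /* Copy both 128-bit lanes of the V source into the destination,
            then overwrite the lane selected by bit 0 of the immediate with
            the 128-bit E operand; the other immediate bits are ignored. */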
25960 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0));
25961 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1));
25962 putYMMRegLane128(rG, ib & 1, mkexpr(t128));
sewardjc4530ae2012-05-21 10:18:49 +000025963 *uses_vvvv = True;
25964 goto decode_success;
25965 }
25966 break;
25967
25968 case 0x19:
sewardjcfca8cd2012-05-27 08:25:42 +000025969 /* VEXTRACTF128 $lane_no, rS, r/m
sewardjc4530ae2012-05-21 10:18:49 +000025970 ::: r/m:V128 = a lane of rS:V256 (RM format) */
25971 /* VEXTRACTF128 = VEX.256.66.0F3A.W0 19 /r ib */
25972 if (have66noF2noF3(pfx)
25973 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
25974 UChar modrm = getUChar(delta);
25975 UInt ib = 0;
25976 UInt rS = gregOfRexRM(pfx, modrm);
25977 IRTemp t128 = newTemp(Ity_V128);
25978 if (epartIsReg(modrm)) {
25979 UInt rD = eregOfRexRM(pfx, modrm);
25980 delta += 1;
25981 ib = getUChar(delta);
25982 assign(t128, getYMMRegLane128(rS, ib & 1));
sewardjc93904b2012-05-27 13:50:42 +000025983 putYMMRegLoAndZU(rD, mkexpr(t128));
sewardjc4530ae2012-05-21 10:18:49 +000025984 DIP("vextractf128 $%u,%s,%s\n",
25985 ib, nameYMMReg(rS), nameXMMReg(rD));
25986 } else {
25987 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25988 delta += alen;
25989 ib = getUChar(delta);
25990 assign(t128, getYMMRegLane128(rS, ib & 1));
25991 storeLE(mkexpr(addr), mkexpr(t128));
25992 DIP("vextractf128 $%u,%s,%s\n",
25993 ib, nameYMMReg(rS), dis_buf);
25994 }
25995 delta++;
25996 /* doesn't use vvvv */
25997 goto decode_success;
25998 }
25999 break;
26000
26001 case 0x20:
sewardj4ed05e02012-06-18 15:01:30 +000026002 /* VPINSRB r32/m8, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 20 /r ib */
26003 if (have66noF2noF3(pfx)
26004 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
26005 UChar modrm = getUChar(delta);
26006 UInt rG = gregOfRexRM(pfx, modrm);
26007 UInt rV = getVexNvvvv(pfx);
sewardj21459cb2012-06-18 14:05:52 +000026008 Int imm8;
sewardj4ed05e02012-06-18 15:01:30 +000026009 IRTemp src_u8 = newTemp(Ity_I8);
sewardj21459cb2012-06-18 14:05:52 +000026010
26011 if ( epartIsReg( modrm ) ) {
sewardj4ed05e02012-06-18 15:01:30 +000026012 UInt rE = eregOfRexRM(pfx,modrm);
26013 imm8 = (Int)(getUChar(delta+1) & 15);
26014 assign( src_u8, unop(Iop_32to8, getIReg32( rE )) );
sewardj21459cb2012-06-18 14:05:52 +000026015 delta += 1+1;
sewardj4ed05e02012-06-18 15:01:30 +000026016 DIP( "vpinsrb $%d,%s,%s,%s\n",
26017 imm8, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
sewardj21459cb2012-06-18 14:05:52 +000026018 } else {
26019 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj4ed05e02012-06-18 15:01:30 +000026020 imm8 = (Int)(getUChar(delta+alen) & 15);
26021 assign( src_u8, loadLE( Ity_I8, mkexpr(addr) ) );
sewardj21459cb2012-06-18 14:05:52 +000026022 delta += alen+1;
sewardj4ed05e02012-06-18 15:01:30 +000026023 DIP( "vpinsrb $%d,%s,%s,%s\n",
26024 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
sewardj21459cb2012-06-18 14:05:52 +000026025 }
26026
26027 IRTemp src_vec = newTemp(Ity_V128);
26028 assign(src_vec, getXMMReg( rV ));
sewardj4ed05e02012-06-18 15:01:30 +000026029 IRTemp res_vec = math_PINSRB_128( src_vec, src_u8, imm8 );
26030 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
sewardj21459cb2012-06-18 14:05:52 +000026031 *uses_vvvv = True;
26032 goto decode_success;
26033 }
sewardj4ed05e02012-06-18 15:01:30 +000026034 break;
sewardj21459cb2012-06-18 14:05:52 +000026035
26036 case 0x21:
26037 /* VINSERTPS imm8, xmm3/m32, xmm2, xmm1
26038 = VEX.NDS.128.66.0F3A.WIG 21 /r ib */
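      /* Architecturally, imm8[7:6] (count_s) selects the source dword
         (register form only), imm8[5:4] (count_d) selects the destination
         dword, and imm8[3:0] is a zero mask.  Only count_s is handled
         here; count_d and the zero mask are dealt with inside
         math_INSERTPS. */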
26039 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26040 UChar modrm = getUChar(delta);
26041 UInt rG = gregOfRexRM(pfx, modrm);
26042 UInt rV = getVexNvvvv(pfx);
26043 UInt imm8;
26044 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */
26045 const IRTemp inval = IRTemp_INVALID;
26046
26047 if ( epartIsReg( modrm ) ) {
26048 UInt rE = eregOfRexRM(pfx, modrm);
26049 IRTemp vE = newTemp(Ity_V128);
26050 assign( vE, getXMMReg(rE) );
26051 IRTemp dsE[4] = { inval, inval, inval, inval };
sewardj4b1cc832012-06-13 11:10:20 +000026052 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] );
sewardjcfca8cd2012-05-27 08:25:42 +000026053 imm8 = getUChar(delta+1);
26054 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */
26055 delta += 1+1;
26056 DIP( "insertps $%u, %s,%s\n",
26057 imm8, nameXMMReg(rE), nameXMMReg(rG) );
26058 } else {
26059 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
26060 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) );
26061 imm8 = getUChar(delta+alen);
26062 delta += alen+1;
26063 DIP( "insertps $%u, %s,%s\n",
26064 imm8, dis_buf, nameXMMReg(rG) );
26065 }
26066
26067 IRTemp vV = newTemp(Ity_V128);
26068 assign( vV, getXMMReg(rV) );
26069
26070 putYMMRegLoAndZU( rG, mkexpr(math_INSERTPS( vV, d2ins, imm8 )) );
26071 *uses_vvvv = True;
26072 goto decode_success;
26073 }
26074 break;
26075
26076 case 0x22:
26077 /* VPINSRD r32/m32, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 22 /r ib */
26078 if (have66noF2noF3(pfx)
26079 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
26080 UChar modrm = getUChar(delta);
26081 UInt rG = gregOfRexRM(pfx, modrm);
26082 UInt rV = getVexNvvvv(pfx);
26083 Int imm8_10;
26084 IRTemp src_u32 = newTemp(Ity_I32);
26085
26086 if ( epartIsReg( modrm ) ) {
26087 UInt rE = eregOfRexRM(pfx,modrm);
26088 imm8_10 = (Int)(getUChar(delta+1) & 3);
26089 assign( src_u32, getIReg32( rE ) );
26090 delta += 1+1;
26091 DIP( "vpinsrd $%d,%s,%s,%s\n",
26092 imm8_10, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
26093 } else {
26094 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
26095 imm8_10 = (Int)(getUChar(delta+alen) & 3);
26096 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) );
26097 delta += alen+1;
sewardj98d02cc2012-06-02 11:55:25 +000026098 DIP( "vpinsrd $%d,%s,%s,%s\n",
sewardj6faf7cc2012-05-25 15:53:01 +000026099 imm8_10, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
26100 }
26101
26102 IRTemp src_vec = newTemp(Ity_V128);
26103 assign(src_vec, getXMMReg( rV ));
26104 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 );
26105 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
26106 *uses_vvvv = True;
26107 goto decode_success;
26108 }
sewardj98d02cc2012-06-02 11:55:25 +000026109 /* VPINSRQ r64/m64, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W1 22 /r ib */
26110 if (have66noF2noF3(pfx)
26111 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
26112 UChar modrm = getUChar(delta);
26113 UInt rG = gregOfRexRM(pfx, modrm);
26114 UInt rV = getVexNvvvv(pfx);
26115 Int imm8_0;
26116 IRTemp src_u64 = newTemp(Ity_I64);
26117
26118 if ( epartIsReg( modrm ) ) {
26119 UInt rE = eregOfRexRM(pfx,modrm);
26120 imm8_0 = (Int)(getUChar(delta+1) & 1);
26121 assign( src_u64, getIReg64( rE ) );
26122 delta += 1+1;
26123 DIP( "vpinsrq $%d,%s,%s,%s\n",
26124 imm8_0, nameIReg64(rE), nameXMMReg(rV), nameXMMReg(rG) );
26125 } else {
26126 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
26127 imm8_0 = (Int)(getUChar(delta+alen) & 1);
26128 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) );
26129 delta += alen+1;
26130 DIP( "vpinsrd $%d,%s,%s,%s\n",
26131 imm8_0, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
26132 }
26133
26134 IRTemp src_vec = newTemp(Ity_V128);
26135 assign(src_vec, getXMMReg( rV ));
26136 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 );
26137 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
26138 *uses_vvvv = True;
26139 goto decode_success;
26140 }
sewardj6faf7cc2012-05-25 15:53:01 +000026141 break;
26142
26143 case 0x40:
26144 /* VDPPS imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 40 /r ib */
26145 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26146 UChar modrm = getUChar(delta);
26147 UInt rG = gregOfRexRM(pfx, modrm);
26148 UInt rV = getVexNvvvv(pfx);
26149 IRTemp dst_vec = newTemp(Ity_V128);
26150 Int imm8;
26151 if (epartIsReg( modrm )) {
26152 UInt rE = eregOfRexRM(pfx,modrm);
26153 imm8 = (Int)getUChar(delta+1);
26154 assign( dst_vec, getXMMReg( rE ) );
26155 delta += 1+1;
26156 DIP( "vdpps $%d,%s,%s,%s\n",
26157 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
26158 } else {
26159 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
26160 imm8 = (Int)getUChar(delta+alen);
26161 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
26162 delta += alen+1;
26163 DIP( "vdpps $%d,%s,%s,%s\n",
26164 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
26165 }
26166
26167 IRTemp src_vec = newTemp(Ity_V128);
26168 assign(src_vec, getXMMReg( rV ));
26169 IRTemp res_vec = math_DPPS_128( src_vec, dst_vec, imm8 );
26170 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
26171 *uses_vvvv = True;
26172 goto decode_success;
26173 }
26174 /* VDPPS imm8, ymm3/m256,ymm2,ymm1 = VEX.NDS.256.66.0F3A.WIG 40 /r ib */
26175 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26176 UChar modrm = getUChar(delta);
26177 UInt rG = gregOfRexRM(pfx, modrm);
26178 UInt rV = getVexNvvvv(pfx);
26179 IRTemp dst_vec = newTemp(Ity_V256);
26180 Int imm8;
26181 if (epartIsReg( modrm )) {
26182 UInt rE = eregOfRexRM(pfx,modrm);
26183 imm8 = (Int)getUChar(delta+1);
26184 assign( dst_vec, getYMMReg( rE ) );
26185 delta += 1+1;
26186 DIP( "vdpps $%d,%s,%s,%s\n",
26187 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
26188 } else {
26189 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
26190 imm8 = (Int)getUChar(delta+alen);
26191 assign( dst_vec, loadLE( Ity_V256, mkexpr(addr) ) );
26192 delta += alen+1;
26193 DIP( "vdpps $%d,%s,%s,%s\n",
26194 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
26195 }
26196
26197 IRTemp src_vec = newTemp(Ity_V256);
26198 assign(src_vec, getYMMReg( rV ));
26199 IRTemp s0, s1, d0, d1;
26200 s0 = s1 = d0 = d1 = IRTemp_INVALID;
26201 breakupV256toV128s( dst_vec, &d1, &d0 );
26202 breakupV256toV128s( src_vec, &s1, &s0 );
26203 putYMMReg( rG, binop( Iop_V128HLtoV256,
26204 mkexpr( math_DPPS_128(s1, d1, imm8) ),
26205 mkexpr( math_DPPS_128(s0, d0, imm8) ) ) );
26206 *uses_vvvv = True;
26207 goto decode_success;
26208 }
26209 break;
26210
26211 case 0x41:
sewardjadf357c2012-06-24 13:44:17 +000026212 /* VDPPD imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 41 /r ib */
sewardj4ed05e02012-06-18 15:01:30 +000026213 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26214 UChar modrm = getUChar(delta);
26215 UInt rG = gregOfRexRM(pfx, modrm);
26216 UInt rV = getVexNvvvv(pfx);
26217 IRTemp dst_vec = newTemp(Ity_V128);
26218 Int imm8;
26219 if (epartIsReg( modrm )) {
26220 UInt rE = eregOfRexRM(pfx,modrm);
26221 imm8 = (Int)getUChar(delta+1);
26222 assign( dst_vec, getXMMReg( rE ) );
26223 delta += 1+1;
26224 DIP( "vdppd $%d,%s,%s,%s\n",
26225 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
26226 } else {
26227 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
26228 imm8 = (Int)getUChar(delta+alen);
26229 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
26230 delta += alen+1;
26231 DIP( "vdppd $%d,%s,%s,%s\n",
26232 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
26233 }
26234
26235 IRTemp src_vec = newTemp(Ity_V128);
26236 assign(src_vec, getXMMReg( rV ));
26237 IRTemp res_vec = math_DPPD_128( src_vec, dst_vec, imm8 );
26238 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
26239 *uses_vvvv = True;
26240 goto decode_success;
26241 }
26242 break;
26243
26244 case 0x42:
26245 /* VMPSADBW imm8, xmm3/m128,xmm2,xmm1 */
26246 /* VMPSADBW = VEX.NDS.128.66.0F3A.WIG 42 /r ib */
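      /* Roughly: MPSADBW forms eight 16-bit sums of absolute differences,
         comparing a 4-byte block picked from one source against eight
         overlapping 4-byte windows of the other; the low bits of imm8
         choose the block offsets.  The arithmetic lives in
         math_MPSADBW_128; only operand fetching happens here. */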
26247 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26248 UChar modrm = getUChar(delta);
26249 Int imm8;
26250 IRTemp src_vec = newTemp(Ity_V128);
26251 IRTemp dst_vec = newTemp(Ity_V128);
26252 UInt rG = gregOfRexRM(pfx, modrm);
26253 UInt rV = getVexNvvvv(pfx);
26254
26255 assign( dst_vec, getXMMReg(rV) );
26256
26257 if ( epartIsReg( modrm ) ) {
26258 UInt rE = eregOfRexRM(pfx, modrm);
26259
26260 imm8 = (Int)getUChar(delta+1);
26261 assign( src_vec, getXMMReg(rE) );
26262 delta += 1+1;
26263 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
26264 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
26265 } else {
26266 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
26267 1/* imm8 is 1 byte after the amode */ );
26268 gen_SEGV_if_not_16_aligned( addr );
26269 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
26270 imm8 = (Int)getUChar(delta+alen);
26271 delta += alen+1;
26272 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
26273 dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
26274 }
26275
26276 putYMMRegLoAndZU( rG, mkexpr( math_MPSADBW_128(dst_vec,
26277 src_vec, imm8) ) );
26278 *uses_vvvv = True;
26279 goto decode_success;
26280 }
26281 break;
26282
26283 case 0x44:
26284 /* VPCLMULQDQ imm8, xmm3/m128,xmm2,xmm1 */
26285 /* VPCLMULQDQ = VEX.NDS.128.66.0F3A.WIG 44 /r ib */
26286 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
26287 * Carry-less multiplication of selected XMM quadwords into XMM
26288 * registers (a.k.a multiplication of polynomials over GF(2))
26289 */
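      /* Bits 0 and 4 of the immediate choose which 64-bit half of each
         source takes part in the carry-less multiply; the GF(2)[x]
         polynomial multiplication itself is done by math_PCLMULQDQ. */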
26290 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26291 UChar modrm = getUChar(delta);
26292 Int imm8;
26293 IRTemp sV = newTemp(Ity_V128);
26294 IRTemp dV = newTemp(Ity_V128);
26295 UInt rG = gregOfRexRM(pfx, modrm);
26296 UInt rV = getVexNvvvv(pfx);
26297
26298 assign( dV, getXMMReg(rV) );
26299
26300 if ( epartIsReg( modrm ) ) {
26301 UInt rE = eregOfRexRM(pfx, modrm);
26302 imm8 = (Int)getUChar(delta+1);
26303 assign( sV, getXMMReg(rE) );
26304 delta += 1+1;
26305 DIP( "vpclmulqdq $%d, %s,%s,%s\n", imm8,
26306 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
26307 } else {
26308 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
26309 1/* imm8 is 1 byte after the amode */ );
26310 assign( sV, loadLE( Ity_V128, mkexpr(addr) ) );
26311 imm8 = (Int)getUChar(delta+alen);
26312 delta += alen+1;
26313 DIP( "vpclmulqdq $%d, %s,%s,%s\n",
26314 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
26315 }
26316
26317 putYMMRegLoAndZU( rG, mkexpr( math_PCLMULQDQ(dV, sV, imm8) ) );
26318 *uses_vvvv = True;
26319 goto decode_success;
26320 }
26321 break;
26322
26323 case 0x4A:
26324 /* VBLENDVPS xmmG, xmmE/memE, xmmV, xmmIS4
26325 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
26326 /* VBLENDVPS = VEX.NDS.128.66.0F3A.WIG 4A /r /is4 */
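      /* The last two arguments give the mask element width in bytes and a
         matching arithmetic right-shift op; presumably dis_VBLENDV_128/256
         use the shift to copy each IS4 mask element's sign bit across the
         whole element before performing the blend. */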
26327 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26328 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
26329 "vblendvps", 4, Iop_SarN32x4 );
26330 *uses_vvvv = True;
26331 goto decode_success;
26332 }
26333 /* VBLENDVPS ymmG, ymmE/memE, ymmV, ymmIS4
26334 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
26335 /* VBLENDVPS = VEX.NDS.256.66.0F3A.WIG 4A /r /is4 */
26336 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26337 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
26338 "vblendvps", 4, Iop_SarN32x4 );
26339 *uses_vvvv = True;
26340 goto decode_success;
26341 }
26342 break;
26343
26344 case 0x4B:
26345 /* VBLENDVPD xmmG, xmmE/memE, xmmV, xmmIS4
26346 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
26347 /* VBLENDVPD = VEX.NDS.128.66.0F3A.WIG 4B /r /is4 */
26348 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26349 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
26350 "vblendvpd", 8, Iop_SarN64x2 );
26351 *uses_vvvv = True;
26352 goto decode_success;
26353 }
26354 /* VBLENDVPD ymmG, ymmE/memE, ymmV, ymmIS4
26355 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
26356 /* VBLENDVPD = VEX.NDS.256.66.0F3A.WIG 4B /r /is4 */
26357 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26358 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
26359 "vblendvpd", 8, Iop_SarN64x2 );
26360 *uses_vvvv = True;
26361 goto decode_success;
26362 }
26363 break;
26364
26365 case 0x4C:
26366 /* VPBLENDVB xmmG, xmmE/memE, xmmV, xmmIS4
26367 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
sewardj4c0a7ac2012-06-21 09:08:19 +000026368 /* VPBLENDVB = VEX.NDS.128.66.0F3A.WIG 4C /r /is4 */
26369 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26370 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
26371 "vpblendvb", 1, Iop_SarN8x16 );
sewardjc4530ae2012-05-21 10:18:49 +000026372 *uses_vvvv = True;
26373 goto decode_success;
sewardj4c0a7ac2012-06-21 09:08:19 +000026374 }
26375 break;
sewardjc4530ae2012-05-21 10:18:49 +000026376
26377 case 0x60:
26378 case 0x61:
26379 case 0x62:
26380 case 0x63:
26381 /* VEX.128.66.0F3A.WIG 63 /r ib = VPCMPISTRI imm8, xmm2/m128, xmm1
26382 VEX.128.66.0F3A.WIG 62 /r ib = VPCMPISTRM imm8, xmm2/m128, xmm1
26383 VEX.128.66.0F3A.WIG 61 /r ib = VPCMPESTRI imm8, xmm2/m128, xmm1
26384 VEX.128.66.0F3A.WIG 60 /r ib = VPCMPESTRM imm8, xmm2/m128, xmm1
26385 (selected special cases that actually occur in glibc,
26386 not by any means a complete implementation.)
26387 */
26388 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26389 Long delta0 = delta;
26390 delta = dis_PCMPxSTRx( vbi, pfx, delta, True/*isAvx*/, opc );
26391 if (delta > delta0) goto decode_success;
26392 /* else fall though; dis_PCMPxSTRx failed to decode it */
26393 }
26394 break;
26395
26396 case 0xDF:
26397 /* VAESKEYGENASSIST imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG DF /r */
26398 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26399 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, True/*isAvx*/ );
26400 goto decode_success;
26401 }
26402 break;
26403
26404 default:
26405 break;
26406
26407 }
26408
26409 //decode_failure:
26410 return deltaIN;
26411
26412 decode_success:
26413 return delta;
26414}
26415
26416
26417/*------------------------------------------------------------*/
26418/*--- ---*/
sewardjdf0e0022005-01-25 15:48:43 +000026419/*--- Disassemble a single instruction ---*/
sewardj80611e32012-01-20 13:07:24 +000026420/*--- ---*/
sewardjdf0e0022005-01-25 15:48:43 +000026421/*------------------------------------------------------------*/
26422
sewardj9e6491a2005-07-02 19:24:10 +000026423/* Disassemble a single instruction into IR. The instruction is
26424 located in host memory at &guest_code[delta]. */
sewardjdf0e0022005-01-25 15:48:43 +000026425
sewardj9e6491a2005-07-02 19:24:10 +000026426static
26427DisResult disInstr_AMD64_WRK (
sewardje9d8a262009-07-01 08:06:34 +000026428 /*OUT*/Bool* expect_CAS,
sewardjc716aea2006-01-17 01:48:46 +000026429 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
sewardj984d9b12010-01-15 10:53:21 +000026430 Bool resteerCisOk,
sewardjc716aea2006-01-17 01:48:46 +000026431 void* callback_opaque,
sewardj9e6491a2005-07-02 19:24:10 +000026432 Long delta64,
sewardjaca070a2006-10-17 00:28:22 +000026433 VexArchInfo* archinfo,
sewardj2e28ac42008-12-04 00:05:12 +000026434 VexAbiInfo* vbi
sewardj9e6491a2005-07-02 19:24:10 +000026435 )
sewardjdf0e0022005-01-25 15:48:43 +000026436{
sewardj80611e32012-01-20 13:07:24 +000026437 IRTemp t1, t2, t3, t4, t5, t6;
26438 UChar pre;
26439 Int n, n_prefixes;
sewardj9e6491a2005-07-02 19:24:10 +000026440 DisResult dres;
sewardjdf0e0022005-01-25 15:48:43 +000026441
sewardj9e6491a2005-07-02 19:24:10 +000026442 /* The running delta */
26443 Long delta = delta64;
26444
sewardjdf0e0022005-01-25 15:48:43 +000026445 /* Holds eip at the start of the insn, so that we can print
26446 consistent error messages for unimplemented insns. */
sewardj270def42005-07-03 01:03:01 +000026447 Long delta_start = delta;
sewardjdf0e0022005-01-25 15:48:43 +000026448
26449 /* sz denotes the nominal data-op size of the insn; we change it to
26450 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of
26451 conflict REX.W takes precedence. */
26452 Int sz = 4;
26453
sewardj3ca55a12005-01-27 16:06:23 +000026454 /* pfx holds the summary of prefixes. */
26455 Prefix pfx = PFX_EMPTY;
sewardjdf0e0022005-01-25 15:48:43 +000026456
sewardjc4530ae2012-05-21 10:18:49 +000026457 /* Holds the computed opcode-escape indication. */
26458 Escape esc = ESC_NONE;
26459
sewardj9e6491a2005-07-02 19:24:10 +000026460 /* Set result defaults. */
sewardjc6f970f2012-04-02 21:54:49 +000026461 dres.whatNext = Dis_Continue;
26462 dres.len = 0;
26463 dres.continueAt = 0;
26464 dres.jk_StopHere = Ijk_INVALID;
sewardje9d8a262009-07-01 08:06:34 +000026465 *expect_CAS = False;
26466
sewardj9e6491a2005-07-02 19:24:10 +000026467 vassert(guest_RIP_next_assumed == 0);
26468 vassert(guest_RIP_next_mustcheck == False);
sewardj4b744762005-02-07 15:02:25 +000026469
sewardj80611e32012-01-20 13:07:24 +000026470 t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID;
sewardjdf0e0022005-01-25 15:48:43 +000026471
sewardj9e6491a2005-07-02 19:24:10 +000026472 DIP("\t0x%llx: ", guest_RIP_bbstart+delta);
26473
sewardjce02aa72006-01-12 12:27:58 +000026474 /* Spot "Special" instructions (see comment at top of file). */
sewardjdf0e0022005-01-25 15:48:43 +000026475 {
26476 UChar* code = (UChar*)(guest_code + delta);
26477 /* Spot the 16-byte preamble:
26478 48C1C703 rolq $3, %rdi
26479 48C1C70D rolq $13, %rdi
26480 48C1C73D rolq $61, %rdi
26481 48C1C733 rolq $51, %rdi
26482 */
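      /* The four rotate amounts sum to 128, i.e. two complete rotations of
         %rdi, so the preamble leaves %rdi unchanged and is architecturally
         a no-op; it merely serves as a marker which is vanishingly unlikely
         to occur in normal code. */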
sewardjce02aa72006-01-12 12:27:58 +000026483 if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7
26484 && code[ 3] == 0x03 &&
26485 code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7
26486 && code[ 7] == 0x0D &&
26487 code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7
26488 && code[11] == 0x3D &&
26489 code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7
26490 && code[15] == 0x33) {
26491 /* Got a "Special" instruction preamble. Which one is it? */
26492 if (code[16] == 0x48 && code[17] == 0x87
26493 && code[18] == 0xDB /* xchgq %rbx,%rbx */) {
26494 /* %RDX = client_request ( %RAX ) */
26495 DIP("%%rdx = client_request ( %%rax )\n");
26496 delta += 19;
sewardjc6f970f2012-04-02 21:54:49 +000026497 jmp_lit(&dres, Ijk_ClientReq, guest_RIP_bbstart+delta);
26498 vassert(dres.whatNext == Dis_StopHere);
sewardjce02aa72006-01-12 12:27:58 +000026499 goto decode_success;
26500 }
26501 else
26502 if (code[16] == 0x48 && code[17] == 0x87
26503 && code[18] == 0xC9 /* xchgq %rcx,%rcx */) {
26504 /* %RAX = guest_NRADDR */
26505 DIP("%%rax = guest_NRADDR\n");
26506 delta += 19;
26507 putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
26508 goto decode_success;
26509 }
26510 else
26511 if (code[16] == 0x48 && code[17] == 0x87
26512 && code[18] == 0xD2 /* xchgq %rdx,%rdx */) {
26513 /* call-noredir *%RAX */
26514 DIP("call-noredir *%%rax\n");
26515 delta += 19;
26516 t1 = newTemp(Ity_I64);
26517 assign(t1, getIRegRAX(8));
26518 t2 = newTemp(Ity_I64);
26519 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
26520 putIReg64(R_RSP, mkexpr(t2));
26521 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta));
sewardjc6f970f2012-04-02 21:54:49 +000026522 jmp_treg(&dres, Ijk_NoRedir, t1);
26523 vassert(dres.whatNext == Dis_StopHere);
sewardjce02aa72006-01-12 12:27:58 +000026524 goto decode_success;
26525 }
26526 /* We don't know what it is. */
26527 goto decode_failure;
26528 /*NOTREACHED*/
sewardjdf0e0022005-01-25 15:48:43 +000026529 }
26530 }
26531
26532 /* Eat prefixes, summarising the result in pfx and sz, and rejecting
26533 as many invalid combinations as possible. */
26534 n_prefixes = 0;
26535 while (True) {
sewardj54477e32007-08-23 18:53:59 +000026536 if (n_prefixes > 7) goto decode_failure;
sewardj8c332e22005-01-28 01:36:56 +000026537 pre = getUChar(delta);
sewardjdf0e0022005-01-25 15:48:43 +000026538 switch (pre) {
26539 case 0x66: pfx |= PFX_66; break;
26540 case 0x67: pfx |= PFX_ASO; break;
26541 case 0xF2: pfx |= PFX_F2; break;
26542 case 0xF3: pfx |= PFX_F3; break;
sewardje9d8a262009-07-01 08:06:34 +000026543 case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break;
sewardjdf0e0022005-01-25 15:48:43 +000026544 case 0x2E: pfx |= PFX_CS; break;
26545 case 0x3E: pfx |= PFX_DS; break;
26546 case 0x26: pfx |= PFX_ES; break;
26547 case 0x64: pfx |= PFX_FS; break;
26548 case 0x65: pfx |= PFX_GS; break;
26549 case 0x36: pfx |= PFX_SS; break;
26550 case 0x40 ... 0x4F:
26551 pfx |= PFX_REX;
26552 if (pre & (1<<3)) pfx |= PFX_REXW;
26553 if (pre & (1<<2)) pfx |= PFX_REXR;
26554 if (pre & (1<<1)) pfx |= PFX_REXX;
26555 if (pre & (1<<0)) pfx |= PFX_REXB;
26556 break;
26557 default:
sewardjc4530ae2012-05-21 10:18:49 +000026558 goto not_a_legacy_prefix;
sewardjdf0e0022005-01-25 15:48:43 +000026559 }
26560 n_prefixes++;
26561 delta++;
26562 }
26563
sewardjc4530ae2012-05-21 10:18:49 +000026564 not_a_legacy_prefix:
26565 /* We've used up all the non-VEX prefixes. Parse and validate a
26566 VEX prefix if that's appropriate. */
26567 if (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX) {
26568 /* Used temporarily for holding VEX prefixes. */
26569 UChar vex0 = getUChar(delta);
26570 if (vex0 == 0xC4) {
26571 /* 3-byte VEX */
26572 UChar vex1 = getUChar(delta+1);
26573 UChar vex2 = getUChar(delta+2);
26574 delta += 3;
26575 pfx |= PFX_VEX;
26576 /* Snarf contents of byte 1 */
26577 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
26578 /* X */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_REXX;
26579 /* B */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_REXB;
26580 /* m-mmmm */
26581 switch (vex1 & 0x1F) {
26582 case 1: esc = ESC_0F; break;
26583 case 2: esc = ESC_0F38; break;
26584 case 3: esc = ESC_0F3A; break;
26585 /* Any other m-mmmm field will #UD */
26586 default: goto decode_failure;
26587 }
26588 /* Snarf contents of byte 2 */
26589 /* W */ pfx |= (vex2 & (1<<7)) ? PFX_REXW : 0;
26590 /* ~v3 */ pfx |= (vex2 & (1<<6)) ? 0 : PFX_VEXnV3;
26591 /* ~v2 */ pfx |= (vex2 & (1<<5)) ? 0 : PFX_VEXnV2;
26592 /* ~v1 */ pfx |= (vex2 & (1<<4)) ? 0 : PFX_VEXnV1;
26593 /* ~v0 */ pfx |= (vex2 & (1<<3)) ? 0 : PFX_VEXnV0;
26594 /* L */ pfx |= (vex2 & (1<<2)) ? PFX_VEXL : 0;
26595 /* pp */
26596 switch (vex2 & 3) {
26597 case 0: break;
26598 case 1: pfx |= PFX_66; break;
26599 case 2: pfx |= PFX_F3; break;
26600 case 3: pfx |= PFX_F2; break;
26601 default: vassert(0);
26602 }
26603 }
26604 else if (vex0 == 0xC5) {
26605 /* 2-byte VEX */
26606 UChar vex1 = getUChar(delta+1);
26607 delta += 2;
26608 pfx |= PFX_VEX;
26609 /* Snarf contents of byte 1 */
26610 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
26611 /* ~v3 */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_VEXnV3;
26612 /* ~v2 */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_VEXnV2;
26613 /* ~v1 */ pfx |= (vex1 & (1<<4)) ? 0 : PFX_VEXnV1;
26614 /* ~v0 */ pfx |= (vex1 & (1<<3)) ? 0 : PFX_VEXnV0;
26615 /* L */ pfx |= (vex1 & (1<<2)) ? PFX_VEXL : 0;
26616 /* pp */
26617 switch (vex1 & 3) {
26618 case 0: break;
26619 case 1: pfx |= PFX_66; break;
26620 case 2: pfx |= PFX_F3; break;
26621 case 3: pfx |= PFX_F2; break;
26622 default: vassert(0);
26623 }
26624 /* implied: */
26625 esc = ESC_0F;
26626 }
26627 /* Can't have both VEX and REX */
26628 if ((pfx & PFX_VEX) && (pfx & PFX_REX))
26629 goto decode_failure; /* can't have both */
26630 }
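   /* For example (purely illustrative): the 3-byte prefix C4 E3 79 ahead
      of opcode 0x0B would give ESC_0F3A, REX.W clear, vvvv = 0 (%xmm0),
      VEX.L = 0 (128 bit) and an implied 66 prefix (pp = 1) -- the encoding
      used by VROUNDSD. */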
sewardjdf0e0022005-01-25 15:48:43 +000026631
sewardj42561ef2005-11-04 14:18:31 +000026632 /* Dump invalid combinations */
sewardjdf0e0022005-01-25 15:48:43 +000026633 n = 0;
26634 if (pfx & PFX_F2) n++;
26635 if (pfx & PFX_F3) n++;
sewardj3ca55a12005-01-27 16:06:23 +000026636 if (n > 1)
26637 goto decode_failure; /* can't have both */
sewardjdf0e0022005-01-25 15:48:43 +000026638
26639 n = 0;
26640 if (pfx & PFX_CS) n++;
26641 if (pfx & PFX_DS) n++;
26642 if (pfx & PFX_ES) n++;
26643 if (pfx & PFX_FS) n++;
26644 if (pfx & PFX_GS) n++;
26645 if (pfx & PFX_SS) n++;
sewardj3ca55a12005-01-27 16:06:23 +000026646 if (n > 1)
26647 goto decode_failure; /* multiple seg overrides == illegal */
sewardjdf0e0022005-01-25 15:48:43 +000026648
sewardjceccb292009-01-22 20:40:22 +000026649 /* We have a %fs prefix. Reject it if there's no evidence in 'vbi'
26650 that we should accept it. */
26651 if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_zero)
26652 goto decode_failure;
26653
26654 /* Ditto for %gs prefixes. */
26655 if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_0x60)
26656 goto decode_failure;
sewardj42561ef2005-11-04 14:18:31 +000026657
sewardjdf0e0022005-01-25 15:48:43 +000026658 /* Set up sz. */
26659 sz = 4;
26660 if (pfx & PFX_66) sz = 2;
26661 if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8;
26662
sewardje9d8a262009-07-01 08:06:34 +000026663 /* Now we should be looking at the primary opcode byte or the
sewardj80611e32012-01-20 13:07:24 +000026664 leading escapes. Check that any LOCK prefix is actually
sewardje9d8a262009-07-01 08:06:34 +000026665 allowed. */
sewardj9ff93bc2005-03-23 11:25:12 +000026666 if (pfx & PFX_LOCK) {
sewardjc4356f02007-11-09 21:15:04 +000026667 if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) {
sewardjc4356f02007-11-09 21:15:04 +000026668 DIP("lock ");
26669 } else {
sewardje9d8a262009-07-01 08:06:34 +000026670 *expect_CAS = False;
sewardjc4356f02007-11-09 21:15:04 +000026671 goto decode_failure;
26672 }
sewardjdf0e0022005-01-25 15:48:43 +000026673 }
26674
sewardj80611e32012-01-20 13:07:24 +000026675 /* Eat up opcode escape bytes, until we're really looking at the
sewardjc4530ae2012-05-21 10:18:49 +000026676 primary opcode byte. But only if there's no VEX present. */
26677 if (!(pfx & PFX_VEX)) {
26678 vassert(esc == ESC_NONE);
sewardj80611e32012-01-20 13:07:24 +000026679 pre = getUChar(delta);
sewardjc4530ae2012-05-21 10:18:49 +000026680 if (pre == 0x0F) {
26681 delta++;
26682 pre = getUChar(delta);
26683 switch (pre) {
26684 case 0x38: esc = ESC_0F38; delta++; break;
26685 case 0x3A: esc = ESC_0F3A; delta++; break;
26686 default: esc = ESC_0F; break;
26687 }
sewardj80611e32012-01-20 13:07:24 +000026688 }
26689 }
26690
26691 /* So now we're really really looking at the primary opcode
26692 byte. */
26693 Long delta_at_primary_opcode = delta;
sewardjc4530ae2012-05-21 10:18:49 +000026694
26695 if (!(pfx & PFX_VEX)) {
26696 /* Handle non-VEX prefixed instructions. "Legacy" (non-VEX) SSE
26697 instructions preserve the upper 128 bits of YMM registers;
26698 iow we can simply ignore the presence of the upper halves of
26699 these registers. */
26700 switch (esc) {
26701 case ESC_NONE:
26702 delta = dis_ESC_NONE( &dres, expect_CAS,
26703 resteerOkFn, resteerCisOk, callback_opaque,
26704 archinfo, vbi, pfx, sz, delta );
26705 break;
26706 case ESC_0F:
26707 delta = dis_ESC_0F ( &dres, expect_CAS,
26708 resteerOkFn, resteerCisOk, callback_opaque,
26709 archinfo, vbi, pfx, sz, delta );
26710 break;
26711 case ESC_0F38:
26712 delta = dis_ESC_0F38( &dres,
26713 resteerOkFn, resteerCisOk, callback_opaque,
26714 archinfo, vbi, pfx, sz, delta );
26715 break;
26716 case ESC_0F3A:
26717 delta = dis_ESC_0F3A( &dres,
26718 resteerOkFn, resteerCisOk, callback_opaque,
26719 archinfo, vbi, pfx, sz, delta );
26720 break;
26721 default:
26722 vassert(0);
26723 }
26724 } else {
26725 /* VEX prefixed instruction */
26726 /* Sloppy Intel wording: "An instruction encoded with a VEX.128
26727 prefix that loads a YMM register operand ..." zeroes out bits
26728 128 and above of the register. */
26729 Bool uses_vvvv = False;
26730 switch (esc) {
26731 case ESC_0F:
26732 delta = dis_ESC_0F__VEX ( &dres, &uses_vvvv,
26733 resteerOkFn, resteerCisOk,
26734 callback_opaque,
26735 archinfo, vbi, pfx, sz, delta );
26736 break;
26737 case ESC_0F38:
26738 delta = dis_ESC_0F38__VEX ( &dres, &uses_vvvv,
26739 resteerOkFn, resteerCisOk,
26740 callback_opaque,
26741 archinfo, vbi, pfx, sz, delta );
26742 break;
26743 case ESC_0F3A:
26744 delta = dis_ESC_0F3A__VEX ( &dres, &uses_vvvv,
26745 resteerOkFn, resteerCisOk,
26746 callback_opaque,
26747 archinfo, vbi, pfx, sz, delta );
26748 break;
sewardjac75d7b2012-05-23 12:42:39 +000026749 case ESC_NONE:
26750 /* The presence of a VEX prefix, by Intel definition,
26751 always implies at least an 0F escape. */
26752 goto decode_failure;
sewardjc4530ae2012-05-21 10:18:49 +000026753 default:
sewardjac75d7b2012-05-23 12:42:39 +000026754 vassert(0);
sewardjc4530ae2012-05-21 10:18:49 +000026755 }
26756 /* If the insn doesn't use VEX.vvvv then it must be all ones.
26757 Check this. */
26758 if (!uses_vvvv) {
26759 if (getVexNvvvv(pfx) != 0)
26760 goto decode_failure;
26761 }
sewardj80611e32012-01-20 13:07:24 +000026762 }
sewardjc4530ae2012-05-21 10:18:49 +000026763
sewardj80611e32012-01-20 13:07:24 +000026764 vassert(delta - delta_at_primary_opcode >= 0);
26765 vassert(delta - delta_at_primary_opcode < 16/*let's say*/);
26766
26767 /* Use delta == delta_at_primary_opcode to denote decode failure.
26768 This implies that any successful decode must use at least one
26769 byte up. */
26770 if (delta == delta_at_primary_opcode)
26771 goto decode_failure;
26772 else
26773 goto decode_success; /* \o/ */
26774
26775#if 0 /* XYZZY */
sewardja6b93d12005-02-17 09:28:28 +000026776
26777 /* ---------------------------------------------------- */
sewardj09717342005-05-05 21:34:02 +000026778 /* --- The SSE/SSE2 decoder. --- */
sewardja6b93d12005-02-17 09:28:28 +000026779 /* ---------------------------------------------------- */
26780
26781 /* What did I do to deserve SSE ? Perhaps I was really bad in a
26782 previous life? */
26783
sewardj09717342005-05-05 21:34:02 +000026784 /* Note, this doesn't handle SSE3 right now. All amd64s support
26785 SSE2 as a minimum so there is no point distinguishing SSE1 vs
26786 SSE2. */
26787
sewardja6b93d12005-02-17 09:28:28 +000026788 insn = (UChar*)&guest_code[delta];
26789
sewardj5abcfe62007-01-10 04:59:33 +000026790 /* FXSAVE is spuriously at the start here only because it is
26791 thusly placed in guest-x86/toIR.c. */
26792
sewardj5abcfe62007-01-10 04:59:33 +000026793 /* ------ SSE decoder main ------ */
sewardj432f8b62005-05-10 02:50:05 +000026794
sewardj5992bd02005-05-11 02:13:42 +000026795 /* ---------------------------------------------------- */
26796 /* --- end of the SSE decoder. --- */
26797 /* ---------------------------------------------------- */
26798
26799 /* ---------------------------------------------------- */
26800 /* --- start of the SSE2 decoder. --- */
26801 /* ---------------------------------------------------- */
sewardj4c328cf2005-05-05 12:05:54 +000026802
sewardjdf0e0022005-01-25 15:48:43 +000026803 /* ---------------------------------------------------- */
26804 /* --- end of the SSE/SSE2 decoder. --- */
26805 /* ---------------------------------------------------- */
26806
sewardjfcf21f32006-08-04 14:51:19 +000026807 /* ---------------------------------------------------- */
26808 /* --- start of the SSE3 decoder. --- */
26809 /* ---------------------------------------------------- */
26810
sewardjfcf21f32006-08-04 14:51:19 +000026811 /* ---------------------------------------------------- */
26812 /* --- end of the SSE3 decoder. --- */
26813 /* ---------------------------------------------------- */
26814
sewardjd166e282008-02-06 11:42:45 +000026815 /* ---------------------------------------------------- */
26816 /* --- start of the SSSE3 decoder. --- */
26817 /* ---------------------------------------------------- */
26818
sewardjd166e282008-02-06 11:42:45 +000026819 /* ---------------------------------------------------- */
26820 /* --- end of the SSSE3 decoder. --- */
26821 /* ---------------------------------------------------- */
26822
de5a70f5c2010-04-01 23:08:59 +000026823 /* ---------------------------------------------------- */
26824 /* --- start of the SSE4 decoder --- */
26825 /* ---------------------------------------------------- */
26826
de5a70f5c2010-04-01 23:08:59 +000026827 /* ---------------------------------------------------- */
26828 /* --- end of the SSE4 decoder --- */
26829 /* ---------------------------------------------------- */
26830
sewardj7a240552005-01-28 21:37:12 +000026831 /*after_sse_decoders:*/
sewardjdf0e0022005-01-25 15:48:43 +000026832
26833 /* Get the primary opcode. */
sewardj8c332e22005-01-28 01:36:56 +000026834 opc = getUChar(delta); delta++;
sewardjdf0e0022005-01-25 15:48:43 +000026835
26836 /* We get here if the current insn isn't SSE, or this CPU doesn't
26837 support SSE. */
26838
26839 switch (opc) {
26840
26841 /* ------------------------ Control flow --------------- */
26842
sewardj3ca55a12005-01-27 16:06:23 +000026843 /* ------------------------ CWD/CDQ -------------------- */
26844
sewardj8d965312005-02-25 02:48:47 +000026845 /* ------------------------ FPU ops -------------------- */
26846
sewardj4fa325a2005-11-03 13:27:24 +000026847 /* ------------------------ INT ------------------------ */
26848
26849 case 0xCD: { /* INT imm8 */
26850 IRJumpKind jk = Ijk_Boring;
26851 if (have66orF2orF3(pfx)) goto decode_failure;
26852 d64 = getUChar(delta); delta++;
26853 switch (d64) {
26854 case 32: jk = Ijk_Sys_int32; break;
26855 default: goto decode_failure;
26856 }
26857 guest_RIP_next_mustcheck = True;
26858 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
26859 jmp_lit(jk, guest_RIP_next_assumed);
26860 /* It's important that all ArchRegs carry their up-to-date value
26861 at this point. So we declare an end-of-block here, which
26862 forces any TempRegs caching ArchRegs to be flushed. */
sewardjc6f970f2012-04-02 21:54:49 +000026863 vassert(dres.whatNext == Dis_StopHere);
sewardj4fa325a2005-11-03 13:27:24 +000026864 DIP("int $0x%02x\n", (UInt)d64);
26865 break;
26866 }
26867
sewardjf8c37f72005-02-07 18:55:29 +000026868 /* ------------------------ Jcond, byte offset --------- */
26869
sewardj32b2bbe2005-01-28 00:50:10 +000026870 /* ------------------------ IMUL ----------------------- */
26871
sewardj1389d4d2005-01-28 13:46:29 +000026872 /* ------------------------ MOV ------------------------ */
26873
sewardj5e525292005-01-28 15:13:10 +000026874 /* ------------------------ MOVx ------------------------ */
26875
sewardj4c328cf2005-05-05 12:05:54 +000026876 /* ------------------------ opl imm, A ----------------- */
26877
sewardj118b23e2005-01-29 02:14:44 +000026878 /* ------------------------ opl Ev, Gv ----------------- */
sewardj118b23e2005-01-29 02:14:44 +000026879
26880 /* ------------------------ opl Gv, Ev ----------------- */
26881
sewardj55dbb262005-01-28 16:36:51 +000026882 /* ------------------------ POP ------------------------ */
26883
sewardj55dbb262005-01-28 16:36:51 +000026884 /* ------------------------ PUSH ----------------------- */
26885
sewardj909c06d2005-02-19 22:47:41 +000026886 /* ------ AE: SCAS variants ------ */
sewardjd0a12df2005-02-10 02:07:43 +000026887
sewardj909c06d2005-02-19 22:47:41 +000026888 /* ------ A6, A7: CMPS variants ------ */
sewardja6b93d12005-02-17 09:28:28 +000026889
sewardj909c06d2005-02-19 22:47:41 +000026890 /* ------ AA, AB: STOS variants ------ */
sewardjd0a12df2005-02-10 02:07:43 +000026891
sewardj909c06d2005-02-19 22:47:41 +000026892 /* ------ A4, A5: MOVS variants ------ */
sewardj7de0d3c2005-02-13 02:26:41 +000026893
26894 /* ------------------------ XCHG ----------------------- */
26895
sewardjbb4396c2007-11-20 17:29:08 +000026896 /* ------------------------ IN / OUT ----------------------- */
26897
sewardj3ca55a12005-01-27 16:06:23 +000026898 /* ------------------------ (Grp1 extensions) ---------- */
26899
sewardj118b23e2005-01-29 02:14:44 +000026900 /* ------------------------ (Grp2 extensions) ---------- */
sewardj03b07cc2005-01-31 18:09:43 +000026901
sewardj32b2bbe2005-01-28 00:50:10 +000026902 /* ------------------------ (Grp3 extensions) ---------- */
26903
sewardj03b07cc2005-01-31 18:09:43 +000026904 /* ------------------------ (Grp4 extensions) ---------- */
26905
sewardj354e5c62005-01-27 20:12:52 +000026906 /* ------------------------ (Grp5 extensions) ---------- */
26907
sewardj3ca55a12005-01-27 16:06:23 +000026908 /* ------------------------ Escapes to 2-byte opcodes -- */
26909
26910 case 0x0F: {
sewardj8c332e22005-01-28 01:36:56 +000026911 opc = getUChar(delta); delta++;
sewardj3ca55a12005-01-27 16:06:23 +000026912 switch (opc) {
26913
sewardj1d511802005-03-27 17:59:45 +000026914 /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */
26915
sewardjf53b7352005-04-06 20:01:56 +000026916 /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */
26917
sewardj82c9f2f2005-03-02 16:05:13 +000026918 /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */
26919
sewardj9ed16802005-08-24 10:46:19 +000026920 /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */
26921
sewardj3ca55a12005-01-27 16:06:23 +000026922 /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */
sewardj3ca55a12005-01-27 16:06:23 +000026923
sewardja6b93d12005-02-17 09:28:28 +000026924 /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */
26925
sewardjd0a12df2005-02-10 02:07:43 +000026926 /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */
26927
sewardj5e525292005-01-28 15:13:10 +000026928 /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */
26929
sewardj32b2bbe2005-01-28 00:50:10 +000026930 /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */
26931
sewardjec387ca2006-08-01 18:36:25 +000026932 /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */
26933
sewardj1389d4d2005-01-28 13:46:29 +000026934 /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
sewardj1389d4d2005-01-28 13:46:29 +000026935
sewardjb04a47c2005-08-10 12:27:46 +000026936 /* =-=-=-=-=-=-=-=-=- PREFETCH =-=-=-=-=-=-=-=-=-= */
26937 case 0x0D: /* 0F 0D /0 -- prefetch mem8 */
26938 /* 0F 0D /1 -- prefetchw mem8 */
26939 if (have66orF2orF3(pfx)) goto decode_failure;
26940 modrm = getUChar(delta);
26941 if (epartIsReg(modrm)) goto decode_failure;
26942 if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1)
26943 goto decode_failure;
26944
sewardj2e28ac42008-12-04 00:05:12 +000026945 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardjb04a47c2005-08-10 12:27:46 +000026946 delta += alen;
26947
26948 switch (gregLO3ofRM(modrm)) {
26949 case 0: DIP("prefetch %s\n", dis_buf); break;
26950 case 1: DIP("prefetchw %s\n", dis_buf); break;
26951 default: vassert(0); /*NOTREACHED*/
26952 }
26953 break;
26954
sewardj31191072005-02-05 18:24:47 +000026955 /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
sewardj118b23e2005-01-29 02:14:44 +000026956
26957 /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
sewardj118b23e2005-01-29 02:14:44 +000026958
sewardj33ef9c22005-11-04 20:05:57 +000026959 /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */
26960
sewardje1698952005-02-08 15:02:39 +000026961 /* =-=-=-=-=-=-=-=-=- SYSCALL -=-=-=-=-=-=-=-=-=-= */
sewardje1698952005-02-08 15:02:39 +000026962
sewardjb4fd2e72005-03-23 13:34:11 +000026963 /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */
26964
sewardj0d923d72008-05-13 21:21:16 +000026965 case 0xC0: { /* XADD Gb,Eb */
26966 Bool decode_OK = False;
sewardj2e28ac42008-12-04 00:05:12 +000026967 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta );
sewardj0d923d72008-05-13 21:21:16 +000026968 if (!decode_OK)
26969 goto decode_failure;
26970 break;
26971 }
sewardj3ca55a12005-01-27 16:06:23 +000026972
sewardjb9dc2432010-06-07 16:22:22 +000026973 /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
sewardjb9dc2432010-06-07 16:22:22 +000026974
sewardj3ca55a12005-01-27 16:06:23 +000026975 /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */
26976
26977 default:
26978 goto decode_failure;
26979 } /* switch (opc) for the 2-byte opcodes */
26980 goto decode_success;
26981 } /* case 0x0F: of primary opcode */
sewardjdf0e0022005-01-25 15:48:43 +000026982
26983 /* ------------------------ ??? ------------------------ */
sewardj80611e32012-01-20 13:07:24 +000026984#endif /* XYZZY */
sewardjdf0e0022005-01-25 15:48:43 +000026985
sewardj80611e32012-01-20 13:07:24 +000026986 //default:
sewardjdf0e0022005-01-25 15:48:43 +000026987 decode_failure:
26988 /* All decode failures end up here. */
26989 vex_printf("vex amd64->IR: unhandled instruction bytes: "
sewardja42c6c62011-01-17 11:58:47 +000026990 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
sewardj8c332e22005-01-28 01:36:56 +000026991 (Int)getUChar(delta_start+0),
26992 (Int)getUChar(delta_start+1),
26993 (Int)getUChar(delta_start+2),
sewardjd166e282008-02-06 11:42:45 +000026994 (Int)getUChar(delta_start+3),
26995 (Int)getUChar(delta_start+4),
sewardja42c6c62011-01-17 11:58:47 +000026996 (Int)getUChar(delta_start+5),
26997 (Int)getUChar(delta_start+6),
26998 (Int)getUChar(delta_start+7) );
sewardjc4530ae2012-05-21 10:18:49 +000026999 vex_printf("vex amd64->IR: REX=%d REX.W=%d REX.R=%d REX.X=%d REX.B=%d\n",
27000 haveREX(pfx) ? 1 : 0, getRexW(pfx), getRexR(pfx),
27001 getRexX(pfx), getRexB(pfx));
27002 vex_printf("vex amd64->IR: VEX=%d VEX.L=%d VEX.nVVVV=0x%x ESC=%s\n",
27003 haveVEX(pfx) ? 1 : 0, getVexL(pfx),
27004 getVexNvvvv(pfx),
27005 esc==ESC_NONE ? "NONE" :
27006 esc==ESC_0F ? "0F" :
27007 esc==ESC_0F38 ? "0F38" :
27008 esc==ESC_0F3A ? "0F3A" : "???");
27009 vex_printf("vex amd64->IR: PFX.66=%d PFX.F2=%d PFX.F3=%d\n",
27010 have66(pfx) ? 1 : 0, haveF2(pfx) ? 1 : 0,
27011 haveF3(pfx) ? 1 : 0);
sewardjdf0e0022005-01-25 15:48:43 +000027012
27013 /* Tell the dispatcher that this insn cannot be decoded, and so has
27014 not been executed, and (is currently) the next to be executed.
27015 RIP should be up-to-date since it made so at the start of each
27016 insn, but nevertheless be paranoid and update it again right
27017 now. */
sewardj9e6491a2005-07-02 19:24:10 +000027018 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
sewardjc6f970f2012-04-02 21:54:49 +000027019 jmp_lit(&dres, Ijk_NoDecode, guest_RIP_curr_instr);
27020 vassert(dres.whatNext == Dis_StopHere);
27021 dres.len = 0;
sewardje9d8a262009-07-01 08:06:34 +000027022 /* We also need to say that a CAS is not expected now, regardless
27023 of what it might have been set to at the start of the function,
27024 since the IR that we've emitted just above (to synthesis a
27025 SIGILL) does not involve any CAS, and presumably no other IR has
27026 been emitted for this (non-decoded) insn. */
27027 *expect_CAS = False;
sewardj9e6491a2005-07-02 19:24:10 +000027028 return dres;
sewardjdf0e0022005-01-25 15:48:43 +000027029
sewardj80611e32012-01-20 13:07:24 +000027030 // } /* switch (opc) for the main (primary) opcode switch. */
sewardjdf0e0022005-01-25 15:48:43 +000027031
27032 decode_success:
27033 /* All decode successes end up here. */
sewardjc6f970f2012-04-02 21:54:49 +000027034 switch (dres.whatNext) {
27035 case Dis_Continue:
27036 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
27037 break;
27038 case Dis_ResteerU:
27039 case Dis_ResteerC:
27040 stmt( IRStmt_Put( OFFB_RIP, mkU64(dres.continueAt) ) );
27041 break;
27042 case Dis_StopHere:
27043 break;
27044 default:
27045 vassert(0);
27046 }
27047
sewardjdf0e0022005-01-25 15:48:43 +000027048 DIP("\n");
sewardj9e6491a2005-07-02 19:24:10 +000027049 dres.len = (Int)toUInt(delta - delta_start);
27050 return dres;
sewardjdf0e0022005-01-25 15:48:43 +000027051}
27052
27053#undef DIP
27054#undef DIS
sewardjd20c8852005-01-20 20:04:07 +000027055
sewardj9e6491a2005-07-02 19:24:10 +000027056
27057/*------------------------------------------------------------*/
27058/*--- Top-level fn ---*/
27059/*------------------------------------------------------------*/
27060
27061/* Disassemble a single instruction into IR. The instruction
27062 is located in host memory at &guest_code[delta]. */

DisResult disInstr_AMD64 ( IRSB*        irsb_IN,
                           Bool         (*resteerOkFn) ( void*, Addr64 ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           UChar*       guest_code_IN,
                           Long         delta,
                           Addr64       guest_IP,
                           VexArch      guest_arch,
                           VexArchInfo* archinfo,
                           VexAbiInfo*  abiinfo,
                           Bool         host_bigendian_IN )
{
   Int       i, x1, x2;
   Bool      expect_CAS, has_CAS;
   DisResult dres;

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchAMD64);
   guest_code           = guest_code_IN;
   irsb                 = irsb_IN;
   host_is_bigendian    = host_bigendian_IN;
   guest_RIP_curr_instr = guest_IP;
   guest_RIP_bbstart    = guest_IP - delta;

   /* We'll consult these after doing disInstr_AMD64_WRK. */
   guest_RIP_next_assumed   = 0;
   guest_RIP_next_mustcheck = False;

   x1 = irsb_IN->stmts_used;
   expect_CAS = False;
   dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
                               resteerCisOk,
                               callback_opaque,
                               delta, archinfo, abiinfo );
   x2 = irsb_IN->stmts_used;
   vassert(x2 >= x1);

   /* If disInstr_AMD64_WRK tried to figure out the next rip, check it
      got it right.  Failure of this assertion is serious and denotes
      a bug in disInstr. */
   if (guest_RIP_next_mustcheck
       && guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) {
      vex_printf("\n");
      vex_printf("assumed next %%rip = 0x%llx\n",
                 guest_RIP_next_assumed );
      vex_printf(" actual next %%rip = 0x%llx\n",
                 guest_RIP_curr_instr + dres.len );
      vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
   }

   /* See comment at the top of disInstr_AMD64_WRK for meaning of
      expect_CAS.  Here, we (sanity-)check for the presence/absence of
      IRCAS as directed by the returned expect_CAS value. */
   has_CAS = False;
   for (i = x1; i < x2; i++) {
      if (irsb_IN->stmts[i]->tag == Ist_CAS)
         has_CAS = True;
   }

   if (expect_CAS != has_CAS) {
      /* inconsistency detected.  re-disassemble the instruction so as
         to generate a useful error message; then assert. */
      vex_traceflags |= VEX_TRACE_FE;
      dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
                                  resteerCisOk,
                                  callback_opaque,
                                  delta, archinfo, abiinfo );
      for (i = x1; i < x2; i++) {
         vex_printf("\t\t");
         ppIRStmt(irsb_IN->stmts[i]);
         vex_printf("\n");
      }
      /* Failure of this assertion is serious and denotes a bug in
         disInstr. */
      vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
   }

   return dres;
}


/*------------------------------------------------------------*/
/*--- Unused stuff                                         ---*/
/*------------------------------------------------------------*/

// A potentially more Memcheck-friendly version of gen_LZCNT, if
// this should ever be needed.
//
//static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
//{
//   /* Scheme is simple: propagate the most significant 1-bit into all
//      lower positions in the word.  This gives a word of the form
//      0---01---1.  Now invert it, giving a word of the form
//      1---10---0, then do a population-count idiom (to count the 1s,
//      which is the number of leading zeroes, or the word size if the
//      original word was 0).
//   */
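//   /* Worked example (16-bit, added purely for illustration): for
//      src = 0x0130, the successive ORs with shifts of 1, 2, 4 and 8
//      give 0x01B8, 0x01FE, 0x01FF and 0x01FF; inverting gives 0xFE00,
//      whose population count is 7 -- exactly the number of leading
//      zeroes in the 16-bit value 0x0130. */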
//   Int i;
//   IRTemp t[7];
//   for (i = 0; i < 7; i++) {
//      t[i] = newTemp(ty);
//   }
//   if (ty == Ity_I64) {
//      assign(t[0], binop(Iop_Or64, mkexpr(src),
//                         binop(Iop_Shr64, mkexpr(src),  mkU8(1))));
//      assign(t[1], binop(Iop_Or64, mkexpr(t[0]),
//                         binop(Iop_Shr64, mkexpr(t[0]), mkU8(2))));
//      assign(t[2], binop(Iop_Or64, mkexpr(t[1]),
//                         binop(Iop_Shr64, mkexpr(t[1]), mkU8(4))));
//      assign(t[3], binop(Iop_Or64, mkexpr(t[2]),
//                         binop(Iop_Shr64, mkexpr(t[2]), mkU8(8))));
//      assign(t[4], binop(Iop_Or64, mkexpr(t[3]),
//                         binop(Iop_Shr64, mkexpr(t[3]), mkU8(16))));
//      assign(t[5], binop(Iop_Or64, mkexpr(t[4]),
//                         binop(Iop_Shr64, mkexpr(t[4]), mkU8(32))));
//      assign(t[6], unop(Iop_Not64, mkexpr(t[5])));
//      return gen_POPCOUNT(ty, t[6]);
//   }
//   if (ty == Ity_I32) {
//      assign(t[0], binop(Iop_Or32, mkexpr(src),
//                         binop(Iop_Shr32, mkexpr(src),  mkU8(1))));
//      assign(t[1], binop(Iop_Or32, mkexpr(t[0]),
//                         binop(Iop_Shr32, mkexpr(t[0]), mkU8(2))));
//      assign(t[2], binop(Iop_Or32, mkexpr(t[1]),
//                         binop(Iop_Shr32, mkexpr(t[1]), mkU8(4))));
//      assign(t[3], binop(Iop_Or32, mkexpr(t[2]),
//                         binop(Iop_Shr32, mkexpr(t[2]), mkU8(8))));
//      assign(t[4], binop(Iop_Or32, mkexpr(t[3]),
//                         binop(Iop_Shr32, mkexpr(t[3]), mkU8(16))));
//      assign(t[5], unop(Iop_Not32, mkexpr(t[4])));
//      return gen_POPCOUNT(ty, t[5]);
//   }
//   if (ty == Ity_I16) {
//      assign(t[0], binop(Iop_Or16, mkexpr(src),
//                         binop(Iop_Shr16, mkexpr(src),  mkU8(1))));
//      assign(t[1], binop(Iop_Or16, mkexpr(t[0]),
//                         binop(Iop_Shr16, mkexpr(t[0]), mkU8(2))));
//      assign(t[2], binop(Iop_Or16, mkexpr(t[1]),
//                         binop(Iop_Shr16, mkexpr(t[1]), mkU8(4))));
//      assign(t[3], binop(Iop_Or16, mkexpr(t[2]),
//                         binop(Iop_Shr16, mkexpr(t[2]), mkU8(8))));
//      assign(t[4], unop(Iop_Not16, mkexpr(t[3])));
//      return gen_POPCOUNT(ty, t[4]);
//   }
//   vassert(0);
//}
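//
// A minimal standalone C sketch of the same smear-and-popcount idiom,
// operating on a plain 64-bit word.  Added purely for illustration;
// it is not part of VEX, and the helper name lzcnt64_sketch is made
// up here.
//
//   #include <stdint.h>
//
//   static int lzcnt64_sketch ( uint64_t x )
//   {
//      /* Propagate the most significant 1-bit into all lower positions. */
//      x |= x >> 1;  x |= x >> 2;  x |= x >> 4;
//      x |= x >> 8;  x |= x >> 16; x |= x >> 32;
//      /* Invert, then count the 1s; a zero input yields 64. */
//      x = ~x;
//      int n = 0;
//      while (x != 0) { n += (int)(x & 1); x >>= 1; }
//      return n;
//   }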


/*--------------------------------------------------------------------*/
/*--- end                                       guest_amd64_toIR.c ---*/
/*--------------------------------------------------------------------*/