blob: b71c867289fed50a607aa20c165199979434806d [file] [log] [blame]
sewardjd20c8852005-01-20 20:04:07 +00001
2/*--------------------------------------------------------------------*/
sewardj752f9062010-05-03 21:38:49 +00003/*--- begin guest_amd64_toIR.c ---*/
sewardjd20c8852005-01-20 20:04:07 +00004/*--------------------------------------------------------------------*/
5
6/*
sewardj752f9062010-05-03 21:38:49 +00007 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
sewardjd20c8852005-01-20 20:04:07 +00009
sewardj89ae8472013-10-18 14:12:58 +000010 Copyright (C) 2004-2013 OpenWorks LLP
sewardj752f9062010-05-03 21:38:49 +000011 info@open-works.net
sewardjd20c8852005-01-20 20:04:07 +000012
sewardj752f9062010-05-03 21:38:49 +000013 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
sewardjd20c8852005-01-20 20:04:07 +000017
sewardj752f9062010-05-03 21:38:49 +000018 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
sewardj7bd6ffe2005-08-03 16:07:36 +000026 02110-1301, USA.
27
sewardj752f9062010-05-03 21:38:49 +000028 The GNU General Public License is contained in the file COPYING.
sewardjd20c8852005-01-20 20:04:07 +000029
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
sewardjd20c8852005-01-20 20:04:07 +000034*/
35
sewardje9d8a262009-07-01 08:06:34 +000036/* Translates AMD64 code to IR. */
sewardj9ff93bc2005-03-23 11:25:12 +000037
sewardj820611e2005-08-24 10:56:01 +000038/* TODO:
39
40 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
41 to ensure a 64-bit value is being written.
42
sewardje9d8a262009-07-01 08:06:34 +000043 x87 FP Limitations:
44
45 * all arithmetic done at 64 bits
46
47 * no FP exceptions, except for handling stack over/underflow
48
49 * FP rounding mode observed only for float->int conversions and
50 int->float conversions which could lose accuracy, and for
51 float-to-float rounding. For all other operations,
52 round-to-nearest is used, regardless.
53
sewardje9d8a262009-07-01 08:06:34 +000054 * some of the FCOM cases could do with testing -- not convinced
55 that the args are the right way round.
56
57 * FSAVE does not re-initialise the FPU; it should do
58
59 * FINIT not only initialises the FPU environment, it also zeroes
60 all the FP registers. It should leave the registers unchanged.
61
sewardje9d8a262009-07-01 08:06:34 +000062 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
63 per Intel docs this bit has no meaning anyway. Since PUSHF is the
64 only way to observe eflags[1], a proper fix would be to make that
65 bit be set by PUSHF.
66
67 This module uses global variables and so is not MT-safe (if that
68 should ever become relevant).
sewardj820611e2005-08-24 10:56:01 +000069*/
sewardj44d494d2005-01-20 20:26:33 +000070
sewardj42561ef2005-11-04 14:18:31 +000071/* Notes re address size overrides (0x67).
72
73 According to the AMD documentation (24594 Rev 3.09, Sept 2003,
74 "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
75 and System Instructions"), Section 1.2.3 ("Address-Size Override
76 Prefix"):
77
78 0x67 applies to all explicit memory references, causing the top
79 32 bits of the effective address to become zero.
80
81 0x67 has no effect on stack references (push/pop); these always
82 use a 64-bit address.
83
84 0x67 changes the interpretation of instructions which implicitly
85 reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
86 instead. These are:
87
88 cmp{s,sb,sw,sd,sq}
89 in{s,sb,sw,sd}
90 jcxz, jecxz, jrcxz
91 lod{s,sb,sw,sd,sq}
92 loop{,e,bz,be,z}
93 mov{s,sb,sw,sd,sq}
94 out{s,sb,sw,sd}
95 rep{,e,ne,nz}
96 sca{s,sb,sw,sd,sq}
97 sto{s,sb,sw,sd,sq}
98 xlat{,b} */
99
sewardjce02aa72006-01-12 12:27:58 +0000100/* "Special" instructions.
101
   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
   48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
   $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
   Following that, one of the following 4 are allowed (standard
   interpretation in parentheses):
110
111 4887DB (xchgq %rbx,%rbx) %RDX = client_request ( %RAX )
112 4887C9 (xchgq %rcx,%rcx) %RAX = guest_NRADDR
113 4887D2 (xchgq %rdx,%rdx) call-noredir *%RAX
florian2245ce92012-08-28 16:49:30 +0000114 4887F6 (xchgq %rdi,%rdi) IR injection
sewardjce02aa72006-01-12 12:27:58 +0000115
116 Any other bytes following the 16-byte preamble are illegal and
117 constitute a failure in instruction decoding. This all assumes
118 that the preamble will never occur except in specific code
119 fragments designed for Valgrind to catch.
120
sewardje9d8a262009-07-01 08:06:34 +0000121 No prefixes may precede a "Special" instruction.
122*/
sewardjce02aa72006-01-12 12:27:58 +0000123
sewardje9d8a262009-07-01 08:06:34 +0000124/* casLE (implementation of lock-prefixed insns) and rep-prefixed
125 insns: the side-exit back to the start of the insn is done with
126 Ijk_Boring. This is quite wrong, it should be done with
127 Ijk_NoRedir, since otherwise the side exit, which is intended to
128 restart the instruction for whatever reason, could go somewhere
129 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
130 no-redir jumps performance critical, at least for rep-prefixed
131 instructions, since all iterations thereof would involve such a
132 jump. It's not such a big deal with casLE since the side exit is
133 only taken if the CAS fails, that is, the location is contended,
134 which is relatively unlikely.
sewardj1fb8c922009-07-12 12:56:53 +0000135
136 Note also, the test for CAS success vs failure is done using
137 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
138 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
139 shouldn't definedness-check these comparisons. See
140 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
141 background/rationale.
sewardje9d8a262009-07-01 08:06:34 +0000142*/
143
144/* LOCK prefixed instructions. These are translated using IR-level
145 CAS statements (IRCAS) and are believed to preserve atomicity, even
146 from the point of view of some other process racing against a
147 simulated one (presumably they communicate via a shared memory
148 segment).
149
150 Handlers which are aware of LOCK prefixes are:
151 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
152 dis_cmpxchg_G_E (cmpxchg)
153 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
154 dis_Grp3 (not, neg)
155 dis_Grp4 (inc, dec)
156 dis_Grp5 (inc, dec)
157 dis_Grp8_Imm (bts, btc, btr)
158 dis_bt_G_E (bts, btc, btr)
159 dis_xadd_G_E (xadd)
160*/
161
sewardj44d494d2005-01-20 20:26:33 +0000162
163#include "libvex_basictypes.h"
164#include "libvex_ir.h"
165#include "libvex.h"
166#include "libvex_guest_amd64.h"
167
sewardjcef7d3e2009-07-02 12:21:59 +0000168#include "main_util.h"
169#include "main_globals.h"
170#include "guest_generic_bb_to_IR.h"
171#include "guest_generic_x87.h"
172#include "guest_amd64_defs.h"
sewardj44d494d2005-01-20 20:26:33 +0000173
174
sewardjecb94892005-01-21 14:26:37 +0000175/*------------------------------------------------------------*/
176/*--- Globals ---*/
177/*------------------------------------------------------------*/
178
/* These are set at the start of the translation of an insn, right
   down in disInstr_AMD64, so that we don't have to pass them around
   endlessly.  They are all constant during the translation of any
   given insn. */

/* These are set at the start of the translation of a BB, so
   that we don't have to pass them around endlessly. */

/* NOTE(review): the two comments above disagree about whether these
   globals are (re)initialised per insn or per BB.  The code that sets
   them is not visible from here -- confirm and drop the stale one. */

/* We need to know this to do sub-register accesses correctly. */
static VexEndness host_endness;

/* Pointer to the guest code area (points to start of BB, not to the
   insn being processed). */
static const UChar* guest_code;

/* The guest address corresponding to guest_code[0]. */
static Addr64 guest_RIP_bbstart;

/* The guest address for the instruction currently being
   translated. */
static Addr64 guest_RIP_curr_instr;

/* The IRSB* into which we're generating code. */
static IRSB* irsb;

/* For ensuring that %rip-relative addressing is done right.  A read
   of %rip generates the address of the next instruction.  It may be
   that we don't conveniently know that inside disAMode().  For sanity
   checking, if the next insn %rip is needed, we make a guess at what
   it is, record that guess here, and set the accompanying Bool to
   indicate that -- after this insn's decode is finished -- that guess
   needs to be checked.  */

/* At the start of each insn decode, is set to (0, False).
   After the decode, if _mustcheck is now True, _assumed is
   checked. */

static Addr64 guest_RIP_next_assumed;
static Bool   guest_RIP_next_mustcheck;
sewardj4b744762005-02-07 15:02:25 +0000218
219
sewardjecb94892005-01-21 14:26:37 +0000220/*------------------------------------------------------------*/
221/*--- Helpers for constructing IR. ---*/
222/*------------------------------------------------------------*/
223
sewardjb3a04292005-01-21 20:33:44 +0000224/* Generate a new temporary of the given type. */
225static IRTemp newTemp ( IRType ty )
226{
sewardj496a58d2005-03-20 18:44:44 +0000227 vassert(isPlausibleIRType(ty));
sewardjdd40fdf2006-12-24 02:20:24 +0000228 return newIRTemp( irsb->tyenv, ty );
sewardjb3a04292005-01-21 20:33:44 +0000229}
230
sewardjdd40fdf2006-12-24 02:20:24 +0000231/* Add a statement to the list held by "irsb". */
sewardjecb94892005-01-21 14:26:37 +0000232static void stmt ( IRStmt* st )
233{
sewardjdd40fdf2006-12-24 02:20:24 +0000234 addStmtToIRSB( irsb, st );
sewardjecb94892005-01-21 14:26:37 +0000235}
sewardjb3a04292005-01-21 20:33:44 +0000236
237/* Generate a statement "dst := e". */
238static void assign ( IRTemp dst, IRExpr* e )
239{
sewardjdd40fdf2006-12-24 02:20:24 +0000240 stmt( IRStmt_WrTmp(dst, e) );
sewardjb3a04292005-01-21 20:33:44 +0000241}
242
sewardjecb94892005-01-21 14:26:37 +0000243static IRExpr* unop ( IROp op, IRExpr* a )
244{
245 return IRExpr_Unop(op, a);
246}
247
sewardjb3a04292005-01-21 20:33:44 +0000248static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
249{
250 return IRExpr_Binop(op, a1, a2);
251}
252
sewardj4796d662006-02-05 16:06:26 +0000253static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
254{
255 return IRExpr_Triop(op, a1, a2, a3);
256}
257
sewardjdf0e0022005-01-25 15:48:43 +0000258static IRExpr* mkexpr ( IRTemp tmp )
259{
sewardjdd40fdf2006-12-24 02:20:24 +0000260 return IRExpr_RdTmp(tmp);
sewardjdf0e0022005-01-25 15:48:43 +0000261}
sewardjb3a04292005-01-21 20:33:44 +0000262
sewardj3ca55a12005-01-27 16:06:23 +0000263static IRExpr* mkU8 ( ULong i )
sewardjb3a04292005-01-21 20:33:44 +0000264{
265 vassert(i < 256);
sewardj3ca55a12005-01-27 16:06:23 +0000266 return IRExpr_Const(IRConst_U8( (UChar)i ));
sewardjb3a04292005-01-21 20:33:44 +0000267}
268
sewardj5e525292005-01-28 15:13:10 +0000269static IRExpr* mkU16 ( ULong i )
270{
271 vassert(i < 0x10000ULL);
272 return IRExpr_Const(IRConst_U16( (UShort)i ));
273}
sewardj3ca55a12005-01-27 16:06:23 +0000274
275static IRExpr* mkU32 ( ULong i )
276{
277 vassert(i < 0x100000000ULL);
278 return IRExpr_Const(IRConst_U32( (UInt)i ));
279}
sewardjb3a04292005-01-21 20:33:44 +0000280
281static IRExpr* mkU64 ( ULong i )
282{
283 return IRExpr_Const(IRConst_U64(i));
284}
sewardjecb94892005-01-21 14:26:37 +0000285
sewardj3ca55a12005-01-27 16:06:23 +0000286static IRExpr* mkU ( IRType ty, ULong i )
287{
288 switch (ty) {
289 case Ity_I8: return mkU8(i);
sewardj5e525292005-01-28 15:13:10 +0000290 case Ity_I16: return mkU16(i);
sewardj3ca55a12005-01-27 16:06:23 +0000291 case Ity_I32: return mkU32(i);
292 case Ity_I64: return mkU64(i);
293 default: vpanic("mkU(amd64)");
294 }
295}
296
sewardj5e525292005-01-28 15:13:10 +0000297static void storeLE ( IRExpr* addr, IRExpr* data )
298{
sewardje768e922009-11-26 17:17:37 +0000299 stmt( IRStmt_Store(Iend_LE, addr, data) );
sewardj5e525292005-01-28 15:13:10 +0000300}
301
sewardje768e922009-11-26 17:17:37 +0000302static IRExpr* loadLE ( IRType ty, IRExpr* addr )
sewardj5e525292005-01-28 15:13:10 +0000303{
sewardje768e922009-11-26 17:17:37 +0000304 return IRExpr_Load(Iend_LE, ty, addr);
sewardj5e525292005-01-28 15:13:10 +0000305}
306
307static IROp mkSizedOp ( IRType ty, IROp op8 )
308{
309 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
310 || op8 == Iop_Mul8
311 || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
312 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
313 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
sewardj1fb8c922009-07-12 12:56:53 +0000314 || op8 == Iop_CasCmpNE8
sewardj5e525292005-01-28 15:13:10 +0000315 || op8 == Iop_Not8 );
316 switch (ty) {
317 case Ity_I8: return 0 +op8;
318 case Ity_I16: return 1 +op8;
319 case Ity_I32: return 2 +op8;
320 case Ity_I64: return 3 +op8;
321 default: vpanic("mkSizedOp(amd64)");
322 }
323}
324
325static
326IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
327{
328 if (szSmall == 1 && szBig == 4) {
329 return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
330 }
331 if (szSmall == 1 && szBig == 2) {
332 return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
333 }
334 if (szSmall == 2 && szBig == 4) {
335 return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
336 }
337 if (szSmall == 1 && szBig == 8 && !signd) {
sewardje58967e2005-04-27 11:50:56 +0000338 return unop(Iop_8Uto64, src);
sewardj5e525292005-01-28 15:13:10 +0000339 }
sewardj03b07cc2005-01-31 18:09:43 +0000340 if (szSmall == 1 && szBig == 8 && signd) {
sewardje58967e2005-04-27 11:50:56 +0000341 return unop(Iop_8Sto64, src);
sewardj03b07cc2005-01-31 18:09:43 +0000342 }
sewardj5e525292005-01-28 15:13:10 +0000343 if (szSmall == 2 && szBig == 8 && !signd) {
sewardje58967e2005-04-27 11:50:56 +0000344 return unop(Iop_16Uto64, src);
sewardj5e525292005-01-28 15:13:10 +0000345 }
sewardj03b07cc2005-01-31 18:09:43 +0000346 if (szSmall == 2 && szBig == 8 && signd) {
sewardje58967e2005-04-27 11:50:56 +0000347 return unop(Iop_16Sto64, src);
sewardj03b07cc2005-01-31 18:09:43 +0000348 }
sewardj5e525292005-01-28 15:13:10 +0000349 vpanic("doScalarWidening(amd64)");
350}
351
352
sewardjecb94892005-01-21 14:26:37 +0000353
354/*------------------------------------------------------------*/
355/*--- Debugging output ---*/
356/*------------------------------------------------------------*/
357
sewardjb3a04292005-01-21 20:33:44 +0000358/* Bomb out if we can't handle something. */
359__attribute__ ((noreturn))
florian55085f82012-11-21 00:36:55 +0000360static void unimplemented ( const HChar* str )
sewardjb3a04292005-01-21 20:33:44 +0000361{
362 vex_printf("amd64toIR: unimplemented feature\n");
363 vpanic(str);
364}
365
/* Front-end tracing helpers.  Both are no-ops unless VEX_TRACE_FE is
   set in vex_traceflags.

   DIP(format, ...)       prints via vex_printf.
   DIS(buf, format, ...)  formats into 'buf' via vex_sprintf.

   Each body is wrapped in do { } while (0) so that the macro behaves
   as a single statement.  With a bare 'if', a use such as
      if (c) DIP(...); else foo();
   would silently attach the 'else' to the macro's hidden 'if'. */
#define DIP(format, args...)              \
   do {                                   \
      if (vex_traceflags & VEX_TRACE_FE)  \
         vex_printf(format, ## args);     \
   } while (0)

#define DIS(buf, format, args...)           \
   do {                                     \
      if (vex_traceflags & VEX_TRACE_FE)    \
         vex_sprintf(buf, format, ## args); \
   } while (0)
373
374
375/*------------------------------------------------------------*/
376/*--- Offsets of various parts of the amd64 guest state. ---*/
377/*------------------------------------------------------------*/
378
/* Byte offsets of fields within VexGuestAMD64State. */

/* Integer registers. */
#define OFFB_RAX       offsetof(VexGuestAMD64State,guest_RAX)
#define OFFB_RBX       offsetof(VexGuestAMD64State,guest_RBX)
#define OFFB_RCX       offsetof(VexGuestAMD64State,guest_RCX)
#define OFFB_RDX       offsetof(VexGuestAMD64State,guest_RDX)
#define OFFB_RSP       offsetof(VexGuestAMD64State,guest_RSP)
#define OFFB_RBP       offsetof(VexGuestAMD64State,guest_RBP)
#define OFFB_RSI       offsetof(VexGuestAMD64State,guest_RSI)
#define OFFB_RDI       offsetof(VexGuestAMD64State,guest_RDI)
#define OFFB_R8        offsetof(VexGuestAMD64State,guest_R8)
#define OFFB_R9        offsetof(VexGuestAMD64State,guest_R9)
#define OFFB_R10       offsetof(VexGuestAMD64State,guest_R10)
#define OFFB_R11       offsetof(VexGuestAMD64State,guest_R11)
#define OFFB_R12       offsetof(VexGuestAMD64State,guest_R12)
#define OFFB_R13       offsetof(VexGuestAMD64State,guest_R13)
#define OFFB_R14       offsetof(VexGuestAMD64State,guest_R14)
#define OFFB_R15       offsetof(VexGuestAMD64State,guest_R15)

/* Instruction pointer. */
#define OFFB_RIP       offsetof(VexGuestAMD64State,guest_RIP)

/* FS and GS constants. */
#define OFFB_FS_CONST  offsetof(VexGuestAMD64State,guest_FS_CONST)
#define OFFB_GS_CONST  offsetof(VexGuestAMD64State,guest_GS_CONST)

/* The CC_OP/CC_DEP1/CC_DEP2/CC_NDEP fields. */
#define OFFB_CC_OP     offsetof(VexGuestAMD64State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestAMD64State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestAMD64State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestAMD64State,guest_CC_NDEP)

/* FP registers/tags and assorted flag fields. */
#define OFFB_FPREGS    offsetof(VexGuestAMD64State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestAMD64State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestAMD64State,guest_DFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestAMD64State,guest_ACFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestAMD64State,guest_IDFLAG)
#define OFFB_FTOP      offsetof(VexGuestAMD64State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestAMD64State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestAMD64State,guest_FPROUND)

/* SSE rounding mode and the YMM registers. */
#define OFFB_SSEROUND  offsetof(VexGuestAMD64State,guest_SSEROUND)
#define OFFB_YMM0      offsetof(VexGuestAMD64State,guest_YMM0)
#define OFFB_YMM1      offsetof(VexGuestAMD64State,guest_YMM1)
#define OFFB_YMM2      offsetof(VexGuestAMD64State,guest_YMM2)
#define OFFB_YMM3      offsetof(VexGuestAMD64State,guest_YMM3)
#define OFFB_YMM4      offsetof(VexGuestAMD64State,guest_YMM4)
#define OFFB_YMM5      offsetof(VexGuestAMD64State,guest_YMM5)
#define OFFB_YMM6      offsetof(VexGuestAMD64State,guest_YMM6)
#define OFFB_YMM7      offsetof(VexGuestAMD64State,guest_YMM7)
#define OFFB_YMM8      offsetof(VexGuestAMD64State,guest_YMM8)
#define OFFB_YMM9      offsetof(VexGuestAMD64State,guest_YMM9)
#define OFFB_YMM10     offsetof(VexGuestAMD64State,guest_YMM10)
#define OFFB_YMM11     offsetof(VexGuestAMD64State,guest_YMM11)
#define OFFB_YMM12     offsetof(VexGuestAMD64State,guest_YMM12)
#define OFFB_YMM13     offsetof(VexGuestAMD64State,guest_YMM13)
#define OFFB_YMM14     offsetof(VexGuestAMD64State,guest_YMM14)
#define OFFB_YMM15     offsetof(VexGuestAMD64State,guest_YMM15)
#define OFFB_YMM16     offsetof(VexGuestAMD64State,guest_YMM16)

/* Emulation note and the CMSTART/CMLEN pair. */
#define OFFB_EMNOTE    offsetof(VexGuestAMD64State,guest_EMNOTE)
#define OFFB_CMSTART   offsetof(VexGuestAMD64State,guest_CMSTART)
#define OFFB_CMLEN     offsetof(VexGuestAMD64State,guest_CMLEN)

/* guest_NRADDR (see the "Special" instructions notes). */
#define OFFB_NRADDR    offsetof(VexGuestAMD64State,guest_NRADDR)
439
sewardjdf0e0022005-01-25 15:48:43 +0000440
441/*------------------------------------------------------------*/
sewardjecb94892005-01-21 14:26:37 +0000442/*--- Helper bits and pieces for deconstructing the ---*/
443/*--- amd64 insn stream. ---*/
444/*------------------------------------------------------------*/
445
/* AMD64 hardware encoding of the integer registers. */
#define R_RAX   0
#define R_RCX   1
#define R_RDX   2
#define R_RBX   3
#define R_RSP   4
#define R_RBP   5
#define R_RSI   6
#define R_RDI   7
#define R_R8    8
#define R_R9    9
#define R_R10  10
#define R_R11  11
#define R_R12  12
#define R_R13  13
#define R_R14  14
#define R_R15  15

/* Intel hardware encoding of the segment registers. */
#define R_ES    0
#define R_CS    1
#define R_SS    2
#define R_DS    3
#define R_FS    4
#define R_GS    5
471
472
sewardjb3a04292005-01-21 20:33:44 +0000473/* Various simple conversions */
474
475static ULong extend_s_8to64 ( UChar x )
476{
477 return (ULong)((((Long)x) << 56) >> 56);
478}
479
480static ULong extend_s_16to64 ( UShort x )
481{
482 return (ULong)((((Long)x) << 48) >> 48);
483}
484
485static ULong extend_s_32to64 ( UInt x )
486{
487 return (ULong)((((Long)x) << 32) >> 32);
488}
489
sewardjdf0e0022005-01-25 15:48:43 +0000490/* Figure out whether the mod and rm parts of a modRM byte refer to a
491 register or memory. If so, the byte will have the form 11XXXYYY,
492 where YYY is the register number. */
sewardj5b470602005-02-27 13:10:48 +0000493inline
sewardjdf0e0022005-01-25 15:48:43 +0000494static Bool epartIsReg ( UChar mod_reg_rm )
495{
sewardj7a240552005-01-28 21:37:12 +0000496 return toBool(0xC0 == (mod_reg_rm & 0xC0));
sewardjdf0e0022005-01-25 15:48:43 +0000497}
498
sewardj901ed122005-02-27 13:25:31 +0000499/* Extract the 'g' field from a modRM byte. This only produces 3
500 bits, which is not a complete register number. You should avoid
501 this function if at all possible. */
502inline
503static Int gregLO3ofRM ( UChar mod_reg_rm )
sewardjdf0e0022005-01-25 15:48:43 +0000504{
505 return (Int)( (mod_reg_rm >> 3) & 7 );
506}
507
sewardj8711f662005-05-09 17:52:56 +0000508/* Ditto the 'e' field of a modRM byte. */
509inline
510static Int eregLO3ofRM ( UChar mod_reg_rm )
511{
512 return (Int)(mod_reg_rm & 0x7);
513}
514
sewardjdf0e0022005-01-25 15:48:43 +0000515/* Get a 8/16/32-bit unsigned value out of the insn stream. */
516
sewardj80611e32012-01-20 13:07:24 +0000517static inline UChar getUChar ( Long delta )
sewardjdf0e0022005-01-25 15:48:43 +0000518{
sewardj8c332e22005-01-28 01:36:56 +0000519 UChar v = guest_code[delta+0];
520 return v;
sewardjdf0e0022005-01-25 15:48:43 +0000521}
522
sewardj47c2d4d2006-11-14 17:50:16 +0000523static UInt getUDisp16 ( Long delta )
524{
525 UInt v = guest_code[delta+1]; v <<= 8;
526 v |= guest_code[delta+0];
527 return v & 0xFFFF;
528}
529
sewardj270def42005-07-03 01:03:01 +0000530//.. static UInt getUDisp ( Int size, Long delta )
sewardjd20c8852005-01-20 20:04:07 +0000531//.. {
532//.. switch (size) {
533//.. case 4: return getUDisp32(delta);
534//.. case 2: return getUDisp16(delta);
535//.. case 1: return getUChar(delta);
536//.. default: vpanic("getUDisp(x86)");
537//.. }
538//.. return 0; /*notreached*/
539//.. }
sewardjb3a04292005-01-21 20:33:44 +0000540
541
542/* Get a byte value out of the insn stream and sign-extend to 64
543 bits. */
sewardj270def42005-07-03 01:03:01 +0000544static Long getSDisp8 ( Long delta )
sewardjb3a04292005-01-21 20:33:44 +0000545{
546 return extend_s_8to64( guest_code[delta] );
547}
548
sewardj5e525292005-01-28 15:13:10 +0000549/* Get a 16-bit value out of the insn stream and sign-extend to 64
550 bits. */
sewardj270def42005-07-03 01:03:01 +0000551static Long getSDisp16 ( Long delta )
sewardj5e525292005-01-28 15:13:10 +0000552{
sewardj118b23e2005-01-29 02:14:44 +0000553 UInt v = guest_code[delta+1]; v <<= 8;
sewardj5e525292005-01-28 15:13:10 +0000554 v |= guest_code[delta+0];
sewardj118b23e2005-01-29 02:14:44 +0000555 return extend_s_16to64( (UShort)v );
sewardj5e525292005-01-28 15:13:10 +0000556}
557
sewardjb3a04292005-01-21 20:33:44 +0000558/* Get a 32-bit value out of the insn stream and sign-extend to 64
559 bits. */
sewardj270def42005-07-03 01:03:01 +0000560static Long getSDisp32 ( Long delta )
sewardjb3a04292005-01-21 20:33:44 +0000561{
562 UInt v = guest_code[delta+3]; v <<= 8;
563 v |= guest_code[delta+2]; v <<= 8;
564 v |= guest_code[delta+1]; v <<= 8;
565 v |= guest_code[delta+0];
566 return extend_s_32to64( v );
567}
568
sewardj03b07cc2005-01-31 18:09:43 +0000569/* Get a 64-bit value out of the insn stream. */
sewardj270def42005-07-03 01:03:01 +0000570static Long getDisp64 ( Long delta )
sewardj03b07cc2005-01-31 18:09:43 +0000571{
sewardj7eaa7cf2005-01-31 18:55:22 +0000572 ULong v = 0;
sewardj03b07cc2005-01-31 18:09:43 +0000573 v |= guest_code[delta+7]; v <<= 8;
574 v |= guest_code[delta+6]; v <<= 8;
575 v |= guest_code[delta+5]; v <<= 8;
576 v |= guest_code[delta+4]; v <<= 8;
577 v |= guest_code[delta+3]; v <<= 8;
578 v |= guest_code[delta+2]; v <<= 8;
579 v |= guest_code[delta+1]; v <<= 8;
580 v |= guest_code[delta+0];
581 return v;
582}
583
sewardj3ca55a12005-01-27 16:06:23 +0000584/* Note: because AMD64 doesn't allow 64-bit literals, it is an error
585 if this is called with size==8. Should not happen. */
sewardj270def42005-07-03 01:03:01 +0000586static Long getSDisp ( Int size, Long delta )
sewardj3ca55a12005-01-27 16:06:23 +0000587{
588 switch (size) {
589 case 4: return getSDisp32(delta);
sewardj5e525292005-01-28 15:13:10 +0000590 case 2: return getSDisp16(delta);
sewardj3ca55a12005-01-27 16:06:23 +0000591 case 1: return getSDisp8(delta);
592 default: vpanic("getSDisp(amd64)");
593 }
594}
595
sewardj1389d4d2005-01-28 13:46:29 +0000596static ULong mkSizeMask ( Int sz )
sewardj3ca55a12005-01-27 16:06:23 +0000597{
598 switch (sz) {
sewardj1389d4d2005-01-28 13:46:29 +0000599 case 1: return 0x00000000000000FFULL;
600 case 2: return 0x000000000000FFFFULL;
601 case 4: return 0x00000000FFFFFFFFULL;
sewardj3ca55a12005-01-27 16:06:23 +0000602 case 8: return 0xFFFFFFFFFFFFFFFFULL;
603 default: vpanic("mkSzMask(amd64)");
604 }
605}
606
607static Int imin ( Int a, Int b )
608{
609 return (a < b) ? a : b;
610}
sewardjecb94892005-01-21 14:26:37 +0000611
sewardj5b470602005-02-27 13:10:48 +0000612static IRType szToITy ( Int n )
613{
614 switch (n) {
615 case 1: return Ity_I8;
616 case 2: return Ity_I16;
617 case 4: return Ity_I32;
618 case 8: return Ity_I64;
sewardjf53b7352005-04-06 20:01:56 +0000619 default: vex_printf("\nszToITy(%d)\n", n);
620 vpanic("szToITy(amd64)");
sewardj5b470602005-02-27 13:10:48 +0000621 }
622}
623
sewardjecb94892005-01-21 14:26:37 +0000624
625/*------------------------------------------------------------*/
626/*--- For dealing with prefixes. ---*/
627/*------------------------------------------------------------*/
628
629/* The idea is to pass around an int holding a bitmask summarising
630 info from the prefixes seen on the current instruction, including
631 info from the REX byte. This info is used in various places, but
632 most especially when making sense of register fields in
633 instructions.
634
sewardjc4530ae2012-05-21 10:18:49 +0000635 The top 8 bits of the prefix are 0x55, just as a hacky way to
636 ensure it really is a valid prefix.
sewardjdf0e0022005-01-25 15:48:43 +0000637
638 Things you can safely assume about a well-formed prefix:
639 * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
sewardj5b470602005-02-27 13:10:48 +0000640 * if REX is not present then REXW,REXR,REXX,REXB will read
641 as zero.
sewardjdf0e0022005-01-25 15:48:43 +0000642 * F2 and F3 will not both be 1.
sewardjecb94892005-01-21 14:26:37 +0000643*/
644
/* Bitmask of PFX_* flags summarising the prefixes seen on the current
   instruction; the top 8 bits hold the 0x55 validity tag. */
typedef UInt Prefix;
646
/* Individual bits of a Prefix.  Note 0xF2 is REPNE/REPNZ and 0xF3 is
   REP/REPE/REPZ; the comments on PFX_F2 and PFX_F3 previously had
   these the wrong way round. */
#define PFX_ASO    (1<<0)    /* address-size override present (0x67) */
#define PFX_66     (1<<1)    /* operand-size override-to-16 present (0x66) */
#define PFX_REX    (1<<2)    /* REX byte present (0x40 to 0x4F) */
#define PFX_REXW   (1<<3)    /* REX W bit, if REX present, else 0 */
#define PFX_REXR   (1<<4)    /* REX R bit, if REX present, else 0 */
#define PFX_REXX   (1<<5)    /* REX X bit, if REX present, else 0 */
#define PFX_REXB   (1<<6)    /* REX B bit, if REX present, else 0 */
#define PFX_LOCK   (1<<7)    /* bus LOCK prefix present (0xF0) */
#define PFX_F2     (1<<8)    /* REPNE/REPNZ prefix present (0xF2) */
#define PFX_F3     (1<<9)    /* REP/REPE/REPZ prefix present (0xF3) */
#define PFX_CS     (1<<10)   /* CS segment prefix present (0x2E) */
#define PFX_DS     (1<<11)   /* DS segment prefix present (0x3E) */
#define PFX_ES     (1<<12)   /* ES segment prefix present (0x26) */
#define PFX_FS     (1<<13)   /* FS segment prefix present (0x64) */
#define PFX_GS     (1<<14)   /* GS segment prefix present (0x65) */
#define PFX_SS     (1<<15)   /* SS segment prefix present (0x36) */
#define PFX_VEX    (1<<16)   /* VEX prefix present (0xC4 or 0xC5) */
#define PFX_VEXL   (1<<17)   /* VEX L bit, if VEX present, else 0 */
/* The extra register field VEX.vvvv is encoded (after not-ing it) as
   PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit
   positions. */
#define PFX_VEXnV0 (1<<18)   /* ~VEX vvvv[0], if VEX present, else 0 */
#define PFX_VEXnV1 (1<<19)   /* ~VEX vvvv[1], if VEX present, else 0 */
#define PFX_VEXnV2 (1<<20)   /* ~VEX vvvv[2], if VEX present, else 0 */
#define PFX_VEXnV3 (1<<21)   /* ~VEX vvvv[3], if VEX present, else 0 */


/* A prefix with no flags set: just the 0x55 validity tag in the top
   8 bits. */
#define PFX_EMPTY 0x55000000
sewardjecb94892005-01-21 14:26:37 +0000675
sewardjb3a04292005-01-21 20:33:44 +0000676static Bool IS_VALID_PFX ( Prefix pfx ) {
sewardjc4530ae2012-05-21 10:18:49 +0000677 return toBool((pfx & 0xFF000000) == PFX_EMPTY);
sewardjecb94892005-01-21 14:26:37 +0000678}
679
sewardjb3a04292005-01-21 20:33:44 +0000680static Bool haveREX ( Prefix pfx ) {
sewardj7a240552005-01-28 21:37:12 +0000681 return toBool(pfx & PFX_REX);
sewardjecb94892005-01-21 14:26:37 +0000682}
683
sewardj5e525292005-01-28 15:13:10 +0000684static Int getRexW ( Prefix pfx ) {
685 return (pfx & PFX_REXW) ? 1 : 0;
686}
sewardjdf0e0022005-01-25 15:48:43 +0000687static Int getRexR ( Prefix pfx ) {
688 return (pfx & PFX_REXR) ? 1 : 0;
689}
sewardj5b470602005-02-27 13:10:48 +0000690static Int getRexX ( Prefix pfx ) {
691 return (pfx & PFX_REXX) ? 1 : 0;
692}
sewardjdf0e0022005-01-25 15:48:43 +0000693static Int getRexB ( Prefix pfx ) {
694 return (pfx & PFX_REXB) ? 1 : 0;
695}
696
sewardj3ca55a12005-01-27 16:06:23 +0000697/* Check a prefix doesn't have F2 or F3 set in it, since usually that
698 completely changes what instruction it really is. */
699static Bool haveF2orF3 ( Prefix pfx ) {
sewardj7a240552005-01-28 21:37:12 +0000700 return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
sewardj3ca55a12005-01-27 16:06:23 +0000701}
sewardj38b1d692013-10-15 17:21:42 +0000702static Bool haveF2andF3 ( Prefix pfx ) {
703 return toBool((pfx & (PFX_F2|PFX_F3)) == (PFX_F2|PFX_F3));
704}
sewardj55dbb262005-01-28 16:36:51 +0000705static Bool haveF2 ( Prefix pfx ) {
sewardj7a240552005-01-28 21:37:12 +0000706 return toBool((pfx & PFX_F2) > 0);
sewardj55dbb262005-01-28 16:36:51 +0000707}
708static Bool haveF3 ( Prefix pfx ) {
sewardj7a240552005-01-28 21:37:12 +0000709 return toBool((pfx & PFX_F3) > 0);
sewardj55dbb262005-01-28 16:36:51 +0000710}
sewardj6359f812005-07-20 10:15:34 +0000711
sewardjc8b26352005-07-20 09:23:13 +0000712static Bool have66 ( Prefix pfx ) {
713 return toBool((pfx & PFX_66) > 0);
714}
sewardj6359f812005-07-20 10:15:34 +0000715static Bool haveASO ( Prefix pfx ) {
716 return toBool((pfx & PFX_ASO) > 0);
717}
sewardj38b1d692013-10-15 17:21:42 +0000718static Bool haveLOCK ( Prefix pfx ) {
719 return toBool((pfx & PFX_LOCK) > 0);
720}
sewardjecb94892005-01-21 14:26:37 +0000721
sewardj1001dc42005-02-21 08:25:55 +0000722/* Return True iff pfx has 66 set and F2 and F3 clear */
723static Bool have66noF2noF3 ( Prefix pfx )
724{
725 return
sewardj8d965312005-02-25 02:48:47 +0000726 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
sewardj1001dc42005-02-21 08:25:55 +0000727}
728
729/* Return True iff pfx has F2 set and 66 and F3 clear */
730static Bool haveF2no66noF3 ( Prefix pfx )
731{
732 return
sewardj8d965312005-02-25 02:48:47 +0000733 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
734}
735
736/* Return True iff pfx has F3 set and 66 and F2 clear */
737static Bool haveF3no66noF2 ( Prefix pfx )
738{
739 return
740 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
sewardj1001dc42005-02-21 08:25:55 +0000741}
742
sewardjfd181282010-06-14 21:29:35 +0000743/* Return True iff pfx has F3 set and F2 clear */
744static Bool haveF3noF2 ( Prefix pfx )
745{
746 return
747 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3);
748}
749
sewardj186f8692011-01-21 17:51:44 +0000750/* Return True iff pfx has F2 set and F3 clear */
751static Bool haveF2noF3 ( Prefix pfx )
752{
753 return
754 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2);
755}
756
sewardj1001dc42005-02-21 08:25:55 +0000757/* Return True iff pfx has 66, F2 and F3 clear */
758static Bool haveNo66noF2noF3 ( Prefix pfx )
759{
760 return
sewardj8d965312005-02-25 02:48:47 +0000761 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
sewardj1001dc42005-02-21 08:25:55 +0000762}
763
sewardj8711f662005-05-09 17:52:56 +0000764/* Return True iff pfx has any of 66, F2 and F3 set */
765static Bool have66orF2orF3 ( Prefix pfx )
766{
sewardjca673ab2005-05-11 10:03:08 +0000767 return toBool( ! haveNo66noF2noF3(pfx) );
sewardj8711f662005-05-09 17:52:56 +0000768}
769
mjw67ac3fd2014-05-09 11:41:06 +0000770/* Return True iff pfx has 66 or F3 set */
771static Bool have66orF3 ( Prefix pfx )
sewardj47c2d4d2006-11-14 17:50:16 +0000772{
mjw67ac3fd2014-05-09 11:41:06 +0000773 return toBool((pfx & (PFX_66|PFX_F3)) > 0);
sewardj47c2d4d2006-11-14 17:50:16 +0000774}
775
sewardj1389d4d2005-01-28 13:46:29 +0000776/* Clear all the segment-override bits in a prefix. */
777static Prefix clearSegBits ( Prefix p )
778{
sewardj1001dc42005-02-21 08:25:55 +0000779 return
780 p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
781}
782
sewardjc4530ae2012-05-21 10:18:49 +0000783/* Get the (inverted, hence back to "normal") VEX.vvvv field. */
784static UInt getVexNvvvv ( Prefix pfx ) {
785 UInt r = (UInt)pfx;
786 r /= (UInt)PFX_VEXnV0; /* pray this turns into a shift */
787 return r & 0xF;
788}
789
790static Bool haveVEX ( Prefix pfx ) {
791 return toBool(pfx & PFX_VEX);
792}
793
794static Int getVexL ( Prefix pfx ) {
795 return (pfx & PFX_VEXL) ? 1 : 0;
796}
797
sewardj1389d4d2005-01-28 13:46:29 +0000798
sewardjecb94892005-01-21 14:26:37 +0000799/*------------------------------------------------------------*/
sewardj80611e32012-01-20 13:07:24 +0000800/*--- For dealing with escapes ---*/
801/*------------------------------------------------------------*/
802
803
/* Escapes sit between the prefixes and the primary opcode byte, and
   move the primary opcode byte into a bigger opcode space.  The
   0xF0000000 base has no meaning of its own; it is chosen only so
   that Escape values cannot be mistaken for valid Prefix values,
   which helps sanity checking.
*/

typedef
   enum {
      ESC_NONE = 0xF0000000, /* no escape */
      ESC_0F   = 0xF0000001, /* 0F        */
      ESC_0F38 = 0xF0000002, /* 0F 38     */
      ESC_0F3A = 0xF0000003  /* 0F 3A     */
   }
   Escape;
818
819
820/*------------------------------------------------------------*/
sewardj5b470602005-02-27 13:10:48 +0000821/*--- For dealing with integer registers ---*/
sewardjecb94892005-01-21 14:26:37 +0000822/*------------------------------------------------------------*/
823
/* This is somewhat complex.  The rules are:

   For 64, 32 and 16 bit register references, the e or g fields in the
   modrm bytes supply the low 3 bits of the register number.  The
   fourth (most-significant) bit of the register number is supplied by
   the REX byte, if it is present; else that bit is taken to be zero.

   The REX.R bit supplies the high bit corresponding to the g register
   field, and the REX.B bit supplies the high bit corresponding to the
   e register field (when the mod part of modrm indicates that modrm's
   e component refers to a register and not to memory).

   The REX.X bit supplies a high register bit for certain registers
   in SIB address modes, and is generally rarely used.

   For 8 bit register references, the presence of the REX byte itself
   has significance.  If there is no REX present, then the 3-bit
   number extracted from the modrm e or g field is treated as an index
   into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
   old x86 encoding scheme.

   But if there is a REX present, the register reference is
   interpreted in the same way as for 64/32/16-bit references: a high
   bit is extracted from REX, giving a 4-bit number, and the denoted
   register is the lowest 8 bits of the 16 integer registers denoted
   by the number.  In particular, values 4 through 7 of this sequence
   do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
   %rsp %rbp %rsi %rdi.

   The REX.W bit has no bearing at all on register numbers.  Instead
   its presence indicates that the operand size is to be overridden
   from its default value (32 bits) to 64 bits instead.  This is in
   the same fashion that an 0x66 prefix indicates the operand size is
   to be overridden from 32 bits down to 16 bits.  When both REX.W and
   0x66 are present there is a conflict, and REX.W takes precedence.

   Rather than try to handle this complexity using a single huge
   function, several smaller ones are provided.  The aim is to make it
   as difficult as possible to screw up register decoding in a subtle
   and hard-to-track-down way.

   Because these routines fish around in the host's memory (that is,
   in the guest state area) for sub-parts of guest registers, their
   correctness depends on the host's endianness.  So far these
   routines only work for little-endian hosts.  Those for which
   endianness is important have assertions to ensure sanity.
*/
sewardjecb94892005-01-21 14:26:37 +0000871
872
sewardj5b470602005-02-27 13:10:48 +0000873/* About the simplest question you can ask: where do the 64-bit
874 integer registers live (in the guest state) ? */
sewardjecb94892005-01-21 14:26:37 +0000875
sewardj3ca55a12005-01-27 16:06:23 +0000876static Int integerGuestReg64Offset ( UInt reg )
sewardjb3a04292005-01-21 20:33:44 +0000877{
878 switch (reg) {
879 case R_RAX: return OFFB_RAX;
880 case R_RCX: return OFFB_RCX;
881 case R_RDX: return OFFB_RDX;
882 case R_RBX: return OFFB_RBX;
883 case R_RSP: return OFFB_RSP;
884 case R_RBP: return OFFB_RBP;
885 case R_RSI: return OFFB_RSI;
886 case R_RDI: return OFFB_RDI;
887 case R_R8: return OFFB_R8;
888 case R_R9: return OFFB_R9;
889 case R_R10: return OFFB_R10;
890 case R_R11: return OFFB_R11;
891 case R_R12: return OFFB_R12;
892 case R_R13: return OFFB_R13;
893 case R_R14: return OFFB_R14;
894 case R_R15: return OFFB_R15;
895 default: vpanic("integerGuestReg64Offset(amd64)");
896 }
897}
898
899
sewardj5b470602005-02-27 13:10:48 +0000900/* Produce the name of an integer register, for printing purposes.
901 reg is a number in the range 0 .. 15 that has been generated from a
902 3-bit reg-field number and a REX extension bit. irregular denotes
903 the case where sz==1 and no REX byte is present. */
sewardjecb94892005-01-21 14:26:37 +0000904
905static
florian55085f82012-11-21 00:36:55 +0000906const HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
sewardjecb94892005-01-21 14:26:37 +0000907{
florian55085f82012-11-21 00:36:55 +0000908 static const HChar* ireg64_names[16]
sewardjecb94892005-01-21 14:26:37 +0000909 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
910 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
florian55085f82012-11-21 00:36:55 +0000911 static const HChar* ireg32_names[16]
sewardjecb94892005-01-21 14:26:37 +0000912 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
913 "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
florian55085f82012-11-21 00:36:55 +0000914 static const HChar* ireg16_names[16]
sewardjecb94892005-01-21 14:26:37 +0000915 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di",
916 "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
florian55085f82012-11-21 00:36:55 +0000917 static const HChar* ireg8_names[16]
sewardjecb94892005-01-21 14:26:37 +0000918 = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil",
919 "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
florian55085f82012-11-21 00:36:55 +0000920 static const HChar* ireg8_irregular[8]
sewardjecb94892005-01-21 14:26:37 +0000921 = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" };
922
sewardj5b470602005-02-27 13:10:48 +0000923 vassert(reg < 16);
924 if (sz == 1) {
925 if (irregular)
926 vassert(reg < 8);
927 } else {
928 vassert(irregular == False);
929 }
sewardjecb94892005-01-21 14:26:37 +0000930
931 switch (sz) {
sewardj5b470602005-02-27 13:10:48 +0000932 case 8: return ireg64_names[reg];
933 case 4: return ireg32_names[reg];
934 case 2: return ireg16_names[reg];
935 case 1: if (irregular) {
936 return ireg8_irregular[reg];
937 } else {
938 return ireg8_names[reg];
939 }
940 default: vpanic("nameIReg(amd64)");
sewardjecb94892005-01-21 14:26:37 +0000941 }
sewardjecb94892005-01-21 14:26:37 +0000942}
943
sewardj5b470602005-02-27 13:10:48 +0000944/* Using the same argument conventions as nameIReg, produce the
945 guest state offset of an integer register. */
sewardjb3a04292005-01-21 20:33:44 +0000946
sewardjecb94892005-01-21 14:26:37 +0000947static
sewardj5b470602005-02-27 13:10:48 +0000948Int offsetIReg ( Int sz, UInt reg, Bool irregular )
sewardjecb94892005-01-21 14:26:37 +0000949{
sewardj5b470602005-02-27 13:10:48 +0000950 vassert(reg < 16);
951 if (sz == 1) {
952 if (irregular)
953 vassert(reg < 8);
954 } else {
955 vassert(irregular == False);
sewardjecb94892005-01-21 14:26:37 +0000956 }
sewardj5b470602005-02-27 13:10:48 +0000957
958 /* Deal with irregular case -- sz==1 and no REX present */
959 if (sz == 1 && irregular) {
960 switch (reg) {
961 case R_RSP: return 1+ OFFB_RAX;
962 case R_RBP: return 1+ OFFB_RCX;
963 case R_RSI: return 1+ OFFB_RDX;
964 case R_RDI: return 1+ OFFB_RBX;
965 default: break; /* use the normal case */
966 }
sewardjecb94892005-01-21 14:26:37 +0000967 }
sewardj5b470602005-02-27 13:10:48 +0000968
969 /* Normal case */
970 return integerGuestReg64Offset(reg);
sewardjecb94892005-01-21 14:26:37 +0000971}
972
973
sewardj5b470602005-02-27 13:10:48 +0000974/* Read the %CL register :: Ity_I8, for shift/rotate operations. */
975
976static IRExpr* getIRegCL ( void )
977{
sewardj9b769162014-07-24 12:42:03 +0000978 vassert(host_endness == VexEndnessLE);
sewardj5b470602005-02-27 13:10:48 +0000979 return IRExpr_Get( OFFB_RCX, Ity_I8 );
980}
981
982
983/* Write to the %AH register. */
984
985static void putIRegAH ( IRExpr* e )
986{
sewardj9b769162014-07-24 12:42:03 +0000987 vassert(host_endness == VexEndnessLE);
sewardjdd40fdf2006-12-24 02:20:24 +0000988 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
sewardj5b470602005-02-27 13:10:48 +0000989 stmt( IRStmt_Put( OFFB_RAX+1, e ) );
990}
991
992
993/* Read/write various widths of %RAX, as it has various
994 special-purpose uses. */
995
florian55085f82012-11-21 00:36:55 +0000996static const HChar* nameIRegRAX ( Int sz )
sewardj5b470602005-02-27 13:10:48 +0000997{
998 switch (sz) {
999 case 1: return "%al";
1000 case 2: return "%ax";
1001 case 4: return "%eax";
1002 case 8: return "%rax";
1003 default: vpanic("nameIRegRAX(amd64)");
1004 }
1005}
1006
1007static IRExpr* getIRegRAX ( Int sz )
1008{
sewardj9b769162014-07-24 12:42:03 +00001009 vassert(host_endness == VexEndnessLE);
sewardj5b470602005-02-27 13:10:48 +00001010 switch (sz) {
1011 case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 );
1012 case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 );
sewardjef425db2010-01-11 10:46:18 +00001013 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
sewardj5b470602005-02-27 13:10:48 +00001014 case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
1015 default: vpanic("getIRegRAX(amd64)");
1016 }
1017}
1018
1019static void putIRegRAX ( Int sz, IRExpr* e )
1020{
sewardjdd40fdf2006-12-24 02:20:24 +00001021 IRType ty = typeOfIRExpr(irsb->tyenv, e);
sewardj9b769162014-07-24 12:42:03 +00001022 vassert(host_endness == VexEndnessLE);
sewardj5b470602005-02-27 13:10:48 +00001023 switch (sz) {
1024 case 8: vassert(ty == Ity_I64);
1025 stmt( IRStmt_Put( OFFB_RAX, e ));
1026 break;
1027 case 4: vassert(ty == Ity_I32);
1028 stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
1029 break;
1030 case 2: vassert(ty == Ity_I16);
1031 stmt( IRStmt_Put( OFFB_RAX, e ));
1032 break;
1033 case 1: vassert(ty == Ity_I8);
1034 stmt( IRStmt_Put( OFFB_RAX, e ));
1035 break;
1036 default: vpanic("putIRegRAX(amd64)");
1037 }
1038}
1039
1040
1041/* Read/write various widths of %RDX, as it has various
1042 special-purpose uses. */
1043
florian55085f82012-11-21 00:36:55 +00001044static const HChar* nameIRegRDX ( Int sz )
sewardjbb4396c2007-11-20 17:29:08 +00001045{
1046 switch (sz) {
1047 case 1: return "%dl";
1048 case 2: return "%dx";
1049 case 4: return "%edx";
1050 case 8: return "%rdx";
1051 default: vpanic("nameIRegRDX(amd64)");
1052 }
1053}
1054
sewardj5b470602005-02-27 13:10:48 +00001055static IRExpr* getIRegRDX ( Int sz )
1056{
sewardj9b769162014-07-24 12:42:03 +00001057 vassert(host_endness == VexEndnessLE);
sewardj5b470602005-02-27 13:10:48 +00001058 switch (sz) {
1059 case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 );
1060 case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 );
sewardjef425db2010-01-11 10:46:18 +00001061 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
sewardj5b470602005-02-27 13:10:48 +00001062 case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
1063 default: vpanic("getIRegRDX(amd64)");
1064 }
1065}
1066
1067static void putIRegRDX ( Int sz, IRExpr* e )
1068{
sewardj9b769162014-07-24 12:42:03 +00001069 vassert(host_endness == VexEndnessLE);
sewardjdd40fdf2006-12-24 02:20:24 +00001070 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
sewardj5b470602005-02-27 13:10:48 +00001071 switch (sz) {
1072 case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
1073 break;
1074 case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
1075 break;
1076 case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
1077 break;
1078 case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
1079 break;
1080 default: vpanic("putIRegRDX(amd64)");
1081 }
1082}
1083
1084
1085/* Simplistic functions to deal with the integer registers as a
1086 straightforward bank of 16 64-bit regs. */
sewardjb3a04292005-01-21 20:33:44 +00001087
1088static IRExpr* getIReg64 ( UInt regno )
1089{
1090 return IRExpr_Get( integerGuestReg64Offset(regno),
1091 Ity_I64 );
1092}
1093
sewardj2f959cc2005-01-26 01:19:35 +00001094static void putIReg64 ( UInt regno, IRExpr* e )
1095{
sewardjdd40fdf2006-12-24 02:20:24 +00001096 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
sewardj2f959cc2005-01-26 01:19:35 +00001097 stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
1098}
1099
florian55085f82012-11-21 00:36:55 +00001100static const HChar* nameIReg64 ( UInt regno )
sewardjb3a04292005-01-21 20:33:44 +00001101{
sewardj5b470602005-02-27 13:10:48 +00001102 return nameIReg( 8, regno, False );
sewardjb3a04292005-01-21 20:33:44 +00001103}
sewardj5b470602005-02-27 13:10:48 +00001104
1105
1106/* Simplistic functions to deal with the lower halves of integer
1107 registers as a straightforward bank of 16 32-bit regs. */
1108
1109static IRExpr* getIReg32 ( UInt regno )
1110{
sewardj9b769162014-07-24 12:42:03 +00001111 vassert(host_endness == VexEndnessLE);
sewardjef425db2010-01-11 10:46:18 +00001112 return unop(Iop_64to32,
1113 IRExpr_Get( integerGuestReg64Offset(regno),
1114 Ity_I64 ));
sewardj5b470602005-02-27 13:10:48 +00001115}
1116
1117static void putIReg32 ( UInt regno, IRExpr* e )
1118{
sewardjdd40fdf2006-12-24 02:20:24 +00001119 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
sewardj5b470602005-02-27 13:10:48 +00001120 stmt( IRStmt_Put( integerGuestReg64Offset(regno),
1121 unop(Iop_32Uto64,e) ) );
1122}
1123
florian55085f82012-11-21 00:36:55 +00001124static const HChar* nameIReg32 ( UInt regno )
sewardj5b470602005-02-27 13:10:48 +00001125{
1126 return nameIReg( 4, regno, False );
1127}
1128
1129
sewardja7ba8c42005-05-10 20:08:34 +00001130/* Simplistic functions to deal with the lower quarters of integer
1131 registers as a straightforward bank of 16 16-bit regs. */
1132
1133static IRExpr* getIReg16 ( UInt regno )
1134{
sewardj9b769162014-07-24 12:42:03 +00001135 vassert(host_endness == VexEndnessLE);
sewardja7ba8c42005-05-10 20:08:34 +00001136 return IRExpr_Get( integerGuestReg64Offset(regno),
1137 Ity_I16 );
1138}
1139
tom0fb4cbd2011-08-10 12:58:03 +00001140static void putIReg16 ( UInt regno, IRExpr* e )
1141{
1142 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
1143 stmt( IRStmt_Put( integerGuestReg64Offset(regno),
1144 unop(Iop_16Uto64,e) ) );
1145}
1146
florian55085f82012-11-21 00:36:55 +00001147static const HChar* nameIReg16 ( UInt regno )
sewardja7ba8c42005-05-10 20:08:34 +00001148{
1149 return nameIReg( 2, regno, False );
1150}
1151
1152
sewardj5b470602005-02-27 13:10:48 +00001153/* Sometimes what we know is a 3-bit register number, a REX byte, and
1154 which field of the REX byte is to be used to extend to a 4-bit
1155 number. These functions cater for that situation.
1156*/
1157static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
1158{
1159 vassert(lo3bits < 8);
1160 vassert(IS_VALID_PFX(pfx));
1161 return getIReg64( lo3bits | (getRexX(pfx) << 3) );
1162}
1163
florian55085f82012-11-21 00:36:55 +00001164static const HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
sewardj5b470602005-02-27 13:10:48 +00001165{
1166 vassert(lo3bits < 8);
1167 vassert(IS_VALID_PFX(pfx));
1168 return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
1169}
1170
florian55085f82012-11-21 00:36:55 +00001171static const HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
sewardj5b470602005-02-27 13:10:48 +00001172{
1173 vassert(lo3bits < 8);
1174 vassert(IS_VALID_PFX(pfx));
1175 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1176 return nameIReg( sz, lo3bits | (getRexB(pfx) << 3),
sewardj397f88b2005-02-27 13:39:25 +00001177 toBool(sz==1 && !haveREX(pfx)) );
sewardj5b470602005-02-27 13:10:48 +00001178}
1179
1180static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
1181{
1182 vassert(lo3bits < 8);
1183 vassert(IS_VALID_PFX(pfx));
1184 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
sewardjef425db2010-01-11 10:46:18 +00001185 if (sz == 4) {
1186 sz = 8;
1187 return unop(Iop_64to32,
1188 IRExpr_Get(
1189 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
sewardj8cef87d2013-01-20 11:39:52 +00001190 False/*!irregular*/ ),
sewardjef425db2010-01-11 10:46:18 +00001191 szToITy(sz)
1192 )
1193 );
1194 } else {
1195 return IRExpr_Get(
1196 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
1197 toBool(sz==1 && !haveREX(pfx)) ),
1198 szToITy(sz)
1199 );
1200 }
sewardj5b470602005-02-27 13:10:48 +00001201}
1202
1203static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
1204{
1205 vassert(lo3bits < 8);
1206 vassert(IS_VALID_PFX(pfx));
sewardj98e9f342005-07-23 12:07:37 +00001207 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
sewardjdd40fdf2006-12-24 02:20:24 +00001208 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
sewardj5b470602005-02-27 13:10:48 +00001209 stmt( IRStmt_Put(
1210 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
sewardj397f88b2005-02-27 13:39:25 +00001211 toBool(sz==1 && !haveREX(pfx)) ),
sewardj5b470602005-02-27 13:10:48 +00001212 sz==4 ? unop(Iop_32Uto64,e) : e
1213 ));
1214}
1215
1216
1217/* Functions for getting register numbers from modrm bytes and REX
1218 when we don't have to consider the complexities of integer subreg
1219 accesses.
1220*/
1221/* Extract the g reg field from a modRM byte, and augment it using the
1222 REX.R bit from the supplied REX byte. The R bit usually is
1223 associated with the g register field.
1224*/
1225static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
1226{
1227 Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
1228 reg += (pfx & PFX_REXR) ? 8 : 0;
1229 return reg;
1230}
1231
1232/* Extract the e reg field from a modRM byte, and augment it using the
1233 REX.B bit from the supplied REX byte. The B bit usually is
1234 associated with the e register field (when modrm indicates e is a
1235 register, that is).
1236*/
1237static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
1238{
1239 Int rm;
1240 vassert(epartIsReg(mod_reg_rm));
1241 rm = (Int)(mod_reg_rm & 0x7);
1242 rm += (pfx & PFX_REXB) ? 8 : 0;
1243 return rm;
1244}
1245
1246
1247/* General functions for dealing with integer register access. */
1248
1249/* Produce the guest state offset for a reference to the 'g' register
1250 field in a modrm byte, taking into account REX (or its absence),
1251 and the size of the access.
1252*/
1253static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1254{
1255 UInt reg;
sewardj9b769162014-07-24 12:42:03 +00001256 vassert(host_endness == VexEndnessLE);
sewardj5b470602005-02-27 13:10:48 +00001257 vassert(IS_VALID_PFX(pfx));
1258 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1259 reg = gregOfRexRM( pfx, mod_reg_rm );
sewardj397f88b2005-02-27 13:39:25 +00001260 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
sewardj5b470602005-02-27 13:10:48 +00001261}
1262
1263static
1264IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1265{
sewardjef425db2010-01-11 10:46:18 +00001266 if (sz == 4) {
1267 sz = 8;
1268 return unop(Iop_64to32,
1269 IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
1270 szToITy(sz) ));
1271 } else {
1272 return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
1273 szToITy(sz) );
1274 }
sewardj5b470602005-02-27 13:10:48 +00001275}
1276
1277static
1278void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
1279{
sewardjdd40fdf2006-12-24 02:20:24 +00001280 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
sewardj5b470602005-02-27 13:10:48 +00001281 if (sz == 4) {
1282 e = unop(Iop_32Uto64,e);
1283 }
1284 stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
1285}
1286
1287static
florian55085f82012-11-21 00:36:55 +00001288const HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
sewardj5b470602005-02-27 13:10:48 +00001289{
1290 return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm),
sewardj397f88b2005-02-27 13:39:25 +00001291 toBool(sz==1 && !haveREX(pfx)) );
sewardj5b470602005-02-27 13:10:48 +00001292}
1293
1294
sewardjcc3d2192013-03-27 11:37:33 +00001295static
1296IRExpr* getIRegV ( Int sz, Prefix pfx )
1297{
1298 if (sz == 4) {
1299 sz = 8;
1300 return unop(Iop_64to32,
1301 IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
1302 szToITy(sz) ));
1303 } else {
1304 return IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
1305 szToITy(sz) );
1306 }
1307}
1308
1309static
1310void putIRegV ( Int sz, Prefix pfx, IRExpr* e )
1311{
1312 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1313 if (sz == 4) {
1314 e = unop(Iop_32Uto64,e);
1315 }
1316 stmt( IRStmt_Put( offsetIReg( sz, getVexNvvvv(pfx), False ), e ) );
1317}
1318
1319static
1320const HChar* nameIRegV ( Int sz, Prefix pfx )
1321{
1322 return nameIReg( sz, getVexNvvvv(pfx), False );
1323}
1324
1325
1326
sewardj5b470602005-02-27 13:10:48 +00001327/* Produce the guest state offset for a reference to the 'e' register
1328 field in a modrm byte, taking into account REX (or its absence),
1329 and the size of the access. eregOfRexRM will assert if mod_reg_rm
1330 denotes a memory access rather than a register access.
1331*/
1332static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1333{
1334 UInt reg;
sewardj9b769162014-07-24 12:42:03 +00001335 vassert(host_endness == VexEndnessLE);
sewardj5b470602005-02-27 13:10:48 +00001336 vassert(IS_VALID_PFX(pfx));
1337 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1338 reg = eregOfRexRM( pfx, mod_reg_rm );
sewardj397f88b2005-02-27 13:39:25 +00001339 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
sewardj5b470602005-02-27 13:10:48 +00001340}
1341
1342static
1343IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1344{
sewardjef425db2010-01-11 10:46:18 +00001345 if (sz == 4) {
1346 sz = 8;
1347 return unop(Iop_64to32,
1348 IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
1349 szToITy(sz) ));
1350 } else {
1351 return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
1352 szToITy(sz) );
1353 }
sewardj5b470602005-02-27 13:10:48 +00001354}
1355
1356static
1357void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
1358{
sewardjdd40fdf2006-12-24 02:20:24 +00001359 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
sewardj5b470602005-02-27 13:10:48 +00001360 if (sz == 4) {
1361 e = unop(Iop_32Uto64,e);
1362 }
1363 stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
1364}
1365
1366static
florian55085f82012-11-21 00:36:55 +00001367const HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
sewardj5b470602005-02-27 13:10:48 +00001368{
1369 return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm),
sewardj397f88b2005-02-27 13:39:25 +00001370 toBool(sz==1 && !haveREX(pfx)) );
sewardj5b470602005-02-27 13:10:48 +00001371}
1372
1373
1374/*------------------------------------------------------------*/
1375/*--- For dealing with XMM registers ---*/
1376/*------------------------------------------------------------*/
sewardjecb94892005-01-21 14:26:37 +00001377
sewardjc4530ae2012-05-21 10:18:49 +00001378static Int ymmGuestRegOffset ( UInt ymmreg )
1379{
1380 switch (ymmreg) {
1381 case 0: return OFFB_YMM0;
1382 case 1: return OFFB_YMM1;
1383 case 2: return OFFB_YMM2;
1384 case 3: return OFFB_YMM3;
1385 case 4: return OFFB_YMM4;
1386 case 5: return OFFB_YMM5;
1387 case 6: return OFFB_YMM6;
1388 case 7: return OFFB_YMM7;
1389 case 8: return OFFB_YMM8;
1390 case 9: return OFFB_YMM9;
1391 case 10: return OFFB_YMM10;
1392 case 11: return OFFB_YMM11;
1393 case 12: return OFFB_YMM12;
1394 case 13: return OFFB_YMM13;
1395 case 14: return OFFB_YMM14;
1396 case 15: return OFFB_YMM15;
1397 default: vpanic("ymmGuestRegOffset(amd64)");
1398 }
1399}
sewardj1001dc42005-02-21 08:25:55 +00001400
1401static Int xmmGuestRegOffset ( UInt xmmreg )
1402{
sewardjc4530ae2012-05-21 10:18:49 +00001403 /* Correct for little-endian host only. */
sewardj9b769162014-07-24 12:42:03 +00001404 vassert(host_endness == VexEndnessLE);
sewardjc4530ae2012-05-21 10:18:49 +00001405 return ymmGuestRegOffset( xmmreg );
sewardj1001dc42005-02-21 08:25:55 +00001406}
1407
sewardj97628592005-05-10 22:42:54 +00001408/* Lanes of vector registers are always numbered from zero being the
1409 least significant lane (rightmost in the register). */
1410
1411static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
1412{
1413 /* Correct for little-endian host only. */
sewardj9b769162014-07-24 12:42:03 +00001414 vassert(host_endness == VexEndnessLE);
sewardj97628592005-05-10 22:42:54 +00001415 vassert(laneno >= 0 && laneno < 8);
1416 return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
1417}
sewardj8d965312005-02-25 02:48:47 +00001418
1419static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
1420{
1421 /* Correct for little-endian host only. */
sewardj9b769162014-07-24 12:42:03 +00001422 vassert(host_endness == VexEndnessLE);
sewardj8d965312005-02-25 02:48:47 +00001423 vassert(laneno >= 0 && laneno < 4);
1424 return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
1425}
sewardj1001dc42005-02-21 08:25:55 +00001426
1427static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
1428{
1429 /* Correct for little-endian host only. */
sewardj9b769162014-07-24 12:42:03 +00001430 vassert(host_endness == VexEndnessLE);
sewardj1001dc42005-02-21 08:25:55 +00001431 vassert(laneno >= 0 && laneno < 2);
1432 return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
1433}
1434
sewardjc4530ae2012-05-21 10:18:49 +00001435static Int ymmGuestRegLane128offset ( UInt ymmreg, Int laneno )
1436{
1437 /* Correct for little-endian host only. */
sewardj9b769162014-07-24 12:42:03 +00001438 vassert(host_endness == VexEndnessLE);
sewardjc4530ae2012-05-21 10:18:49 +00001439 vassert(laneno >= 0 && laneno < 2);
1440 return ymmGuestRegOffset( ymmreg ) + 16 * laneno;
1441}
sewardj1001dc42005-02-21 08:25:55 +00001442
sewardj66becf32012-06-18 23:15:16 +00001443static Int ymmGuestRegLane64offset ( UInt ymmreg, Int laneno )
1444{
1445 /* Correct for little-endian host only. */
sewardj9b769162014-07-24 12:42:03 +00001446 vassert(host_endness == VexEndnessLE);
sewardj66becf32012-06-18 23:15:16 +00001447 vassert(laneno >= 0 && laneno < 4);
1448 return ymmGuestRegOffset( ymmreg ) + 8 * laneno;
1449}
1450
1451static Int ymmGuestRegLane32offset ( UInt ymmreg, Int laneno )
1452{
1453 /* Correct for little-endian host only. */
sewardj9b769162014-07-24 12:42:03 +00001454 vassert(host_endness == VexEndnessLE);
sewardj66becf32012-06-18 23:15:16 +00001455 vassert(laneno >= 0 && laneno < 8);
1456 return ymmGuestRegOffset( ymmreg ) + 4 * laneno;
1457}
1458
sewardj1001dc42005-02-21 08:25:55 +00001459static IRExpr* getXMMReg ( UInt xmmreg )
1460{
1461 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
1462}
1463
1464static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
1465{
1466 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
1467}
1468
sewardj18303862005-02-21 12:36:54 +00001469static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
1470{
1471 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
1472}
1473
sewardj8d965312005-02-25 02:48:47 +00001474static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
1475{
1476 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
1477}
1478
sewardjc49ce232005-02-25 13:03:03 +00001479static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
1480{
1481 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
1482}
sewardj1001dc42005-02-21 08:25:55 +00001483
de5a70f5c2010-04-01 23:08:59 +00001484static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
1485{
1486 return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
1487}
1488
sewardj1001dc42005-02-21 08:25:55 +00001489static void putXMMReg ( UInt xmmreg, IRExpr* e )
1490{
sewardjdd40fdf2006-12-24 02:20:24 +00001491 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
sewardj1001dc42005-02-21 08:25:55 +00001492 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
1493}
1494
1495static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
1496{
sewardjdd40fdf2006-12-24 02:20:24 +00001497 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
sewardj1001dc42005-02-21 08:25:55 +00001498 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1499}
1500
sewardj1a01e652005-02-23 11:39:21 +00001501static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
1502{
sewardjdd40fdf2006-12-24 02:20:24 +00001503 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
sewardj1a01e652005-02-23 11:39:21 +00001504 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1505}
1506
sewardj8d965312005-02-25 02:48:47 +00001507static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
1508{
sewardjdd40fdf2006-12-24 02:20:24 +00001509 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
sewardj8d965312005-02-25 02:48:47 +00001510 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1511}
1512
1513static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
1514{
sewardjdd40fdf2006-12-24 02:20:24 +00001515 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
sewardj8d965312005-02-25 02:48:47 +00001516 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1517}
1518
sewardjc4530ae2012-05-21 10:18:49 +00001519static IRExpr* getYMMReg ( UInt xmmreg )
1520{
1521 return IRExpr_Get( ymmGuestRegOffset(xmmreg), Ity_V256 );
1522}
1523
1524static IRExpr* getYMMRegLane128 ( UInt ymmreg, Int laneno )
1525{
1526 return IRExpr_Get( ymmGuestRegLane128offset(ymmreg,laneno), Ity_V128 );
1527}
1528
sewardj82096922012-06-24 14:57:59 +00001529static IRExpr* getYMMRegLane64 ( UInt ymmreg, Int laneno )
1530{
1531 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_I64 );
1532}
1533
sewardj8eb7ae82012-06-24 14:00:27 +00001534static IRExpr* getYMMRegLane32 ( UInt ymmreg, Int laneno )
1535{
1536 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_I32 );
1537}
1538
sewardjc4530ae2012-05-21 10:18:49 +00001539static void putYMMReg ( UInt ymmreg, IRExpr* e )
1540{
1541 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V256);
1542 stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg), e ) );
1543}
1544
1545static void putYMMRegLane128 ( UInt ymmreg, Int laneno, IRExpr* e )
1546{
1547 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
1548 stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg,laneno), e ) );
1549}
1550
sewardj66becf32012-06-18 23:15:16 +00001551static void putYMMRegLane64F ( UInt ymmreg, Int laneno, IRExpr* e )
1552{
1553 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
1554 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
1555}
1556
sewardj82096922012-06-24 14:57:59 +00001557static void putYMMRegLane64 ( UInt ymmreg, Int laneno, IRExpr* e )
1558{
1559 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1560 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
1561}
1562
sewardj66becf32012-06-18 23:15:16 +00001563static void putYMMRegLane32F ( UInt ymmreg, Int laneno, IRExpr* e )
1564{
1565 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
1566 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
1567}
1568
1569static void putYMMRegLane32 ( UInt ymmreg, Int laneno, IRExpr* e )
1570{
1571 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1572 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
1573}
1574
sewardj1001dc42005-02-21 08:25:55 +00001575static IRExpr* mkV128 ( UShort mask )
1576{
1577 return IRExpr_Const(IRConst_V128(mask));
1578}
sewardjdf0e0022005-01-25 15:48:43 +00001579
sewardjc4530ae2012-05-21 10:18:49 +00001580/* Write the low half of a YMM reg and zero out the upper half. */
1581static void putYMMRegLoAndZU ( UInt ymmreg, IRExpr* e )
1582{
1583 putYMMRegLane128( ymmreg, 0, e );
1584 putYMMRegLane128( ymmreg, 1, mkV128(0) );
1585}
1586
sewardje8f65252005-08-23 23:44:35 +00001587static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
1588{
sewardjdd40fdf2006-12-24 02:20:24 +00001589 vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
1590 vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
sewardje8f65252005-08-23 23:44:35 +00001591 return unop(Iop_64to1,
1592 binop(Iop_And64,
1593 unop(Iop_1Uto64,x),
1594 unop(Iop_1Uto64,y)));
1595}
1596
sewardje9d8a262009-07-01 08:06:34 +00001597/* Generate a compare-and-swap operation, operating on memory at
1598 'addr'. The expected value is 'expVal' and the new value is
1599 'newVal'. If the operation fails, then transfer control (with a
1600 no-redir jump (XXX no -- see comment at top of this file)) to
1601 'restart_point', which is presumably the address of the guest
1602 instruction again -- retrying, essentially. */
1603static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
1604 Addr64 restart_point )
1605{
1606 IRCAS* cas;
1607 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
1608 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
1609 IRTemp oldTmp = newTemp(tyE);
1610 IRTemp expTmp = newTemp(tyE);
1611 vassert(tyE == tyN);
1612 vassert(tyE == Ity_I64 || tyE == Ity_I32
1613 || tyE == Ity_I16 || tyE == Ity_I8);
1614 assign(expTmp, expVal);
1615 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
1616 NULL, mkexpr(expTmp), NULL, newVal );
1617 stmt( IRStmt_CAS(cas) );
1618 stmt( IRStmt_Exit(
sewardj1fb8c922009-07-12 12:56:53 +00001619 binop( mkSizedOp(tyE,Iop_CasCmpNE8),
1620 mkexpr(oldTmp), mkexpr(expTmp) ),
sewardje9d8a262009-07-01 08:06:34 +00001621 Ijk_Boring, /*Ijk_NoRedir*/
sewardjc6f970f2012-04-02 21:54:49 +00001622 IRConst_U64( restart_point ),
1623 OFFB_RIP
sewardje9d8a262009-07-01 08:06:34 +00001624 ));
1625}
1626
sewardj5b470602005-02-27 13:10:48 +00001627
sewardj118b23e2005-01-29 02:14:44 +00001628/*------------------------------------------------------------*/
sewardje8f65252005-08-23 23:44:35 +00001629/*--- Helpers for %rflags. ---*/
sewardj118b23e2005-01-29 02:14:44 +00001630/*------------------------------------------------------------*/
1631
1632/* -------------- Evaluating the flags-thunk. -------------- */
1633
1634/* Build IR to calculate all the eflags from stored
1635 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1636 Ity_I64. */
1637static IRExpr* mk_amd64g_calculate_rflags_all ( void )
1638{
1639 IRExpr** args
1640 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1641 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1642 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1643 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1644 IRExpr* call
1645 = mkIRExprCCall(
1646 Ity_I64,
1647 0/*regparm*/,
1648 "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
1649 args
1650 );
1651 /* Exclude OP and NDEP from definedness checking. We're only
1652 interested in DEP1 and DEP2. */
1653 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1654 return call;
1655}
sewardj3ca55a12005-01-27 16:06:23 +00001656
1657/* Build IR to calculate some particular condition from stored
1658 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1659 Ity_Bit. */
1660static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
1661{
1662 IRExpr** args
1663 = mkIRExprVec_5( mkU64(cond),
1664 IRExpr_Get(OFFB_CC_OP, Ity_I64),
1665 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1666 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1667 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1668 IRExpr* call
1669 = mkIRExprCCall(
1670 Ity_I64,
1671 0/*regparm*/,
1672 "amd64g_calculate_condition", &amd64g_calculate_condition,
1673 args
1674 );
1675 /* Exclude the requested condition, OP and NDEP from definedness
1676 checking. We're only interested in DEP1 and DEP2. */
1677 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
sewardje58967e2005-04-27 11:50:56 +00001678 return unop(Iop_64to1, call);
sewardj3ca55a12005-01-27 16:06:23 +00001679}
sewardjdf0e0022005-01-25 15:48:43 +00001680
1681/* Build IR to calculate just the carry flag from stored
1682 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I64. */
1683static IRExpr* mk_amd64g_calculate_rflags_c ( void )
1684{
1685 IRExpr** args
1686 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1687 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1688 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1689 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1690 IRExpr* call
1691 = mkIRExprCCall(
1692 Ity_I64,
1693 0/*regparm*/,
1694 "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
1695 args
1696 );
1697 /* Exclude OP and NDEP from definedness checking. We're only
1698 interested in DEP1 and DEP2. */
1699 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1700 return call;
1701}
1702
1703
1704/* -------------- Building the flags-thunk. -------------- */
1705
1706/* The machinery in this section builds the flag-thunk following a
1707 flag-setting operation. Hence the various setFlags_* functions.
1708*/
1709
1710static Bool isAddSub ( IROp op8 )
1711{
sewardj7a240552005-01-28 21:37:12 +00001712 return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
sewardjdf0e0022005-01-25 15:48:43 +00001713}
1714
sewardj3ca55a12005-01-27 16:06:23 +00001715static Bool isLogic ( IROp op8 )
1716{
sewardj7a240552005-01-28 21:37:12 +00001717 return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
sewardj3ca55a12005-01-27 16:06:23 +00001718}
sewardjdf0e0022005-01-25 15:48:43 +00001719
sewardj656b8f42013-03-27 22:15:36 +00001720/* U-widen 1/8/16/32/64 bit int expr to 64. */
sewardjdf0e0022005-01-25 15:48:43 +00001721static IRExpr* widenUto64 ( IRExpr* e )
1722{
sewardjdd40fdf2006-12-24 02:20:24 +00001723 switch (typeOfIRExpr(irsb->tyenv,e)) {
sewardjdf0e0022005-01-25 15:48:43 +00001724 case Ity_I64: return e;
1725 case Ity_I32: return unop(Iop_32Uto64, e);
sewardje58967e2005-04-27 11:50:56 +00001726 case Ity_I16: return unop(Iop_16Uto64, e);
1727 case Ity_I8: return unop(Iop_8Uto64, e);
sewardj656b8f42013-03-27 22:15:36 +00001728 case Ity_I1: return unop(Iop_1Uto64, e);
sewardj118b23e2005-01-29 02:14:44 +00001729 default: vpanic("widenUto64");
sewardjdf0e0022005-01-25 15:48:43 +00001730 }
1731}
1732
sewardj118b23e2005-01-29 02:14:44 +00001733/* S-widen 8/16/32/64 bit int expr to 32. */
1734static IRExpr* widenSto64 ( IRExpr* e )
1735{
sewardjdd40fdf2006-12-24 02:20:24 +00001736 switch (typeOfIRExpr(irsb->tyenv,e)) {
sewardj118b23e2005-01-29 02:14:44 +00001737 case Ity_I64: return e;
1738 case Ity_I32: return unop(Iop_32Sto64, e);
sewardje58967e2005-04-27 11:50:56 +00001739 case Ity_I16: return unop(Iop_16Sto64, e);
1740 case Ity_I8: return unop(Iop_8Sto64, e);
sewardj118b23e2005-01-29 02:14:44 +00001741 default: vpanic("widenSto64");
1742 }
1743}
sewardjdf0e0022005-01-25 15:48:43 +00001744
1745/* Narrow 8/16/32/64 bit int expr to 8/16/32/64. Clearly only some
1746 of these combinations make sense. */
1747static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
1748{
sewardjdd40fdf2006-12-24 02:20:24 +00001749 IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
sewardjdf0e0022005-01-25 15:48:43 +00001750 if (src_ty == dst_ty)
1751 return e;
1752 if (src_ty == Ity_I32 && dst_ty == Ity_I16)
1753 return unop(Iop_32to16, e);
1754 if (src_ty == Ity_I32 && dst_ty == Ity_I8)
1755 return unop(Iop_32to8, e);
sewardj118b23e2005-01-29 02:14:44 +00001756 if (src_ty == Ity_I64 && dst_ty == Ity_I32)
1757 return unop(Iop_64to32, e);
1758 if (src_ty == Ity_I64 && dst_ty == Ity_I16)
sewardje58967e2005-04-27 11:50:56 +00001759 return unop(Iop_64to16, e);
sewardj03b07cc2005-01-31 18:09:43 +00001760 if (src_ty == Ity_I64 && dst_ty == Ity_I8)
sewardje58967e2005-04-27 11:50:56 +00001761 return unop(Iop_64to8, e);
sewardjdf0e0022005-01-25 15:48:43 +00001762
1763 vex_printf("\nsrc, dst tys are: ");
1764 ppIRType(src_ty);
1765 vex_printf(", ");
1766 ppIRType(dst_ty);
1767 vex_printf("\n");
1768 vpanic("narrowTo(amd64)");
1769}
1770
1771
1772/* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
1773 auto-sized up to the real op. */
1774
1775static
1776void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
1777{
1778 Int ccOp = 0;
1779 switch (ty) {
1780 case Ity_I8: ccOp = 0; break;
1781 case Ity_I16: ccOp = 1; break;
1782 case Ity_I32: ccOp = 2; break;
1783 case Ity_I64: ccOp = 3; break;
1784 default: vassert(0);
1785 }
1786 switch (op8) {
1787 case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
1788 case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
1789 default: ppIROp(op8);
1790 vpanic("setFlags_DEP1_DEP2(amd64)");
1791 }
1792 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1793 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1794 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
1795}
1796
1797
1798/* Set the OP and DEP1 fields only, and write zero to DEP2. */
1799
1800static
1801void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
1802{
1803 Int ccOp = 0;
1804 switch (ty) {
1805 case Ity_I8: ccOp = 0; break;
1806 case Ity_I16: ccOp = 1; break;
1807 case Ity_I32: ccOp = 2; break;
1808 case Ity_I64: ccOp = 3; break;
1809 default: vassert(0);
1810 }
1811 switch (op8) {
1812 case Iop_Or8:
1813 case Iop_And8:
1814 case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
1815 default: ppIROp(op8);
1816 vpanic("setFlags_DEP1(amd64)");
1817 }
1818 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1819 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1820 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
1821}
1822
1823
sewardj118b23e2005-01-29 02:14:44 +00001824/* For shift operations, we put in the result and the undershifted
1825 result. Except if the shift amount is zero, the thunk is left
1826 unchanged. */
1827
1828static void setFlags_DEP1_DEP2_shift ( IROp op64,
1829 IRTemp res,
1830 IRTemp resUS,
1831 IRType ty,
1832 IRTemp guard )
1833{
1834 Int ccOp = 0;
1835 switch (ty) {
1836 case Ity_I8: ccOp = 0; break;
1837 case Ity_I16: ccOp = 1; break;
1838 case Ity_I32: ccOp = 2; break;
1839 case Ity_I64: ccOp = 3; break;
1840 default: vassert(0);
1841 }
1842
1843 vassert(guard);
1844
1845 /* Both kinds of right shifts are handled by the same thunk
1846 operation. */
1847 switch (op64) {
1848 case Iop_Shr64:
1849 case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
1850 case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
1851 default: ppIROp(op64);
1852 vpanic("setFlags_DEP1_DEP2_shift(amd64)");
1853 }
1854
sewardj009230b2013-01-26 11:47:55 +00001855 /* guard :: Ity_I8. We need to convert it to I1. */
1856 IRTemp guardB = newTemp(Ity_I1);
1857 assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) );
1858
sewardj118b23e2005-01-29 02:14:44 +00001859 /* DEP1 contains the result, DEP2 contains the undershifted value. */
1860 stmt( IRStmt_Put( OFFB_CC_OP,
florian99dd03e2013-01-29 03:56:06 +00001861 IRExpr_ITE( mkexpr(guardB),
1862 mkU64(ccOp),
1863 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
sewardj118b23e2005-01-29 02:14:44 +00001864 stmt( IRStmt_Put( OFFB_CC_DEP1,
florian99dd03e2013-01-29 03:56:06 +00001865 IRExpr_ITE( mkexpr(guardB),
1866 widenUto64(mkexpr(res)),
1867 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
sewardj118b23e2005-01-29 02:14:44 +00001868 stmt( IRStmt_Put( OFFB_CC_DEP2,
florian99dd03e2013-01-29 03:56:06 +00001869 IRExpr_ITE( mkexpr(guardB),
1870 widenUto64(mkexpr(resUS)),
1871 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
sewardj118b23e2005-01-29 02:14:44 +00001872}
sewardj354e5c62005-01-27 20:12:52 +00001873
1874
1875/* For the inc/dec case, we store in DEP1 the result value and in NDEP
1876 the former value of the carry flag, which unfortunately we have to
1877 compute. */
1878
1879static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
1880{
1881 Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;
1882
1883 switch (ty) {
1884 case Ity_I8: ccOp += 0; break;
1885 case Ity_I16: ccOp += 1; break;
1886 case Ity_I32: ccOp += 2; break;
1887 case Ity_I64: ccOp += 3; break;
1888 default: vassert(0);
1889 }
1890
1891 /* This has to come first, because calculating the C flag
1892 may require reading all four thunk fields. */
1893 stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
1894 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
sewardj478646f2008-05-01 20:13:04 +00001895 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
sewardj354e5c62005-01-27 20:12:52 +00001896 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
1897}
1898
1899
sewardj32b2bbe2005-01-28 00:50:10 +00001900/* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
1901 two arguments. */
1902
1903static
1904void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
1905{
1906 switch (ty) {
1907 case Ity_I8:
1908 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) );
1909 break;
1910 case Ity_I16:
1911 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) );
1912 break;
1913 case Ity_I32:
1914 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) );
1915 break;
1916 case Ity_I64:
1917 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) );
1918 break;
1919 default:
1920 vpanic("setFlags_MUL(amd64)");
1921 }
1922 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
1923 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
1924}
sewardj3ca55a12005-01-27 16:06:23 +00001925
1926
1927/* -------------- Condition codes. -------------- */
1928
1929/* Condition codes, using the AMD encoding. */
1930
florian55085f82012-11-21 00:36:55 +00001931static const HChar* name_AMD64Condcode ( AMD64Condcode cond )
sewardj3ca55a12005-01-27 16:06:23 +00001932{
1933 switch (cond) {
1934 case AMD64CondO: return "o";
1935 case AMD64CondNO: return "no";
1936 case AMD64CondB: return "b";
sewardje941eea2005-01-30 19:52:28 +00001937 case AMD64CondNB: return "ae"; /*"nb";*/
1938 case AMD64CondZ: return "e"; /*"z";*/
1939 case AMD64CondNZ: return "ne"; /*"nz";*/
sewardj3ca55a12005-01-27 16:06:23 +00001940 case AMD64CondBE: return "be";
sewardje941eea2005-01-30 19:52:28 +00001941 case AMD64CondNBE: return "a"; /*"nbe";*/
sewardj3ca55a12005-01-27 16:06:23 +00001942 case AMD64CondS: return "s";
1943 case AMD64CondNS: return "ns";
1944 case AMD64CondP: return "p";
1945 case AMD64CondNP: return "np";
1946 case AMD64CondL: return "l";
sewardje941eea2005-01-30 19:52:28 +00001947 case AMD64CondNL: return "ge"; /*"nl";*/
sewardj3ca55a12005-01-27 16:06:23 +00001948 case AMD64CondLE: return "le";
sewardje941eea2005-01-30 19:52:28 +00001949 case AMD64CondNLE: return "g"; /*"nle";*/
sewardj3ca55a12005-01-27 16:06:23 +00001950 case AMD64CondAlways: return "ALWAYS";
1951 default: vpanic("name_AMD64Condcode");
1952 }
1953}
1954
sewardj1389d4d2005-01-28 13:46:29 +00001955static
1956AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode cond,
1957 /*OUT*/Bool* needInvert )
1958{
1959 vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
1960 if (cond & 1) {
1961 *needInvert = True;
1962 return cond-1;
1963 } else {
1964 *needInvert = False;
1965 return cond;
1966 }
1967}
sewardjdf0e0022005-01-25 15:48:43 +00001968
1969
1970/* -------------- Helpers for ADD/SUB with carry. -------------- */
1971
1972/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
1973 appropriately.
sewardje9d8a262009-07-01 08:06:34 +00001974
1975 Optionally, generate a store for the 'tres' value. This can either
1976 be a normal store, or it can be a cas-with-possible-failure style
1977 store:
1978
1979 if taddr is IRTemp_INVALID, then no store is generated.
1980
1981 if taddr is not IRTemp_INVALID, then a store (using taddr as
1982 the address) is generated:
1983
1984 if texpVal is IRTemp_INVALID then a normal store is
1985 generated, and restart_point must be zero (it is irrelevant).
1986
1987 if texpVal is not IRTemp_INVALID then a cas-style store is
1988 generated. texpVal is the expected value, restart_point
1989 is the restart point if the store fails, and texpVal must
1990 have the same type as tres.
1991
sewardjdf0e0022005-01-25 15:48:43 +00001992*/
1993static void helper_ADC ( Int sz,
sewardje9d8a262009-07-01 08:06:34 +00001994 IRTemp tres, IRTemp ta1, IRTemp ta2,
1995 /* info about optional store: */
1996 IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
sewardjdf0e0022005-01-25 15:48:43 +00001997{
1998 UInt thunkOp;
1999 IRType ty = szToITy(sz);
2000 IRTemp oldc = newTemp(Ity_I64);
2001 IRTemp oldcn = newTemp(ty);
2002 IROp plus = mkSizedOp(ty, Iop_Add8);
2003 IROp xor = mkSizedOp(ty, Iop_Xor8);
2004
sewardje9d8a262009-07-01 08:06:34 +00002005 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2006
sewardjdf0e0022005-01-25 15:48:43 +00002007 switch (sz) {
2008 case 8: thunkOp = AMD64G_CC_OP_ADCQ; break;
2009 case 4: thunkOp = AMD64G_CC_OP_ADCL; break;
2010 case 2: thunkOp = AMD64G_CC_OP_ADCW; break;
2011 case 1: thunkOp = AMD64G_CC_OP_ADCB; break;
2012 default: vassert(0);
2013 }
2014
2015 /* oldc = old carry flag, 0 or 1 */
2016 assign( oldc, binop(Iop_And64,
2017 mk_amd64g_calculate_rflags_c(),
2018 mkU64(1)) );
2019
2020 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
2021
2022 assign( tres, binop(plus,
2023 binop(plus,mkexpr(ta1),mkexpr(ta2)),
2024 mkexpr(oldcn)) );
2025
sewardje9d8a262009-07-01 08:06:34 +00002026 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2027 start of this function. */
2028 if (taddr != IRTemp_INVALID) {
2029 if (texpVal == IRTemp_INVALID) {
2030 vassert(restart_point == 0);
2031 storeLE( mkexpr(taddr), mkexpr(tres) );
2032 } else {
2033 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
2034 /* .. and hence 'texpVal' has the same type as 'tres'. */
2035 casLE( mkexpr(taddr),
2036 mkexpr(texpVal), mkexpr(tres), restart_point );
2037 }
2038 }
2039
sewardjdf0e0022005-01-25 15:48:43 +00002040 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
sewardj820611e2005-08-24 10:56:01 +00002041 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
2042 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2043 mkexpr(oldcn)) )) );
sewardjdf0e0022005-01-25 15:48:43 +00002044 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
2045}
2046
2047
2048/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
sewardje9d8a262009-07-01 08:06:34 +00002049 appropriately. As with helper_ADC, possibly generate a store of
2050 the result -- see comments on helper_ADC for details.
sewardjdf0e0022005-01-25 15:48:43 +00002051*/
2052static void helper_SBB ( Int sz,
sewardje9d8a262009-07-01 08:06:34 +00002053 IRTemp tres, IRTemp ta1, IRTemp ta2,
2054 /* info about optional store: */
2055 IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
sewardjdf0e0022005-01-25 15:48:43 +00002056{
2057 UInt thunkOp;
2058 IRType ty = szToITy(sz);
2059 IRTemp oldc = newTemp(Ity_I64);
2060 IRTemp oldcn = newTemp(ty);
2061 IROp minus = mkSizedOp(ty, Iop_Sub8);
2062 IROp xor = mkSizedOp(ty, Iop_Xor8);
2063
sewardje9d8a262009-07-01 08:06:34 +00002064 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2065
sewardjdf0e0022005-01-25 15:48:43 +00002066 switch (sz) {
2067 case 8: thunkOp = AMD64G_CC_OP_SBBQ; break;
2068 case 4: thunkOp = AMD64G_CC_OP_SBBL; break;
2069 case 2: thunkOp = AMD64G_CC_OP_SBBW; break;
2070 case 1: thunkOp = AMD64G_CC_OP_SBBB; break;
2071 default: vassert(0);
2072 }
2073
2074 /* oldc = old carry flag, 0 or 1 */
2075 assign( oldc, binop(Iop_And64,
2076 mk_amd64g_calculate_rflags_c(),
2077 mkU64(1)) );
2078
2079 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
2080
2081 assign( tres, binop(minus,
2082 binop(minus,mkexpr(ta1),mkexpr(ta2)),
2083 mkexpr(oldcn)) );
2084
sewardje9d8a262009-07-01 08:06:34 +00002085 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2086 start of this function. */
2087 if (taddr != IRTemp_INVALID) {
2088 if (texpVal == IRTemp_INVALID) {
2089 vassert(restart_point == 0);
2090 storeLE( mkexpr(taddr), mkexpr(tres) );
2091 } else {
2092 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
2093 /* .. and hence 'texpVal' has the same type as 'tres'. */
2094 casLE( mkexpr(taddr),
2095 mkexpr(texpVal), mkexpr(tres), restart_point );
2096 }
2097 }
2098
sewardjdf0e0022005-01-25 15:48:43 +00002099 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
sewardj820611e2005-08-24 10:56:01 +00002100 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
2101 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2102 mkexpr(oldcn)) )) );
sewardjdf0e0022005-01-25 15:48:43 +00002103 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
2104}
2105
2106
sewardj3ca55a12005-01-27 16:06:23 +00002107/* -------------- Helpers for disassembly printing. -------------- */
2108
florian55085f82012-11-21 00:36:55 +00002109static const HChar* nameGrp1 ( Int opc_aux )
sewardj3ca55a12005-01-27 16:06:23 +00002110{
florian55085f82012-11-21 00:36:55 +00002111 static const HChar* grp1_names[8]
sewardj3ca55a12005-01-27 16:06:23 +00002112 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
2113 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
2114 return grp1_names[opc_aux];
2115}
2116
florian55085f82012-11-21 00:36:55 +00002117static const HChar* nameGrp2 ( Int opc_aux )
sewardj118b23e2005-01-29 02:14:44 +00002118{
florian55085f82012-11-21 00:36:55 +00002119 static const HChar* grp2_names[8]
sewardj118b23e2005-01-29 02:14:44 +00002120 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
sewardje941eea2005-01-30 19:52:28 +00002121 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
sewardj118b23e2005-01-29 02:14:44 +00002122 return grp2_names[opc_aux];
2123}
2124
florian55085f82012-11-21 00:36:55 +00002125static const HChar* nameGrp4 ( Int opc_aux )
sewardj03b07cc2005-01-31 18:09:43 +00002126{
florian55085f82012-11-21 00:36:55 +00002127 static const HChar* grp4_names[8]
sewardj03b07cc2005-01-31 18:09:43 +00002128 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
2129 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
2130 return grp4_names[opc_aux];
2131}
sewardj354e5c62005-01-27 20:12:52 +00002132
florian55085f82012-11-21 00:36:55 +00002133static const HChar* nameGrp5 ( Int opc_aux )
sewardj354e5c62005-01-27 20:12:52 +00002134{
florian55085f82012-11-21 00:36:55 +00002135 static const HChar* grp5_names[8]
sewardj354e5c62005-01-27 20:12:52 +00002136 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
2137 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
2138 return grp5_names[opc_aux];
2139}
2140
florian55085f82012-11-21 00:36:55 +00002141static const HChar* nameGrp8 ( Int opc_aux )
sewardj1d511802005-03-27 17:59:45 +00002142{
florian55085f82012-11-21 00:36:55 +00002143 static const HChar* grp8_names[8]
sewardj1d511802005-03-27 17:59:45 +00002144 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
2145 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
2146 return grp8_names[opc_aux];
2147}
2148
florian55085f82012-11-21 00:36:55 +00002149//.. static const HChar* nameSReg ( UInt sreg )
sewardjd20c8852005-01-20 20:04:07 +00002150//.. {
2151//.. switch (sreg) {
2152//.. case R_ES: return "%es";
2153//.. case R_CS: return "%cs";
2154//.. case R_SS: return "%ss";
2155//.. case R_DS: return "%ds";
2156//.. case R_FS: return "%fs";
2157//.. case R_GS: return "%gs";
2158//.. default: vpanic("nameSReg(x86)");
2159//.. }
2160//.. }
sewardj8711f662005-05-09 17:52:56 +00002161
florian55085f82012-11-21 00:36:55 +00002162static const HChar* nameMMXReg ( Int mmxreg )
sewardj8711f662005-05-09 17:52:56 +00002163{
florian55085f82012-11-21 00:36:55 +00002164 static const HChar* mmx_names[8]
sewardj8711f662005-05-09 17:52:56 +00002165 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
2166 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
2167 return mmx_names[mmxreg];
2168}
sewardj1001dc42005-02-21 08:25:55 +00002169
florian55085f82012-11-21 00:36:55 +00002170static const HChar* nameXMMReg ( Int xmmreg )
sewardj1001dc42005-02-21 08:25:55 +00002171{
florian55085f82012-11-21 00:36:55 +00002172 static const HChar* xmm_names[16]
sewardj1001dc42005-02-21 08:25:55 +00002173 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
2174 "%xmm4", "%xmm5", "%xmm6", "%xmm7",
2175 "%xmm8", "%xmm9", "%xmm10", "%xmm11",
2176 "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
2177 if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
2178 return xmm_names[xmmreg];
2179}
2180
florian55085f82012-11-21 00:36:55 +00002181static const HChar* nameMMXGran ( Int gran )
sewardj8711f662005-05-09 17:52:56 +00002182{
2183 switch (gran) {
2184 case 0: return "b";
2185 case 1: return "w";
2186 case 2: return "d";
2187 case 3: return "q";
2188 default: vpanic("nameMMXGran(amd64,guest)");
2189 }
2190}
sewardjdf0e0022005-01-25 15:48:43 +00002191
florianf8883992013-01-15 03:25:17 +00002192static HChar nameISize ( Int size )
sewardjdf0e0022005-01-25 15:48:43 +00002193{
2194 switch (size) {
2195 case 8: return 'q';
2196 case 4: return 'l';
2197 case 2: return 'w';
2198 case 1: return 'b';
2199 default: vpanic("nameISize(amd64)");
2200 }
2201}
2202
florian55085f82012-11-21 00:36:55 +00002203static const HChar* nameYMMReg ( Int ymmreg )
sewardjc4530ae2012-05-21 10:18:49 +00002204{
florian55085f82012-11-21 00:36:55 +00002205 static const HChar* ymm_names[16]
sewardjc4530ae2012-05-21 10:18:49 +00002206 = { "%ymm0", "%ymm1", "%ymm2", "%ymm3",
2207 "%ymm4", "%ymm5", "%ymm6", "%ymm7",
2208 "%ymm8", "%ymm9", "%ymm10", "%ymm11",
2209 "%ymm12", "%ymm13", "%ymm14", "%ymm15" };
2210 if (ymmreg < 0 || ymmreg > 15) vpanic("nameYMMReg(amd64)");
2211 return ymm_names[ymmreg];
2212}
2213
sewardjdf0e0022005-01-25 15:48:43 +00002214
2215/*------------------------------------------------------------*/
2216/*--- JMP helpers ---*/
2217/*------------------------------------------------------------*/
2218
sewardjc6f970f2012-04-02 21:54:49 +00002219static void jmp_lit( /*MOD*/DisResult* dres,
2220 IRJumpKind kind, Addr64 d64 )
sewardjdf0e0022005-01-25 15:48:43 +00002221{
sewardjc6f970f2012-04-02 21:54:49 +00002222 vassert(dres->whatNext == Dis_Continue);
2223 vassert(dres->len == 0);
2224 vassert(dres->continueAt == 0);
2225 vassert(dres->jk_StopHere == Ijk_INVALID);
2226 dres->whatNext = Dis_StopHere;
2227 dres->jk_StopHere = kind;
2228 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) );
sewardjdf0e0022005-01-25 15:48:43 +00002229}
2230
sewardjc6f970f2012-04-02 21:54:49 +00002231static void jmp_treg( /*MOD*/DisResult* dres,
2232 IRJumpKind kind, IRTemp t )
sewardj2f959cc2005-01-26 01:19:35 +00002233{
sewardjc6f970f2012-04-02 21:54:49 +00002234 vassert(dres->whatNext == Dis_Continue);
2235 vassert(dres->len == 0);
2236 vassert(dres->continueAt == 0);
2237 vassert(dres->jk_StopHere == Ijk_INVALID);
2238 dres->whatNext = Dis_StopHere;
2239 dres->jk_StopHere = kind;
2240 stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) );
sewardj2f959cc2005-01-26 01:19:35 +00002241}
2242
sewardj1389d4d2005-01-28 13:46:29 +00002243static
sewardjc6f970f2012-04-02 21:54:49 +00002244void jcc_01 ( /*MOD*/DisResult* dres,
2245 AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
sewardj1389d4d2005-01-28 13:46:29 +00002246{
2247 Bool invert;
2248 AMD64Condcode condPos;
sewardjc6f970f2012-04-02 21:54:49 +00002249 vassert(dres->whatNext == Dis_Continue);
2250 vassert(dres->len == 0);
2251 vassert(dres->continueAt == 0);
2252 vassert(dres->jk_StopHere == Ijk_INVALID);
2253 dres->whatNext = Dis_StopHere;
2254 dres->jk_StopHere = Ijk_Boring;
sewardj1389d4d2005-01-28 13:46:29 +00002255 condPos = positiveIse_AMD64Condcode ( cond, &invert );
2256 if (invert) {
2257 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2258 Ijk_Boring,
sewardjc6f970f2012-04-02 21:54:49 +00002259 IRConst_U64(d64_false),
2260 OFFB_RIP ) );
2261 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) );
sewardj1389d4d2005-01-28 13:46:29 +00002262 } else {
2263 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2264 Ijk_Boring,
sewardjc6f970f2012-04-02 21:54:49 +00002265 IRConst_U64(d64_true),
2266 OFFB_RIP ) );
2267 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) );
sewardj1389d4d2005-01-28 13:46:29 +00002268 }
2269}
sewardjb3a04292005-01-21 20:33:44 +00002270
sewardj478646f2008-05-01 20:13:04 +00002271/* Let new_rsp be the %rsp value after a call/return. Let nia be the
2272 guest address of the next instruction to be executed.
2273
2274 This function generates an AbiHint to say that -128(%rsp)
2275 .. -1(%rsp) should now be regarded as uninitialised.
sewardj5a9ffab2005-05-12 17:55:01 +00002276*/
sewardjaca070a2006-10-17 00:28:22 +00002277static
floriancacba8e2014-12-15 18:58:07 +00002278void make_redzone_AbiHint ( const VexAbiInfo* vbi,
florian55085f82012-11-21 00:36:55 +00002279 IRTemp new_rsp, IRTemp nia, const HChar* who )
sewardj5a9ffab2005-05-12 17:55:01 +00002280{
sewardjdd40fdf2006-12-24 02:20:24 +00002281 Int szB = vbi->guest_stack_redzone_size;
sewardjaca070a2006-10-17 00:28:22 +00002282 vassert(szB >= 0);
2283
2284 /* A bit of a kludge. Currently the only AbI we've guested AMD64
2285 for is ELF. So just check it's the expected 128 value
2286 (paranoia). */
2287 vassert(szB == 128);
2288
sewardj5a9ffab2005-05-12 17:55:01 +00002289 if (0) vex_printf("AbiHint: %s\n", who);
sewardjdd40fdf2006-12-24 02:20:24 +00002290 vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
sewardj478646f2008-05-01 20:13:04 +00002291 vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
sewardjaca070a2006-10-17 00:28:22 +00002292 if (szB > 0)
2293 stmt( IRStmt_AbiHint(
2294 binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)),
sewardj478646f2008-05-01 20:13:04 +00002295 szB,
2296 mkexpr(nia)
sewardjaca070a2006-10-17 00:28:22 +00002297 ));
sewardj5a9ffab2005-05-12 17:55:01 +00002298}
2299
sewardjb3a04292005-01-21 20:33:44 +00002300
2301/*------------------------------------------------------------*/
2302/*--- Disassembling addressing modes ---*/
2303/*------------------------------------------------------------*/
2304
2305static
florian55085f82012-11-21 00:36:55 +00002306const HChar* segRegTxt ( Prefix pfx )
sewardjb3a04292005-01-21 20:33:44 +00002307{
2308 if (pfx & PFX_CS) return "%cs:";
2309 if (pfx & PFX_DS) return "%ds:";
2310 if (pfx & PFX_ES) return "%es:";
2311 if (pfx & PFX_FS) return "%fs:";
2312 if (pfx & PFX_GS) return "%gs:";
2313 if (pfx & PFX_SS) return "%ss:";
2314 return ""; /* no override */
2315}
2316
2317
2318/* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
2319 linear address by adding any required segment override as indicated
sewardj42561ef2005-11-04 14:18:31 +00002320 by sorb, and also dealing with any address size override
2321 present. */
sewardjb3a04292005-01-21 20:33:44 +00002322static
floriancacba8e2014-12-15 18:58:07 +00002323IRExpr* handleAddrOverrides ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00002324 Prefix pfx, IRExpr* virtual )
sewardjb3a04292005-01-21 20:33:44 +00002325{
philippee2cc4de2014-12-16 23:57:51 +00002326 /* Note that the below are hacks that relies on the assumption
2327 that %fs or %gs are constant.
2328 Typically, %fs is always 0x63 on linux (in the main thread, it
2329 stays at value 0), %gs always 0x60 on Darwin, ... */
sewardj42561ef2005-11-04 14:18:31 +00002330 /* --- segment overrides --- */
sewardja6b93d12005-02-17 09:28:28 +00002331 if (pfx & PFX_FS) {
philippee2cc4de2014-12-16 23:57:51 +00002332 if (vbi->guest_amd64_assume_fs_is_const) {
2333 /* return virtual + guest_FS_CONST. */
sewardj2e28ac42008-12-04 00:05:12 +00002334 virtual = binop(Iop_Add64, virtual,
philippee2cc4de2014-12-16 23:57:51 +00002335 IRExpr_Get(OFFB_FS_CONST, Ity_I64));
sewardj2e28ac42008-12-04 00:05:12 +00002336 } else {
2337 unimplemented("amd64 %fs segment override");
2338 }
sewardja6b93d12005-02-17 09:28:28 +00002339 }
sewardjb3a04292005-01-21 20:33:44 +00002340
sewardja6b93d12005-02-17 09:28:28 +00002341 if (pfx & PFX_GS) {
philippee2cc4de2014-12-16 23:57:51 +00002342 if (vbi->guest_amd64_assume_gs_is_const) {
2343 /* return virtual + guest_GS_CONST. */
sewardj2e28ac42008-12-04 00:05:12 +00002344 virtual = binop(Iop_Add64, virtual,
philippee2cc4de2014-12-16 23:57:51 +00002345 IRExpr_Get(OFFB_GS_CONST, Ity_I64));
sewardj2e28ac42008-12-04 00:05:12 +00002346 } else {
2347 unimplemented("amd64 %gs segment override");
2348 }
sewardja6b93d12005-02-17 09:28:28 +00002349 }
2350
2351 /* cs, ds, es and ss are simply ignored in 64-bit mode. */
sewardj42561ef2005-11-04 14:18:31 +00002352
2353 /* --- address size override --- */
2354 if (haveASO(pfx))
2355 virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));
2356
sewardja6b93d12005-02-17 09:28:28 +00002357 return virtual;
sewardjb3a04292005-01-21 20:33:44 +00002358}
sewardja6b93d12005-02-17 09:28:28 +00002359
sewardjd20c8852005-01-20 20:04:07 +00002360//.. {
2361//.. Int sreg;
2362//.. IRType hWordTy;
2363//.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
2364//..
2365//.. if (sorb == 0)
2366//.. /* the common case - no override */
2367//.. return virtual;
2368//..
2369//.. switch (sorb) {
2370//.. case 0x3E: sreg = R_DS; break;
2371//.. case 0x26: sreg = R_ES; break;
2372//.. case 0x64: sreg = R_FS; break;
2373//.. case 0x65: sreg = R_GS; break;
sewardj42561ef2005-11-04 14:18:31 +00002374//.. default: vpanic("handleAddrOverrides(x86,guest)");
sewardjd20c8852005-01-20 20:04:07 +00002375//.. }
2376//..
2377//.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
2378//..
2379//.. seg_selector = newTemp(Ity_I32);
2380//.. ldt_ptr = newTemp(hWordTy);
2381//.. gdt_ptr = newTemp(hWordTy);
2382//.. r64 = newTemp(Ity_I64);
2383//..
2384//.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
2385//.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
2386//.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
2387//..
2388//.. /*
2389//.. Call this to do the translation and limit checks:
2390//.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2391//.. UInt seg_selector, UInt virtual_addr )
2392//.. */
2393//.. assign(
2394//.. r64,
2395//.. mkIRExprCCall(
2396//.. Ity_I64,
2397//.. 0/*regparms*/,
2398//.. "x86g_use_seg_selector",
2399//.. &x86g_use_seg_selector,
2400//.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
2401//.. mkexpr(seg_selector), virtual)
2402//.. )
2403//.. );
2404//..
2405//.. /* If the high 32 of the result are non-zero, there was a
2406//.. failure in address translation. In which case, make a
2407//.. quick exit.
2408//.. */
2409//.. stmt(
2410//.. IRStmt_Exit(
2411//.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
2412//.. Ijk_MapFail,
2413//.. IRConst_U32( guest_eip_curr_instr )
2414//.. )
2415//.. );
2416//..
2417//.. /* otherwise, here's the translated result. */
2418//.. return unop(Iop_64to32, mkexpr(r64));
2419//.. }
sewardjb3a04292005-01-21 20:33:44 +00002420
2421
2422/* Generate IR to calculate an address indicated by a ModRM and
2423 following SIB bytes. The expression, and the number of bytes in
2424 the address mode, are returned (the latter in *len). Note that
2425 this fn should not be called if the R/M part of the address denotes
2426 a register instead of memory. If print_codegen is true, text of
2427 the addressing mode is placed in buf.
2428
2429 The computed address is stored in a new tempreg, and the
sewardje1698952005-02-08 15:02:39 +00002430 identity of the tempreg is returned.
2431
2432 extra_bytes holds the number of bytes after the amode, as supplied
2433 by the caller. This is needed to make sense of %rip-relative
2434 addresses. Note that the value that *len is set to is only the
2435 length of the amode itself and does not include the value supplied
sewardj09717342005-05-05 21:34:02 +00002436 in extra_bytes.
sewardje1698952005-02-08 15:02:39 +00002437 */
sewardjb3a04292005-01-21 20:33:44 +00002438
2439static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
2440{
2441 IRTemp tmp = newTemp(Ity_I64);
2442 assign( tmp, addr64 );
2443 return tmp;
2444}
2445
2446static
sewardj2e28ac42008-12-04 00:05:12 +00002447IRTemp disAMode ( /*OUT*/Int* len,
floriancacba8e2014-12-15 18:58:07 +00002448 const VexAbiInfo* vbi, Prefix pfx, Long delta,
sewardj2e28ac42008-12-04 00:05:12 +00002449 /*OUT*/HChar* buf, Int extra_bytes )
sewardjb3a04292005-01-21 20:33:44 +00002450{
sewardj8c332e22005-01-28 01:36:56 +00002451 UChar mod_reg_rm = getUChar(delta);
sewardjb3a04292005-01-21 20:33:44 +00002452 delta++;
2453
2454 buf[0] = (UChar)0;
sewardje1698952005-02-08 15:02:39 +00002455 vassert(extra_bytes >= 0 && extra_bytes < 10);
sewardjb3a04292005-01-21 20:33:44 +00002456
2457 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2458 jump table seems a bit excessive.
2459 */
sewardj7a240552005-01-28 21:37:12 +00002460 mod_reg_rm &= 0xC7; /* is now XX000YYY */
sewardj1027dc22005-02-26 01:55:02 +00002461 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2462 /* is now XX0XXYYY */
sewardj7a240552005-01-28 21:37:12 +00002463 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
sewardjb3a04292005-01-21 20:33:44 +00002464 switch (mod_reg_rm) {
2465
2466 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2467 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2468 */
2469 case 0x00: case 0x01: case 0x02: case 0x03:
2470 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
sewardj1027dc22005-02-26 01:55:02 +00002471 { UChar rm = toUChar(mod_reg_rm & 7);
sewardjc4356f02007-11-09 21:15:04 +00002472 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
sewardjb3a04292005-01-21 20:33:44 +00002473 *len = 1;
2474 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002475 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
sewardjb3a04292005-01-21 20:33:44 +00002476 }
2477
2478 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2479 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2480 */
2481 case 0x08: case 0x09: case 0x0A: case 0x0B:
2482 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
sewardj1027dc22005-02-26 01:55:02 +00002483 { UChar rm = toUChar(mod_reg_rm & 7);
sewardj8c332e22005-01-28 01:36:56 +00002484 Long d = getSDisp8(delta);
sewardj7eaa7cf2005-01-31 18:55:22 +00002485 if (d == 0) {
sewardjc4356f02007-11-09 21:15:04 +00002486 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
sewardj7eaa7cf2005-01-31 18:55:22 +00002487 } else {
sewardjc4356f02007-11-09 21:15:04 +00002488 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
sewardj7eaa7cf2005-01-31 18:55:22 +00002489 }
sewardjb3a04292005-01-21 20:33:44 +00002490 *len = 2;
2491 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002492 handleAddrOverrides(vbi, pfx,
sewardj5b470602005-02-27 13:10:48 +00002493 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
sewardjb3a04292005-01-21 20:33:44 +00002494 }
2495
2496 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2497 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2498 */
2499 case 0x10: case 0x11: case 0x12: case 0x13:
2500 /* ! 14 */ case 0x15: case 0x16: case 0x17:
sewardj1027dc22005-02-26 01:55:02 +00002501 { UChar rm = toUChar(mod_reg_rm & 7);
sewardj227458e2005-01-31 19:04:50 +00002502 Long d = getSDisp32(delta);
sewardjc4356f02007-11-09 21:15:04 +00002503 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
sewardjb3a04292005-01-21 20:33:44 +00002504 *len = 5;
2505 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002506 handleAddrOverrides(vbi, pfx,
sewardj5b470602005-02-27 13:10:48 +00002507 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
sewardjb3a04292005-01-21 20:33:44 +00002508 }
2509
2510 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2511 /* REX.B==1: a register, %r8 .. %r16. This shouldn't happen. */
2512 case 0x18: case 0x19: case 0x1A: case 0x1B:
2513 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
sewardj354e5c62005-01-27 20:12:52 +00002514 vpanic("disAMode(amd64): not an addr!");
sewardjb3a04292005-01-21 20:33:44 +00002515
sewardj9e6491a2005-07-02 19:24:10 +00002516 /* RIP + disp32. This assumes that guest_RIP_curr_instr is set
sewardjb3a04292005-01-21 20:33:44 +00002517 correctly at the start of handling each instruction. */
2518 case 0x05:
sewardj227458e2005-01-31 19:04:50 +00002519 { Long d = getSDisp32(delta);
sewardjb3a04292005-01-21 20:33:44 +00002520 *len = 5;
sewardjc4356f02007-11-09 21:15:04 +00002521 DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
sewardj4b744762005-02-07 15:02:25 +00002522 /* We need to know the next instruction's start address.
2523 Try and figure out what it is, record the guess, and ask
2524 the top-level driver logic (bbToIR_AMD64) to check we
2525 guessed right, after the instruction is completely
2526 decoded. */
sewardj9e6491a2005-07-02 19:24:10 +00002527 guest_RIP_next_mustcheck = True;
2528 guest_RIP_next_assumed = guest_RIP_bbstart
sewardje1698952005-02-08 15:02:39 +00002529 + delta+4 + extra_bytes;
sewardjb3a04292005-01-21 20:33:44 +00002530 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002531 handleAddrOverrides(vbi, pfx,
sewardj9e6491a2005-07-02 19:24:10 +00002532 binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
sewardjb3a04292005-01-21 20:33:44 +00002533 mkU64(d))));
2534 }
sewardj3ca55a12005-01-27 16:06:23 +00002535
sewardj2f959cc2005-01-26 01:19:35 +00002536 case 0x04: {
2537 /* SIB, with no displacement. Special cases:
sewardj3ca55a12005-01-27 16:06:23 +00002538 -- %rsp cannot act as an index value.
2539 If index_r indicates %rsp, zero is used for the index.
2540 -- when mod is zero and base indicates RBP or R13, base is
2541 instead a 32-bit sign-extended literal.
sewardj2f959cc2005-01-26 01:19:35 +00002542 It's all madness, I tell you. Extract %index, %base and
2543 scale from the SIB byte. The value denoted is then:
sewardj3ca55a12005-01-27 16:06:23 +00002544 | %index == %RSP && (%base == %RBP || %base == %R13)
sewardj2f959cc2005-01-26 01:19:35 +00002545 = d32 following SIB byte
sewardj3ca55a12005-01-27 16:06:23 +00002546 | %index == %RSP && !(%base == %RBP || %base == %R13)
sewardj2f959cc2005-01-26 01:19:35 +00002547 = %base
sewardj3ca55a12005-01-27 16:06:23 +00002548 | %index != %RSP && (%base == %RBP || %base == %R13)
sewardj2f959cc2005-01-26 01:19:35 +00002549 = d32 following SIB byte + (%index << scale)
sewardj3ca55a12005-01-27 16:06:23 +00002550 | %index != %RSP && !(%base == %RBP || %base == %R13)
sewardj2f959cc2005-01-26 01:19:35 +00002551 = %base + (%index << scale)
2552 */
sewardj8c332e22005-01-28 01:36:56 +00002553 UChar sib = getUChar(delta);
sewardj1027dc22005-02-26 01:55:02 +00002554 UChar scale = toUChar((sib >> 6) & 3);
2555 UChar index_r = toUChar((sib >> 3) & 7);
2556 UChar base_r = toUChar(sib & 7);
sewardj3ca55a12005-01-27 16:06:23 +00002557 /* correct since #(R13) == 8 + #(RBP) */
sewardj7a240552005-01-28 21:37:12 +00002558 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2559 Bool index_is_SP = toBool(index_r == R_RSP && 0==getRexX(pfx));
sewardj2f959cc2005-01-26 01:19:35 +00002560 delta++;
sewardjb3a04292005-01-21 20:33:44 +00002561
sewardj3ca55a12005-01-27 16:06:23 +00002562 if ((!index_is_SP) && (!base_is_BPor13)) {
sewardje941eea2005-01-30 19:52:28 +00002563 if (scale == 0) {
sewardjc4356f02007-11-09 21:15:04 +00002564 DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
sewardj5b470602005-02-27 13:10:48 +00002565 nameIRegRexB(8,pfx,base_r),
2566 nameIReg64rexX(pfx,index_r));
sewardje941eea2005-01-30 19:52:28 +00002567 } else {
sewardjc4356f02007-11-09 21:15:04 +00002568 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
sewardj5b470602005-02-27 13:10:48 +00002569 nameIRegRexB(8,pfx,base_r),
2570 nameIReg64rexX(pfx,index_r), 1<<scale);
sewardje941eea2005-01-30 19:52:28 +00002571 }
sewardj2f959cc2005-01-26 01:19:35 +00002572 *len = 2;
sewardj2f959cc2005-01-26 01:19:35 +00002573 return
2574 disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002575 handleAddrOverrides(vbi, pfx,
sewardj3ca55a12005-01-27 16:06:23 +00002576 binop(Iop_Add64,
sewardj5b470602005-02-27 13:10:48 +00002577 getIRegRexB(8,pfx,base_r),
2578 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
sewardj2f959cc2005-01-26 01:19:35 +00002579 mkU8(scale)))));
2580 }
2581
sewardj3ca55a12005-01-27 16:06:23 +00002582 if ((!index_is_SP) && base_is_BPor13) {
sewardj227458e2005-01-31 19:04:50 +00002583 Long d = getSDisp32(delta);
sewardjc4356f02007-11-09 21:15:04 +00002584 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
sewardj5b470602005-02-27 13:10:48 +00002585 nameIReg64rexX(pfx,index_r), 1<<scale);
sewardj2f959cc2005-01-26 01:19:35 +00002586 *len = 6;
sewardj2f959cc2005-01-26 01:19:35 +00002587 return
2588 disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002589 handleAddrOverrides(vbi, pfx,
sewardj3ca55a12005-01-27 16:06:23 +00002590 binop(Iop_Add64,
sewardj5b470602005-02-27 13:10:48 +00002591 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
sewardj3ca55a12005-01-27 16:06:23 +00002592 mkU8(scale)),
2593 mkU64(d))));
sewardj2f959cc2005-01-26 01:19:35 +00002594 }
2595
sewardj3ca55a12005-01-27 16:06:23 +00002596 if (index_is_SP && (!base_is_BPor13)) {
sewardjc4356f02007-11-09 21:15:04 +00002597 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
sewardj2f959cc2005-01-26 01:19:35 +00002598 *len = 2;
sewardj2f959cc2005-01-26 01:19:35 +00002599 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002600 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r)));
sewardj2f959cc2005-01-26 01:19:35 +00002601 }
2602
sewardj3ca55a12005-01-27 16:06:23 +00002603 if (index_is_SP && base_is_BPor13) {
sewardj227458e2005-01-31 19:04:50 +00002604 Long d = getSDisp32(delta);
sewardjc4356f02007-11-09 21:15:04 +00002605 DIS(buf, "%s%lld", segRegTxt(pfx), d);
sewardj2f959cc2005-01-26 01:19:35 +00002606 *len = 6;
sewardj2f959cc2005-01-26 01:19:35 +00002607 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002608 handleAddrOverrides(vbi, pfx, mkU64(d)));
sewardj2f959cc2005-01-26 01:19:35 +00002609 }
2610
2611 vassert(0);
2612 }
sewardj3ca55a12005-01-27 16:06:23 +00002613
sewardj2f959cc2005-01-26 01:19:35 +00002614 /* SIB, with 8-bit displacement. Special cases:
2615 -- %esp cannot act as an index value.
2616 If index_r indicates %esp, zero is used for the index.
2617 Denoted value is:
2618 | %index == %ESP
2619 = d8 + %base
2620 | %index != %ESP
2621 = d8 + %base + (%index << scale)
2622 */
2623 case 0x0C: {
sewardj8c332e22005-01-28 01:36:56 +00002624 UChar sib = getUChar(delta);
sewardj1027dc22005-02-26 01:55:02 +00002625 UChar scale = toUChar((sib >> 6) & 3);
2626 UChar index_r = toUChar((sib >> 3) & 7);
2627 UChar base_r = toUChar(sib & 7);
sewardj8c332e22005-01-28 01:36:56 +00002628 Long d = getSDisp8(delta+1);
sewardj2f959cc2005-01-26 01:19:35 +00002629
sewardj3ca55a12005-01-27 16:06:23 +00002630 if (index_r == R_RSP && 0==getRexX(pfx)) {
sewardjc4356f02007-11-09 21:15:04 +00002631 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
sewardj5b470602005-02-27 13:10:48 +00002632 d, nameIRegRexB(8,pfx,base_r));
sewardj2f959cc2005-01-26 01:19:35 +00002633 *len = 3;
sewardj2f959cc2005-01-26 01:19:35 +00002634 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002635 handleAddrOverrides(vbi, pfx,
sewardj5b470602005-02-27 13:10:48 +00002636 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
sewardj2f959cc2005-01-26 01:19:35 +00002637 } else {
sewardje941eea2005-01-30 19:52:28 +00002638 if (scale == 0) {
sewardjc4356f02007-11-09 21:15:04 +00002639 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
sewardj5b470602005-02-27 13:10:48 +00002640 nameIRegRexB(8,pfx,base_r),
2641 nameIReg64rexX(pfx,index_r));
sewardje941eea2005-01-30 19:52:28 +00002642 } else {
sewardjc4356f02007-11-09 21:15:04 +00002643 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
sewardj5b470602005-02-27 13:10:48 +00002644 nameIRegRexB(8,pfx,base_r),
2645 nameIReg64rexX(pfx,index_r), 1<<scale);
sewardje941eea2005-01-30 19:52:28 +00002646 }
sewardj2f959cc2005-01-26 01:19:35 +00002647 *len = 3;
sewardj2f959cc2005-01-26 01:19:35 +00002648 return
2649 disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002650 handleAddrOverrides(vbi, pfx,
sewardj3ca55a12005-01-27 16:06:23 +00002651 binop(Iop_Add64,
2652 binop(Iop_Add64,
sewardj5b470602005-02-27 13:10:48 +00002653 getIRegRexB(8,pfx,base_r),
sewardj3ca55a12005-01-27 16:06:23 +00002654 binop(Iop_Shl64,
sewardj5b470602005-02-27 13:10:48 +00002655 getIReg64rexX(pfx,index_r), mkU8(scale))),
sewardj3ca55a12005-01-27 16:06:23 +00002656 mkU64(d))));
sewardj2f959cc2005-01-26 01:19:35 +00002657 }
sewardj3ca55a12005-01-27 16:06:23 +00002658 vassert(0); /*NOTREACHED*/
sewardj2f959cc2005-01-26 01:19:35 +00002659 }
sewardj3ca55a12005-01-27 16:06:23 +00002660
sewardj2f959cc2005-01-26 01:19:35 +00002661 /* SIB, with 32-bit displacement. Special cases:
2662 -- %rsp cannot act as an index value.
2663 If index_r indicates %rsp, zero is used for the index.
2664 Denoted value is:
2665 | %index == %RSP
2666 = d32 + %base
2667 | %index != %RSP
2668 = d32 + %base + (%index << scale)
2669 */
2670 case 0x14: {
sewardj8c332e22005-01-28 01:36:56 +00002671 UChar sib = getUChar(delta);
sewardj1027dc22005-02-26 01:55:02 +00002672 UChar scale = toUChar((sib >> 6) & 3);
2673 UChar index_r = toUChar((sib >> 3) & 7);
2674 UChar base_r = toUChar(sib & 7);
sewardj8c332e22005-01-28 01:36:56 +00002675 Long d = getSDisp32(delta+1);
sewardj2f959cc2005-01-26 01:19:35 +00002676
2677 if (index_r == R_RSP && 0==getRexX(pfx)) {
sewardjc4356f02007-11-09 21:15:04 +00002678 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
sewardj5b470602005-02-27 13:10:48 +00002679 d, nameIRegRexB(8,pfx,base_r));
sewardj2f959cc2005-01-26 01:19:35 +00002680 *len = 6;
sewardj2f959cc2005-01-26 01:19:35 +00002681 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002682 handleAddrOverrides(vbi, pfx,
sewardj5b470602005-02-27 13:10:48 +00002683 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
sewardj2f959cc2005-01-26 01:19:35 +00002684 } else {
sewardje941eea2005-01-30 19:52:28 +00002685 if (scale == 0) {
sewardjc4356f02007-11-09 21:15:04 +00002686 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
sewardj5b470602005-02-27 13:10:48 +00002687 nameIRegRexB(8,pfx,base_r),
2688 nameIReg64rexX(pfx,index_r));
sewardje941eea2005-01-30 19:52:28 +00002689 } else {
sewardjc4356f02007-11-09 21:15:04 +00002690 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
sewardj5b470602005-02-27 13:10:48 +00002691 nameIRegRexB(8,pfx,base_r),
2692 nameIReg64rexX(pfx,index_r), 1<<scale);
sewardje941eea2005-01-30 19:52:28 +00002693 }
sewardj2f959cc2005-01-26 01:19:35 +00002694 *len = 6;
2695 return
2696 disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002697 handleAddrOverrides(vbi, pfx,
sewardj2f959cc2005-01-26 01:19:35 +00002698 binop(Iop_Add64,
2699 binop(Iop_Add64,
sewardj5b470602005-02-27 13:10:48 +00002700 getIRegRexB(8,pfx,base_r),
sewardj2f959cc2005-01-26 01:19:35 +00002701 binop(Iop_Shl64,
sewardj5b470602005-02-27 13:10:48 +00002702 getIReg64rexX(pfx,index_r), mkU8(scale))),
sewardj2f959cc2005-01-26 01:19:35 +00002703 mkU64(d))));
2704 }
sewardj3ca55a12005-01-27 16:06:23 +00002705 vassert(0); /*NOTREACHED*/
sewardj2f959cc2005-01-26 01:19:35 +00002706 }
2707
sewardjb3a04292005-01-21 20:33:44 +00002708 default:
2709 vpanic("disAMode(amd64)");
2710 return 0; /*notreached*/
2711 }
2712}
2713
2714
sewardjcc3d2192013-03-27 11:37:33 +00002715/* Similarly for VSIB addressing. This returns just the addend,
2716 and fills in *rI and *vscale with the register number of the vector
2717 index and its multiplicand. */
2718static
2719IRTemp disAVSIBMode ( /*OUT*/Int* len,
floriancacba8e2014-12-15 18:58:07 +00002720 const VexAbiInfo* vbi, Prefix pfx, Long delta,
sewardjcc3d2192013-03-27 11:37:33 +00002721 /*OUT*/HChar* buf, /*OUT*/UInt* rI,
2722 IRType ty, /*OUT*/Int* vscale )
2723{
2724 UChar mod_reg_rm = getUChar(delta);
2725 const HChar *vindex;
2726
2727 *len = 0;
2728 *rI = 0;
2729 *vscale = 0;
2730 buf[0] = (UChar)0;
2731 if ((mod_reg_rm & 7) != 4 || epartIsReg(mod_reg_rm))
2732 return IRTemp_INVALID;
2733
2734 UChar sib = getUChar(delta+1);
2735 UChar scale = toUChar((sib >> 6) & 3);
2736 UChar index_r = toUChar((sib >> 3) & 7);
2737 UChar base_r = toUChar(sib & 7);
2738 Long d = 0;
2739 /* correct since #(R13) == 8 + #(RBP) */
2740 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2741 delta += 2;
2742 *len = 2;
2743
2744 *rI = index_r | (getRexX(pfx) << 3);
2745 if (ty == Ity_V128)
2746 vindex = nameXMMReg(*rI);
2747 else
2748 vindex = nameYMMReg(*rI);
2749 *vscale = 1<<scale;
2750
2751 switch (mod_reg_rm >> 6) {
2752 case 0:
2753 if (base_is_BPor13) {
2754 d = getSDisp32(delta);
2755 *len += 4;
2756 if (scale == 0) {
2757 DIS(buf, "%s%lld(,%s)", segRegTxt(pfx), d, vindex);
2758 } else {
2759 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d, vindex, 1<<scale);
2760 }
2761 return disAMode_copy2tmp( mkU64(d) );
2762 } else {
2763 if (scale == 0) {
2764 DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
2765 nameIRegRexB(8,pfx,base_r), vindex);
2766 } else {
2767 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
2768 nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
2769 }
2770 }
2771 break;
2772 case 1:
2773 d = getSDisp8(delta);
2774 *len += 1;
2775 goto have_disp;
2776 case 2:
2777 d = getSDisp32(delta);
2778 *len += 4;
2779 have_disp:
2780 if (scale == 0) {
2781 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2782 nameIRegRexB(8,pfx,base_r), vindex);
2783 } else {
2784 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2785 nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
2786 }
2787 break;
2788 }
2789
2790 if (!d)
2791 return disAMode_copy2tmp( getIRegRexB(8,pfx,base_r) );
2792 return disAMode_copy2tmp( binop(Iop_Add64, getIRegRexB(8,pfx,base_r),
2793 mkU64(d)) );
2794}
2795
2796
sewardj3ca55a12005-01-27 16:06:23 +00002797/* Figure out the number of (insn-stream) bytes constituting the amode
2798 beginning at delta. Is useful for getting hold of literals beyond
2799 the end of the amode before it has been disassembled. */
2800
sewardj270def42005-07-03 01:03:01 +00002801static UInt lengthAMode ( Prefix pfx, Long delta )
sewardj3ca55a12005-01-27 16:06:23 +00002802{
sewardj8c332e22005-01-28 01:36:56 +00002803 UChar mod_reg_rm = getUChar(delta);
sewardj3ca55a12005-01-27 16:06:23 +00002804 delta++;
2805
2806 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2807 jump table seems a bit excessive.
2808 */
sewardj7a240552005-01-28 21:37:12 +00002809 mod_reg_rm &= 0xC7; /* is now XX000YYY */
sewardj1027dc22005-02-26 01:55:02 +00002810 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2811 /* is now XX0XXYYY */
sewardj7a240552005-01-28 21:37:12 +00002812 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
sewardj3ca55a12005-01-27 16:06:23 +00002813 switch (mod_reg_rm) {
2814
2815 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2816 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2817 */
2818 case 0x00: case 0x01: case 0x02: case 0x03:
2819 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
sewardj3ca55a12005-01-27 16:06:23 +00002820 return 1;
2821
2822 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2823 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2824 */
2825 case 0x08: case 0x09: case 0x0A: case 0x0B:
2826 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
sewardj3ca55a12005-01-27 16:06:23 +00002827 return 2;
2828
2829 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2830 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2831 */
2832 case 0x10: case 0x11: case 0x12: case 0x13:
2833 /* ! 14 */ case 0x15: case 0x16: case 0x17:
sewardj3ca55a12005-01-27 16:06:23 +00002834 return 5;
2835
2836 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2837 /* REX.B==1: a register, %r8 .. %r16. This shouldn't happen. */
2838 /* Not an address, but still handled. */
2839 case 0x18: case 0x19: case 0x1A: case 0x1B:
2840 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2841 return 1;
2842
2843 /* RIP + disp32. */
2844 case 0x05:
sewardj3ca55a12005-01-27 16:06:23 +00002845 return 5;
2846
2847 case 0x04: {
2848 /* SIB, with no displacement. */
sewardj8c332e22005-01-28 01:36:56 +00002849 UChar sib = getUChar(delta);
sewardj1027dc22005-02-26 01:55:02 +00002850 UChar base_r = toUChar(sib & 7);
sewardj3ca55a12005-01-27 16:06:23 +00002851 /* correct since #(R13) == 8 + #(RBP) */
sewardj7a240552005-01-28 21:37:12 +00002852 Bool base_is_BPor13 = toBool(base_r == R_RBP);
sewardj3ca55a12005-01-27 16:06:23 +00002853
2854 if (base_is_BPor13) {
sewardj3ca55a12005-01-27 16:06:23 +00002855 return 6;
2856 } else {
sewardj3ca55a12005-01-27 16:06:23 +00002857 return 2;
2858 }
2859 }
2860
2861 /* SIB, with 8-bit displacement. */
2862 case 0x0C:
2863 return 3;
2864
2865 /* SIB, with 32-bit displacement. */
2866 case 0x14:
2867 return 6;
2868
2869 default:
2870 vpanic("lengthAMode(amd64)");
2871 return 0; /*notreached*/
2872 }
2873}
2874
2875
sewardjdf0e0022005-01-25 15:48:43 +00002876/*------------------------------------------------------------*/
2877/*--- Disassembling common idioms ---*/
2878/*------------------------------------------------------------*/
2879
sewardjdf0e0022005-01-25 15:48:43 +00002880/* Handle binary integer instructions of the form
2881 op E, G meaning
2882 op reg-or-mem, reg
2883 Is passed the a ptr to the modRM byte, the actual operation, and the
2884 data size. Returns the address advanced completely over this
2885 instruction.
2886
2887 E(src) is reg-or-mem
2888 G(dst) is reg.
2889
2890 If E is reg, --> GET %G, tmp
2891 OP %E, tmp
2892 PUT tmp, %G
2893
2894 If E is mem and OP is not reversible,
2895 --> (getAddr E) -> tmpa
2896 LD (tmpa), tmpa
2897 GET %G, tmp2
2898 OP tmpa, tmp2
2899 PUT tmp2, %G
2900
2901 If E is mem and OP is reversible
2902 --> (getAddr E) -> tmpa
2903 LD (tmpa), tmpa
2904 OP %G, tmpa
2905 PUT tmpa, %G
2906*/
2907static
floriancacba8e2014-12-15 18:58:07 +00002908ULong dis_op2_E_G ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00002909 Prefix pfx,
sewardjdf0e0022005-01-25 15:48:43 +00002910 Bool addSubCarry,
2911 IROp op8,
2912 Bool keep,
2913 Int size,
sewardj270def42005-07-03 01:03:01 +00002914 Long delta0,
florian55085f82012-11-21 00:36:55 +00002915 const HChar* t_amd64opc )
sewardjdf0e0022005-01-25 15:48:43 +00002916{
2917 HChar dis_buf[50];
2918 Int len;
2919 IRType ty = szToITy(size);
2920 IRTemp dst1 = newTemp(ty);
2921 IRTemp src = newTemp(ty);
2922 IRTemp dst0 = newTemp(ty);
2923 UChar rm = getUChar(delta0);
2924 IRTemp addr = IRTemp_INVALID;
2925
2926 /* addSubCarry == True indicates the intended operation is
2927 add-with-carry or subtract-with-borrow. */
2928 if (addSubCarry) {
2929 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
2930 vassert(keep);
2931 }
2932
2933 if (epartIsReg(rm)) {
sewardjdf0e0022005-01-25 15:48:43 +00002934 /* Specially handle XOR reg,reg, because that doesn't really
2935 depend on reg, and doing the obvious thing potentially
2936 generates a spurious value check failure due to the bogus
2937 dependency. */
sewardj5b470602005-02-27 13:10:48 +00002938 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
2939 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
sewardj4f9847d2005-07-25 11:58:34 +00002940 if (False && op8 == Iop_Sub8)
sewardj5b470602005-02-27 13:10:48 +00002941 vex_printf("vex amd64->IR: sbb %%r,%%r optimisation(1)\n");
sewardjcc3d2192013-03-27 11:37:33 +00002942 putIRegG(size,pfx,rm, mkU(ty,0));
sewardjdf0e0022005-01-25 15:48:43 +00002943 }
sewardj5b470602005-02-27 13:10:48 +00002944
2945 assign( dst0, getIRegG(size,pfx,rm) );
2946 assign( src, getIRegE(size,pfx,rm) );
sewardjdf0e0022005-01-25 15:48:43 +00002947
2948 if (addSubCarry && op8 == Iop_Add8) {
sewardje9d8a262009-07-01 08:06:34 +00002949 helper_ADC( size, dst1, dst0, src,
2950 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5b470602005-02-27 13:10:48 +00002951 putIRegG(size, pfx, rm, mkexpr(dst1));
sewardjdf0e0022005-01-25 15:48:43 +00002952 } else
2953 if (addSubCarry && op8 == Iop_Sub8) {
sewardje9d8a262009-07-01 08:06:34 +00002954 helper_SBB( size, dst1, dst0, src,
2955 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5b470602005-02-27 13:10:48 +00002956 putIRegG(size, pfx, rm, mkexpr(dst1));
sewardjdf0e0022005-01-25 15:48:43 +00002957 } else {
2958 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2959 if (isAddSub(op8))
2960 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2961 else
2962 setFlags_DEP1(op8, dst1, ty);
2963 if (keep)
sewardj5b470602005-02-27 13:10:48 +00002964 putIRegG(size, pfx, rm, mkexpr(dst1));
sewardjdf0e0022005-01-25 15:48:43 +00002965 }
2966
2967 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00002968 nameIRegE(size,pfx,rm),
2969 nameIRegG(size,pfx,rm));
sewardjdf0e0022005-01-25 15:48:43 +00002970 return 1+delta0;
2971 } else {
2972 /* E refers to memory */
sewardj2e28ac42008-12-04 00:05:12 +00002973 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardj5b470602005-02-27 13:10:48 +00002974 assign( dst0, getIRegG(size,pfx,rm) );
sewardjdf0e0022005-01-25 15:48:43 +00002975 assign( src, loadLE(szToITy(size), mkexpr(addr)) );
2976
2977 if (addSubCarry && op8 == Iop_Add8) {
sewardje9d8a262009-07-01 08:06:34 +00002978 helper_ADC( size, dst1, dst0, src,
2979 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5b470602005-02-27 13:10:48 +00002980 putIRegG(size, pfx, rm, mkexpr(dst1));
sewardjdf0e0022005-01-25 15:48:43 +00002981 } else
2982 if (addSubCarry && op8 == Iop_Sub8) {
sewardje9d8a262009-07-01 08:06:34 +00002983 helper_SBB( size, dst1, dst0, src,
2984 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5b470602005-02-27 13:10:48 +00002985 putIRegG(size, pfx, rm, mkexpr(dst1));
sewardjdf0e0022005-01-25 15:48:43 +00002986 } else {
2987 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2988 if (isAddSub(op8))
2989 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2990 else
2991 setFlags_DEP1(op8, dst1, ty);
2992 if (keep)
sewardj5b470602005-02-27 13:10:48 +00002993 putIRegG(size, pfx, rm, mkexpr(dst1));
sewardjdf0e0022005-01-25 15:48:43 +00002994 }
2995
2996 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00002997 dis_buf, nameIRegG(size, pfx, rm));
sewardjdf0e0022005-01-25 15:48:43 +00002998 return len+delta0;
2999 }
3000}
3001
3002
3003
sewardj3ca55a12005-01-27 16:06:23 +00003004/* Handle binary integer instructions of the form
3005 op G, E meaning
3006 op reg, reg-or-mem
3007 Is passed the a ptr to the modRM byte, the actual operation, and the
3008 data size. Returns the address advanced completely over this
3009 instruction.
3010
3011 G(src) is reg.
3012 E(dst) is reg-or-mem
3013
3014 If E is reg, --> GET %E, tmp
3015 OP %G, tmp
3016 PUT tmp, %E
3017
3018 If E is mem, --> (getAddr E) -> tmpa
3019 LD (tmpa), tmpv
3020 OP %G, tmpv
3021 ST tmpv, (tmpa)
3022*/
3023static
floriancacba8e2014-12-15 18:58:07 +00003024ULong dis_op2_G_E ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00003025 Prefix pfx,
sewardj8c332e22005-01-28 01:36:56 +00003026 Bool addSubCarry,
3027 IROp op8,
3028 Bool keep,
3029 Int size,
sewardj270def42005-07-03 01:03:01 +00003030 Long delta0,
florian55085f82012-11-21 00:36:55 +00003031 const HChar* t_amd64opc )
sewardj3ca55a12005-01-27 16:06:23 +00003032{
3033 HChar dis_buf[50];
3034 Int len;
3035 IRType ty = szToITy(size);
3036 IRTemp dst1 = newTemp(ty);
3037 IRTemp src = newTemp(ty);
3038 IRTemp dst0 = newTemp(ty);
sewardj8c332e22005-01-28 01:36:56 +00003039 UChar rm = getUChar(delta0);
sewardj3ca55a12005-01-27 16:06:23 +00003040 IRTemp addr = IRTemp_INVALID;
3041
3042 /* addSubCarry == True indicates the intended operation is
3043 add-with-carry or subtract-with-borrow. */
3044 if (addSubCarry) {
3045 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
3046 vassert(keep);
3047 }
3048
3049 if (epartIsReg(rm)) {
3050 /* Specially handle XOR reg,reg, because that doesn't really
3051 depend on reg, and doing the obvious thing potentially
3052 generates a spurious value check failure due to the bogus
sewardj5b470602005-02-27 13:10:48 +00003053 dependency. Ditto SBB reg,reg. */
3054 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
3055 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
3056 putIRegE(size,pfx,rm, mkU(ty,0));
sewardj3ca55a12005-01-27 16:06:23 +00003057 }
sewardj5b470602005-02-27 13:10:48 +00003058
3059 assign(dst0, getIRegE(size,pfx,rm));
3060 assign(src, getIRegG(size,pfx,rm));
sewardj3ca55a12005-01-27 16:06:23 +00003061
3062 if (addSubCarry && op8 == Iop_Add8) {
sewardje9d8a262009-07-01 08:06:34 +00003063 helper_ADC( size, dst1, dst0, src,
3064 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5b470602005-02-27 13:10:48 +00003065 putIRegE(size, pfx, rm, mkexpr(dst1));
sewardj3ca55a12005-01-27 16:06:23 +00003066 } else
3067 if (addSubCarry && op8 == Iop_Sub8) {
sewardje9d8a262009-07-01 08:06:34 +00003068 helper_SBB( size, dst1, dst0, src,
3069 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5b470602005-02-27 13:10:48 +00003070 putIRegE(size, pfx, rm, mkexpr(dst1));
sewardj3ca55a12005-01-27 16:06:23 +00003071 } else {
3072 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3073 if (isAddSub(op8))
3074 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3075 else
3076 setFlags_DEP1(op8, dst1, ty);
3077 if (keep)
sewardj5b470602005-02-27 13:10:48 +00003078 putIRegE(size, pfx, rm, mkexpr(dst1));
sewardj3ca55a12005-01-27 16:06:23 +00003079 }
3080
3081 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003082 nameIRegG(size,pfx,rm),
3083 nameIRegE(size,pfx,rm));
sewardj3ca55a12005-01-27 16:06:23 +00003084 return 1+delta0;
3085 }
3086
3087 /* E refers to memory */
3088 {
sewardj2e28ac42008-12-04 00:05:12 +00003089 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardj3ca55a12005-01-27 16:06:23 +00003090 assign(dst0, loadLE(ty,mkexpr(addr)));
sewardj5b470602005-02-27 13:10:48 +00003091 assign(src, getIRegG(size,pfx,rm));
sewardj3ca55a12005-01-27 16:06:23 +00003092
3093 if (addSubCarry && op8 == Iop_Add8) {
sewardj6491f862013-10-15 17:29:19 +00003094 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00003095 /* cas-style store */
3096 helper_ADC( size, dst1, dst0, src,
3097 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3098 } else {
3099 /* normal store */
3100 helper_ADC( size, dst1, dst0, src,
3101 /*store*/addr, IRTemp_INVALID, 0 );
3102 }
sewardj3ca55a12005-01-27 16:06:23 +00003103 } else
3104 if (addSubCarry && op8 == Iop_Sub8) {
sewardj6491f862013-10-15 17:29:19 +00003105 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00003106 /* cas-style store */
3107 helper_SBB( size, dst1, dst0, src,
3108 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3109 } else {
3110 /* normal store */
3111 helper_SBB( size, dst1, dst0, src,
3112 /*store*/addr, IRTemp_INVALID, 0 );
3113 }
sewardj3ca55a12005-01-27 16:06:23 +00003114 } else {
3115 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
sewardje9d8a262009-07-01 08:06:34 +00003116 if (keep) {
sewardj6491f862013-10-15 17:29:19 +00003117 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00003118 if (0) vex_printf("locked case\n" );
3119 casLE( mkexpr(addr),
3120 mkexpr(dst0)/*expval*/,
3121 mkexpr(dst1)/*newval*/, guest_RIP_curr_instr );
3122 } else {
3123 if (0) vex_printf("nonlocked case\n");
3124 storeLE(mkexpr(addr), mkexpr(dst1));
3125 }
3126 }
sewardj3ca55a12005-01-27 16:06:23 +00003127 if (isAddSub(op8))
3128 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3129 else
3130 setFlags_DEP1(op8, dst1, ty);
sewardj3ca55a12005-01-27 16:06:23 +00003131 }
3132
3133 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003134 nameIRegG(size,pfx,rm), dis_buf);
sewardj3ca55a12005-01-27 16:06:23 +00003135 return len+delta0;
3136 }
3137}
3138
3139
sewardj1389d4d2005-01-28 13:46:29 +00003140/* Handle move instructions of the form
3141 mov E, G meaning
3142 mov reg-or-mem, reg
3143 Is passed the a ptr to the modRM byte, and the data size. Returns
3144 the address advanced completely over this instruction.
3145
3146 E(src) is reg-or-mem
3147 G(dst) is reg.
3148
3149 If E is reg, --> GET %E, tmpv
3150 PUT tmpv, %G
3151
3152 If E is mem --> (getAddr E) -> tmpa
3153 LD (tmpa), tmpb
3154 PUT tmpb, %G
3155*/
3156static
floriancacba8e2014-12-15 18:58:07 +00003157ULong dis_mov_E_G ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00003158 Prefix pfx,
sewardj1389d4d2005-01-28 13:46:29 +00003159 Int size,
sewardj270def42005-07-03 01:03:01 +00003160 Long delta0 )
sewardj1389d4d2005-01-28 13:46:29 +00003161{
3162 Int len;
3163 UChar rm = getUChar(delta0);
3164 HChar dis_buf[50];
3165
3166 if (epartIsReg(rm)) {
sewardj5b470602005-02-27 13:10:48 +00003167 putIRegG(size, pfx, rm, getIRegE(size, pfx, rm));
sewardje941eea2005-01-30 19:52:28 +00003168 DIP("mov%c %s,%s\n", nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003169 nameIRegE(size,pfx,rm),
3170 nameIRegG(size,pfx,rm));
sewardj1389d4d2005-01-28 13:46:29 +00003171 return 1+delta0;
3172 }
3173
3174 /* E refers to memory */
3175 {
sewardj2e28ac42008-12-04 00:05:12 +00003176 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardj5b470602005-02-27 13:10:48 +00003177 putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr)));
sewardj1389d4d2005-01-28 13:46:29 +00003178 DIP("mov%c %s,%s\n", nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003179 dis_buf,
3180 nameIRegG(size,pfx,rm));
sewardj1389d4d2005-01-28 13:46:29 +00003181 return delta0+len;
3182 }
3183}
3184
3185
3186/* Handle move instructions of the form
3187 mov G, E meaning
3188 mov reg, reg-or-mem
3189 Is passed the a ptr to the modRM byte, and the data size. Returns
3190 the address advanced completely over this instruction.
sewardj38b1d692013-10-15 17:21:42 +00003191 We have to decide here whether F2 or F3 are acceptable. F2 never is.
sewardj1389d4d2005-01-28 13:46:29 +00003192
3193 G(src) is reg.
3194 E(dst) is reg-or-mem
3195
3196 If E is reg, --> GET %G, tmp
3197 PUT tmp, %E
3198
3199 If E is mem, --> (getAddr E) -> tmpa
3200 GET %G, tmpv
3201 ST tmpv, (tmpa)
3202*/
3203static
floriancacba8e2014-12-15 18:58:07 +00003204ULong dis_mov_G_E ( const VexAbiInfo* vbi,
sewardj38b1d692013-10-15 17:21:42 +00003205 Prefix pfx,
3206 Int size,
3207 Long delta0,
3208 /*OUT*/Bool* ok )
sewardj1389d4d2005-01-28 13:46:29 +00003209{
sewardj38b1d692013-10-15 17:21:42 +00003210 Int len;
sewardj1389d4d2005-01-28 13:46:29 +00003211 UChar rm = getUChar(delta0);
3212 HChar dis_buf[50];
3213
sewardj38b1d692013-10-15 17:21:42 +00003214 *ok = True;
3215
sewardj1389d4d2005-01-28 13:46:29 +00003216 if (epartIsReg(rm)) {
sewardj38b1d692013-10-15 17:21:42 +00003217 if (haveF2orF3(pfx)) { *ok = False; return delta0; }
sewardj5b470602005-02-27 13:10:48 +00003218 putIRegE(size, pfx, rm, getIRegG(size, pfx, rm));
sewardje941eea2005-01-30 19:52:28 +00003219 DIP("mov%c %s,%s\n", nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003220 nameIRegG(size,pfx,rm),
3221 nameIRegE(size,pfx,rm));
sewardj1389d4d2005-01-28 13:46:29 +00003222 return 1+delta0;
3223 }
3224
3225 /* E refers to memory */
3226 {
sewardj38b1d692013-10-15 17:21:42 +00003227 if (haveF2(pfx)) { *ok = False; return delta0; }
3228 /* F3(XRELEASE) is acceptable, though. */
sewardj2e28ac42008-12-04 00:05:12 +00003229 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardj5b470602005-02-27 13:10:48 +00003230 storeLE( mkexpr(addr), getIRegG(size, pfx, rm) );
sewardj1389d4d2005-01-28 13:46:29 +00003231 DIP("mov%c %s,%s\n", nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003232 nameIRegG(size,pfx,rm),
3233 dis_buf);
sewardj1389d4d2005-01-28 13:46:29 +00003234 return len+delta0;
3235 }
3236}
sewardj3ca55a12005-01-27 16:06:23 +00003237
3238
3239/* op $immediate, AL/AX/EAX/RAX. */
3240static
sewardj8c332e22005-01-28 01:36:56 +00003241ULong dis_op_imm_A ( Int size,
sewardj41c01092005-07-23 13:50:32 +00003242 Bool carrying,
sewardj8c332e22005-01-28 01:36:56 +00003243 IROp op8,
3244 Bool keep,
sewardj270def42005-07-03 01:03:01 +00003245 Long delta,
florian55085f82012-11-21 00:36:55 +00003246 const HChar* t_amd64opc )
sewardj3ca55a12005-01-27 16:06:23 +00003247{
3248 Int size4 = imin(size,4);
3249 IRType ty = szToITy(size);
3250 IRTemp dst0 = newTemp(ty);
3251 IRTemp src = newTemp(ty);
3252 IRTemp dst1 = newTemp(ty);
sewardj8c332e22005-01-28 01:36:56 +00003253 Long lit = getSDisp(size4,delta);
sewardj5b470602005-02-27 13:10:48 +00003254 assign(dst0, getIRegRAX(size));
sewardj1389d4d2005-01-28 13:46:29 +00003255 assign(src, mkU(ty,lit & mkSizeMask(size)));
sewardj41c01092005-07-23 13:50:32 +00003256
3257 if (isAddSub(op8) && !carrying) {
3258 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
sewardj3ca55a12005-01-27 16:06:23 +00003259 setFlags_DEP1_DEP2(op8, dst0, src, ty);
sewardj41c01092005-07-23 13:50:32 +00003260 }
sewardj3ca55a12005-01-27 16:06:23 +00003261 else
sewardj41c01092005-07-23 13:50:32 +00003262 if (isLogic(op8)) {
3263 vassert(!carrying);
3264 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
sewardj3ca55a12005-01-27 16:06:23 +00003265 setFlags_DEP1(op8, dst1, ty);
sewardj41c01092005-07-23 13:50:32 +00003266 }
sewardj3ca55a12005-01-27 16:06:23 +00003267 else
sewardj41c01092005-07-23 13:50:32 +00003268 if (op8 == Iop_Add8 && carrying) {
sewardje9d8a262009-07-01 08:06:34 +00003269 helper_ADC( size, dst1, dst0, src,
3270 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj41c01092005-07-23 13:50:32 +00003271 }
3272 else
sewardj5fadaf92006-05-12 20:45:59 +00003273 if (op8 == Iop_Sub8 && carrying) {
sewardje9d8a262009-07-01 08:06:34 +00003274 helper_SBB( size, dst1, dst0, src,
3275 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5fadaf92006-05-12 20:45:59 +00003276 }
3277 else
sewardj41c01092005-07-23 13:50:32 +00003278 vpanic("dis_op_imm_A(amd64,guest)");
sewardj3ca55a12005-01-27 16:06:23 +00003279
3280 if (keep)
sewardj5b470602005-02-27 13:10:48 +00003281 putIRegRAX(size, mkexpr(dst1));
sewardj3ca55a12005-01-27 16:06:23 +00003282
3283 DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003284 lit, nameIRegRAX(size));
sewardj3ca55a12005-01-27 16:06:23 +00003285 return delta+size4;
3286}
3287
3288
sewardj5e525292005-01-28 15:13:10 +00003289/* Sign- and Zero-extending moves. */
3290static
floriancacba8e2014-12-15 18:58:07 +00003291ULong dis_movx_E_G ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00003292 Prefix pfx,
sewardj270def42005-07-03 01:03:01 +00003293 Long delta, Int szs, Int szd, Bool sign_extend )
sewardj5e525292005-01-28 15:13:10 +00003294{
3295 UChar rm = getUChar(delta);
3296 if (epartIsReg(rm)) {
sewardj5b470602005-02-27 13:10:48 +00003297 putIRegG(szd, pfx, rm,
sewardj5e525292005-01-28 15:13:10 +00003298 doScalarWidening(
3299 szs,szd,sign_extend,
sewardj5b470602005-02-27 13:10:48 +00003300 getIRegE(szs,pfx,rm)));
sewardj5e525292005-01-28 15:13:10 +00003301 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3302 nameISize(szs),
3303 nameISize(szd),
sewardj5b470602005-02-27 13:10:48 +00003304 nameIRegE(szs,pfx,rm),
3305 nameIRegG(szd,pfx,rm));
sewardj5e525292005-01-28 15:13:10 +00003306 return 1+delta;
3307 }
3308
3309 /* E refers to memory */
3310 {
3311 Int len;
3312 HChar dis_buf[50];
sewardj2e28ac42008-12-04 00:05:12 +00003313 IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj5b470602005-02-27 13:10:48 +00003314 putIRegG(szd, pfx, rm,
sewardj5e525292005-01-28 15:13:10 +00003315 doScalarWidening(
3316 szs,szd,sign_extend,
3317 loadLE(szToITy(szs),mkexpr(addr))));
3318 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3319 nameISize(szs),
3320 nameISize(szd),
3321 dis_buf,
sewardj5b470602005-02-27 13:10:48 +00003322 nameIRegG(szd,pfx,rm));
sewardj5e525292005-01-28 15:13:10 +00003323 return len+delta;
3324 }
3325}
sewardj32b2bbe2005-01-28 00:50:10 +00003326
3327
sewardj03b07cc2005-01-31 18:09:43 +00003328/* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
3329 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */
sewardj32b2bbe2005-01-28 00:50:10 +00003330static
3331void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
3332{
sewardj03b07cc2005-01-31 18:09:43 +00003333 /* special-case the 64-bit case */
3334 if (sz == 8) {
3335 IROp op = signed_divide ? Iop_DivModS128to64
3336 : Iop_DivModU128to64;
sewardja6b93d12005-02-17 09:28:28 +00003337 IRTemp src128 = newTemp(Ity_I128);
3338 IRTemp dst128 = newTemp(Ity_I128);
sewardj03b07cc2005-01-31 18:09:43 +00003339 assign( src128, binop(Iop_64HLto128,
sewardja6b93d12005-02-17 09:28:28 +00003340 getIReg64(R_RDX),
3341 getIReg64(R_RAX)) );
sewardj03b07cc2005-01-31 18:09:43 +00003342 assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) );
sewardja6b93d12005-02-17 09:28:28 +00003343 putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) );
3344 putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) );
sewardj03b07cc2005-01-31 18:09:43 +00003345 } else {
3346 IROp op = signed_divide ? Iop_DivModS64to32
3347 : Iop_DivModU64to32;
3348 IRTemp src64 = newTemp(Ity_I64);
3349 IRTemp dst64 = newTemp(Ity_I64);
3350 switch (sz) {
sewardj85520e42005-02-19 15:22:38 +00003351 case 4:
sewardj5b470602005-02-27 13:10:48 +00003352 assign( src64,
3353 binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) );
3354 assign( dst64,
3355 binop(op, mkexpr(src64), mkexpr(t)) );
3356 putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) );
3357 putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) );
sewardj85520e42005-02-19 15:22:38 +00003358 break;
3359 case 2: {
3360 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3361 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3362 assign( src64, unop(widen3264,
3363 binop(Iop_16HLto32,
sewardj5b470602005-02-27 13:10:48 +00003364 getIRegRDX(2),
3365 getIRegRAX(2))) );
sewardj85520e42005-02-19 15:22:38 +00003366 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
sewardj5b470602005-02-27 13:10:48 +00003367 putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
3368 putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
sewardj85520e42005-02-19 15:22:38 +00003369 break;
3370 }
3371 case 1: {
3372 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3373 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3374 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16;
3375 assign( src64, unop(widen3264,
sewardj5b470602005-02-27 13:10:48 +00003376 unop(widen1632, getIRegRAX(2))) );
sewardj85520e42005-02-19 15:22:38 +00003377 assign( dst64,
3378 binop(op, mkexpr(src64),
3379 unop(widen1632, unop(widen816, mkexpr(t)))) );
sewardj5b470602005-02-27 13:10:48 +00003380 putIRegRAX( 1, unop(Iop_16to8,
3381 unop(Iop_32to16,
3382 unop(Iop_64to32,mkexpr(dst64)))) );
3383 putIRegAH( unop(Iop_16to8,
3384 unop(Iop_32to16,
3385 unop(Iop_64HIto32,mkexpr(dst64)))) );
sewardj85520e42005-02-19 15:22:38 +00003386 break;
3387 }
3388 default:
3389 vpanic("codegen_div(amd64)");
sewardj03b07cc2005-01-31 18:09:43 +00003390 }
sewardj32b2bbe2005-01-28 00:50:10 +00003391 }
3392}
sewardj3ca55a12005-01-27 16:06:23 +00003393
3394static
floriancacba8e2014-12-15 18:58:07 +00003395ULong dis_Grp1 ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00003396 Prefix pfx,
sewardj270def42005-07-03 01:03:01 +00003397 Long delta, UChar modrm,
sewardj227458e2005-01-31 19:04:50 +00003398 Int am_sz, Int d_sz, Int sz, Long d64 )
sewardj3ca55a12005-01-27 16:06:23 +00003399{
3400 Int len;
3401 HChar dis_buf[50];
3402 IRType ty = szToITy(sz);
3403 IRTemp dst1 = newTemp(ty);
3404 IRTemp src = newTemp(ty);
3405 IRTemp dst0 = newTemp(ty);
3406 IRTemp addr = IRTemp_INVALID;
3407 IROp op8 = Iop_INVALID;
sewardj1389d4d2005-01-28 13:46:29 +00003408 ULong mask = mkSizeMask(sz);
sewardj3ca55a12005-01-27 16:06:23 +00003409
sewardj901ed122005-02-27 13:25:31 +00003410 switch (gregLO3ofRM(modrm)) {
sewardj3ca55a12005-01-27 16:06:23 +00003411 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break;
3412 case 2: break; // ADC
3413 case 3: break; // SBB
3414 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break;
3415 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break;
sewardjfd4203c2007-03-21 00:21:56 +00003416 /*NOTREACHED*/
sewardj3ca55a12005-01-27 16:06:23 +00003417 default: vpanic("dis_Grp1(amd64): unhandled case");
3418 }
3419
3420 if (epartIsReg(modrm)) {
3421 vassert(am_sz == 1);
3422
sewardj5b470602005-02-27 13:10:48 +00003423 assign(dst0, getIRegE(sz,pfx,modrm));
sewardj227458e2005-01-31 19:04:50 +00003424 assign(src, mkU(ty,d64 & mask));
sewardj3ca55a12005-01-27 16:06:23 +00003425
sewardj901ed122005-02-27 13:25:31 +00003426 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
sewardje9d8a262009-07-01 08:06:34 +00003427 helper_ADC( sz, dst1, dst0, src,
3428 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj3ca55a12005-01-27 16:06:23 +00003429 } else
sewardj901ed122005-02-27 13:25:31 +00003430 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
sewardje9d8a262009-07-01 08:06:34 +00003431 helper_SBB( sz, dst1, dst0, src,
3432 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj3ca55a12005-01-27 16:06:23 +00003433 } else {
3434 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3435 if (isAddSub(op8))
3436 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3437 else
3438 setFlags_DEP1(op8, dst1, ty);
3439 }
3440
sewardj901ed122005-02-27 13:25:31 +00003441 if (gregLO3ofRM(modrm) < 7)
sewardj5b470602005-02-27 13:10:48 +00003442 putIRegE(sz, pfx, modrm, mkexpr(dst1));
sewardj3ca55a12005-01-27 16:06:23 +00003443
3444 delta += (am_sz + d_sz);
sewardje941eea2005-01-30 19:52:28 +00003445 DIP("%s%c $%lld, %s\n",
sewardj901ed122005-02-27 13:25:31 +00003446 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64,
sewardj5b470602005-02-27 13:10:48 +00003447 nameIRegE(sz,pfx,modrm));
sewardj3ca55a12005-01-27 16:06:23 +00003448 } else {
sewardj2e28ac42008-12-04 00:05:12 +00003449 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
sewardj3ca55a12005-01-27 16:06:23 +00003450
3451 assign(dst0, loadLE(ty,mkexpr(addr)));
sewardj227458e2005-01-31 19:04:50 +00003452 assign(src, mkU(ty,d64 & mask));
sewardj3ca55a12005-01-27 16:06:23 +00003453
sewardj901ed122005-02-27 13:25:31 +00003454 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
sewardj6491f862013-10-15 17:29:19 +00003455 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00003456 /* cas-style store */
3457 helper_ADC( sz, dst1, dst0, src,
3458 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3459 } else {
3460 /* normal store */
3461 helper_ADC( sz, dst1, dst0, src,
3462 /*store*/addr, IRTemp_INVALID, 0 );
3463 }
sewardj3ca55a12005-01-27 16:06:23 +00003464 } else
sewardj901ed122005-02-27 13:25:31 +00003465 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
sewardj6491f862013-10-15 17:29:19 +00003466 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00003467 /* cas-style store */
3468 helper_SBB( sz, dst1, dst0, src,
3469 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3470 } else {
3471 /* normal store */
3472 helper_SBB( sz, dst1, dst0, src,
3473 /*store*/addr, IRTemp_INVALID, 0 );
3474 }
sewardj3ca55a12005-01-27 16:06:23 +00003475 } else {
3476 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
sewardje9d8a262009-07-01 08:06:34 +00003477 if (gregLO3ofRM(modrm) < 7) {
sewardj6491f862013-10-15 17:29:19 +00003478 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00003479 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
3480 mkexpr(dst1)/*newVal*/,
3481 guest_RIP_curr_instr );
3482 } else {
3483 storeLE(mkexpr(addr), mkexpr(dst1));
3484 }
3485 }
sewardj3ca55a12005-01-27 16:06:23 +00003486 if (isAddSub(op8))
3487 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3488 else
3489 setFlags_DEP1(op8, dst1, ty);
3490 }
3491
sewardj3ca55a12005-01-27 16:06:23 +00003492 delta += (len+d_sz);
sewardje941eea2005-01-30 19:52:28 +00003493 DIP("%s%c $%lld, %s\n",
sewardj901ed122005-02-27 13:25:31 +00003494 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz),
sewardj227458e2005-01-31 19:04:50 +00003495 d64, dis_buf);
sewardj3ca55a12005-01-27 16:06:23 +00003496 }
3497 return delta;
3498}
3499
3500
sewardj118b23e2005-01-29 02:14:44 +00003501/* Group 2 extended opcodes. shift_expr must be an 8-bit typed
3502 expression. */
3503
3504static
floriancacba8e2014-12-15 18:58:07 +00003505ULong dis_Grp2 ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00003506 Prefix pfx,
sewardj270def42005-07-03 01:03:01 +00003507 Long delta, UChar modrm,
sewardj118b23e2005-01-29 02:14:44 +00003508 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
florian55085f82012-11-21 00:36:55 +00003509 const HChar* shift_expr_txt, Bool* decode_OK )
sewardj118b23e2005-01-29 02:14:44 +00003510{
3511 /* delta on entry points at the modrm byte. */
3512 HChar dis_buf[50];
3513 Int len;
sewardjb5e5c6d2007-01-12 20:29:01 +00003514 Bool isShift, isRotate, isRotateC;
sewardj118b23e2005-01-29 02:14:44 +00003515 IRType ty = szToITy(sz);
3516 IRTemp dst0 = newTemp(ty);
3517 IRTemp dst1 = newTemp(ty);
3518 IRTemp addr = IRTemp_INVALID;
3519
sewardjfd4203c2007-03-21 00:21:56 +00003520 *decode_OK = True;
3521
sewardj118b23e2005-01-29 02:14:44 +00003522 vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
3523
3524 /* Put value to shift/rotate in dst0. */
3525 if (epartIsReg(modrm)) {
sewardj5b470602005-02-27 13:10:48 +00003526 assign(dst0, getIRegE(sz, pfx, modrm));
sewardj118b23e2005-01-29 02:14:44 +00003527 delta += (am_sz + d_sz);
3528 } else {
sewardj2e28ac42008-12-04 00:05:12 +00003529 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
sewardj118b23e2005-01-29 02:14:44 +00003530 assign(dst0, loadLE(ty,mkexpr(addr)));
3531 delta += len + d_sz;
3532 }
3533
3534 isShift = False;
tomd6b43fd2011-08-19 16:06:52 +00003535 switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }
sewardj118b23e2005-01-29 02:14:44 +00003536
3537 isRotate = False;
sewardj901ed122005-02-27 13:25:31 +00003538 switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; }
sewardj118b23e2005-01-29 02:14:44 +00003539
sewardjb5e5c6d2007-01-12 20:29:01 +00003540 isRotateC = False;
3541 switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; }
sewardj118b23e2005-01-29 02:14:44 +00003542
sewardjb5e5c6d2007-01-12 20:29:01 +00003543 if (!isShift && !isRotate && !isRotateC) {
sewardjfd4203c2007-03-21 00:21:56 +00003544 /*NOTREACHED*/
sewardj118b23e2005-01-29 02:14:44 +00003545 vpanic("dis_Grp2(Reg): unhandled case(amd64)");
3546 }
3547
sewardjb5e5c6d2007-01-12 20:29:01 +00003548 if (isRotateC) {
sewardj112b0992005-07-23 13:19:32 +00003549 /* Call a helper; this insn is so ridiculous it does not deserve
3550 better. One problem is, the helper has to calculate both the
3551 new value and the new flags. This is more than 64 bits, and
3552 there is no way to return more than 64 bits from the helper.
3553 Hence the crude and obvious solution is to call it twice,
3554 using the sign of the sz field to indicate whether it is the
3555 value or rflags result we want.
3556 */
sewardjb5e5c6d2007-01-12 20:29:01 +00003557 Bool left = toBool(gregLO3ofRM(modrm) == 2);
sewardj112b0992005-07-23 13:19:32 +00003558 IRExpr** argsVALUE;
3559 IRExpr** argsRFLAGS;
3560
3561 IRTemp new_value = newTemp(Ity_I64);
3562 IRTemp new_rflags = newTemp(Ity_I64);
3563 IRTemp old_rflags = newTemp(Ity_I64);
3564
3565 assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) );
3566
3567 argsVALUE
3568 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3569 widenUto64(shift_expr), /* rotate amount */
3570 mkexpr(old_rflags),
3571 mkU64(sz) );
3572 assign( new_value,
3573 mkIRExprCCall(
3574 Ity_I64,
3575 0/*regparm*/,
sewardjb5e5c6d2007-01-12 20:29:01 +00003576 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3577 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
sewardj112b0992005-07-23 13:19:32 +00003578 argsVALUE
3579 )
3580 );
3581
3582 argsRFLAGS
3583 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3584 widenUto64(shift_expr), /* rotate amount */
3585 mkexpr(old_rflags),
3586 mkU64(-sz) );
3587 assign( new_rflags,
3588 mkIRExprCCall(
3589 Ity_I64,
3590 0/*regparm*/,
sewardjb5e5c6d2007-01-12 20:29:01 +00003591 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3592 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
sewardj112b0992005-07-23 13:19:32 +00003593 argsRFLAGS
3594 )
3595 );
3596
3597 assign( dst1, narrowTo(ty, mkexpr(new_value)) );
3598 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
3599 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
3600 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
3601 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
sewardj118b23e2005-01-29 02:14:44 +00003602 }
3603
sewardj112b0992005-07-23 13:19:32 +00003604 else
sewardj118b23e2005-01-29 02:14:44 +00003605 if (isShift) {
3606
3607 IRTemp pre64 = newTemp(Ity_I64);
3608 IRTemp res64 = newTemp(Ity_I64);
3609 IRTemp res64ss = newTemp(Ity_I64);
3610 IRTemp shift_amt = newTemp(Ity_I8);
sewardj1027dc22005-02-26 01:55:02 +00003611 UChar mask = toUChar(sz==8 ? 63 : 31);
sewardj118b23e2005-01-29 02:14:44 +00003612 IROp op64;
3613
sewardj901ed122005-02-27 13:25:31 +00003614 switch (gregLO3ofRM(modrm)) {
sewardj118b23e2005-01-29 02:14:44 +00003615 case 4: op64 = Iop_Shl64; break;
3616 case 5: op64 = Iop_Shr64; break;
tomd6b43fd2011-08-19 16:06:52 +00003617 case 6: op64 = Iop_Shl64; break;
sewardj118b23e2005-01-29 02:14:44 +00003618 case 7: op64 = Iop_Sar64; break;
sewardjfd4203c2007-03-21 00:21:56 +00003619 /*NOTREACHED*/
sewardj118b23e2005-01-29 02:14:44 +00003620 default: vpanic("dis_Grp2:shift"); break;
3621 }
3622
3623 /* Widen the value to be shifted to 64 bits, do the shift, and
3624 narrow back down. This seems surprisingly long-winded, but
3625 unfortunately the AMD semantics requires that 8/16/32-bit
3626 shifts give defined results for shift values all the way up
sewardj03c96e82005-02-19 18:12:45 +00003627 to 32, and this seems the simplest way to do it. It has the
sewardj118b23e2005-01-29 02:14:44 +00003628 advantage that the only IR level shifts generated are of 64
3629 bit values, and the shift amount is guaranteed to be in the
3630 range 0 .. 63, thereby observing the IR semantics requiring
sewardj03c96e82005-02-19 18:12:45 +00003631 all shift values to be in the range 0 .. 2^word_size-1.
sewardj118b23e2005-01-29 02:14:44 +00003632
sewardj03c96e82005-02-19 18:12:45 +00003633 Therefore the shift amount is masked with 63 for 64-bit shifts
3634 and 31 for all others.
3635 */
3636 /* shift_amt = shift_expr & MASK, regardless of operation size */
3637 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) );
sewardj118b23e2005-01-29 02:14:44 +00003638
sewardj03c96e82005-02-19 18:12:45 +00003639 /* suitably widen the value to be shifted to 64 bits. */
sewardj118b23e2005-01-29 02:14:44 +00003640 assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0))
3641 : widenUto64(mkexpr(dst0)) );
3642
3643 /* res64 = pre64 `shift` shift_amt */
3644 assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) );
3645
sewardj03c96e82005-02-19 18:12:45 +00003646 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
sewardj118b23e2005-01-29 02:14:44 +00003647 assign( res64ss,
3648 binop(op64,
3649 mkexpr(pre64),
3650 binop(Iop_And8,
3651 binop(Iop_Sub8,
3652 mkexpr(shift_amt), mkU8(1)),
sewardj03c96e82005-02-19 18:12:45 +00003653 mkU8(mask))) );
sewardj118b23e2005-01-29 02:14:44 +00003654
3655 /* Build the flags thunk. */
3656 setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt);
3657
3658 /* Narrow the result back down. */
3659 assign( dst1, narrowTo(ty, mkexpr(res64)) );
3660
3661 } /* if (isShift) */
3662
3663 else
3664 if (isRotate) {
3665 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1
3666 : (ty==Ity_I32 ? 2 : 3));
sewardj901ed122005-02-27 13:25:31 +00003667 Bool left = toBool(gregLO3ofRM(modrm) == 0);
sewardj118b23e2005-01-29 02:14:44 +00003668 IRTemp rot_amt = newTemp(Ity_I8);
3669 IRTemp rot_amt64 = newTemp(Ity_I8);
3670 IRTemp oldFlags = newTemp(Ity_I64);
sewardj1027dc22005-02-26 01:55:02 +00003671 UChar mask = toUChar(sz==8 ? 63 : 31);
sewardj118b23e2005-01-29 02:14:44 +00003672
3673 /* rot_amt = shift_expr & mask */
3674 /* By masking the rotate amount thusly, the IR-level Shl/Shr
3675 expressions never shift beyond the word size and thus remain
3676 well defined. */
sewardj03c96e82005-02-19 18:12:45 +00003677 assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask)));
sewardj118b23e2005-01-29 02:14:44 +00003678
3679 if (ty == Ity_I64)
3680 assign(rot_amt, mkexpr(rot_amt64));
3681 else
3682 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1)));
3683
3684 if (left) {
3685
3686 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
3687 assign(dst1,
3688 binop( mkSizedOp(ty,Iop_Or8),
3689 binop( mkSizedOp(ty,Iop_Shl8),
3690 mkexpr(dst0),
3691 mkexpr(rot_amt)
3692 ),
3693 binop( mkSizedOp(ty,Iop_Shr8),
3694 mkexpr(dst0),
3695 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3696 )
3697 )
3698 );
3699 ccOp += AMD64G_CC_OP_ROLB;
3700
3701 } else { /* right */
3702
3703 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
3704 assign(dst1,
3705 binop( mkSizedOp(ty,Iop_Or8),
3706 binop( mkSizedOp(ty,Iop_Shr8),
3707 mkexpr(dst0),
3708 mkexpr(rot_amt)
3709 ),
3710 binop( mkSizedOp(ty,Iop_Shl8),
3711 mkexpr(dst0),
3712 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3713 )
3714 )
3715 );
3716 ccOp += AMD64G_CC_OP_RORB;
3717
3718 }
3719
3720 /* dst1 now holds the rotated value. Build flag thunk. We
3721 need the resulting value for this, and the previous flags.
3722 Except don't set it if the rotate count is zero. */
3723
3724 assign(oldFlags, mk_amd64g_calculate_rflags_all());
3725
sewardj009230b2013-01-26 11:47:55 +00003726 /* rot_amt64 :: Ity_I8. We need to convert it to I1. */
3727 IRTemp rot_amt64b = newTemp(Ity_I1);
3728 assign(rot_amt64b, binop(Iop_CmpNE8, mkexpr(rot_amt64), mkU8(0)) );
3729
sewardj118b23e2005-01-29 02:14:44 +00003730 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
3731 stmt( IRStmt_Put( OFFB_CC_OP,
florian99dd03e2013-01-29 03:56:06 +00003732 IRExpr_ITE( mkexpr(rot_amt64b),
3733 mkU64(ccOp),
3734 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
sewardj118b23e2005-01-29 02:14:44 +00003735 stmt( IRStmt_Put( OFFB_CC_DEP1,
florian99dd03e2013-01-29 03:56:06 +00003736 IRExpr_ITE( mkexpr(rot_amt64b),
3737 widenUto64(mkexpr(dst1)),
3738 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
sewardj118b23e2005-01-29 02:14:44 +00003739 stmt( IRStmt_Put( OFFB_CC_DEP2,
florian99dd03e2013-01-29 03:56:06 +00003740 IRExpr_ITE( mkexpr(rot_amt64b),
3741 mkU64(0),
3742 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
sewardj118b23e2005-01-29 02:14:44 +00003743 stmt( IRStmt_Put( OFFB_CC_NDEP,
florian99dd03e2013-01-29 03:56:06 +00003744 IRExpr_ITE( mkexpr(rot_amt64b),
3745 mkexpr(oldFlags),
3746 IRExpr_Get(OFFB_CC_NDEP,Ity_I64) ) ));
sewardj118b23e2005-01-29 02:14:44 +00003747 } /* if (isRotate) */
3748
3749 /* Save result, and finish up. */
3750 if (epartIsReg(modrm)) {
sewardj5b470602005-02-27 13:10:48 +00003751 putIRegE(sz, pfx, modrm, mkexpr(dst1));
sewardj118b23e2005-01-29 02:14:44 +00003752 if (vex_traceflags & VEX_TRACE_FE) {
3753 vex_printf("%s%c ",
sewardj901ed122005-02-27 13:25:31 +00003754 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
sewardj118b23e2005-01-29 02:14:44 +00003755 if (shift_expr_txt)
3756 vex_printf("%s", shift_expr_txt);
3757 else
3758 ppIRExpr(shift_expr);
sewardj5b470602005-02-27 13:10:48 +00003759 vex_printf(", %s\n", nameIRegE(sz,pfx,modrm));
sewardj118b23e2005-01-29 02:14:44 +00003760 }
3761 } else {
3762 storeLE(mkexpr(addr), mkexpr(dst1));
3763 if (vex_traceflags & VEX_TRACE_FE) {
3764 vex_printf("%s%c ",
sewardj901ed122005-02-27 13:25:31 +00003765 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
sewardj118b23e2005-01-29 02:14:44 +00003766 if (shift_expr_txt)
3767 vex_printf("%s", shift_expr_txt);
3768 else
3769 ppIRExpr(shift_expr);
3770 vex_printf(", %s\n", dis_buf);
3771 }
3772 }
3773 return delta;
3774}
3775
3776
sewardj1d511802005-03-27 17:59:45 +00003777/* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
3778static
floriancacba8e2014-12-15 18:58:07 +00003779ULong dis_Grp8_Imm ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00003780 Prefix pfx,
sewardj270def42005-07-03 01:03:01 +00003781 Long delta, UChar modrm,
sewardj1d511802005-03-27 17:59:45 +00003782 Int am_sz, Int sz, ULong src_val,
3783 Bool* decode_OK )
3784{
3785 /* src_val denotes a d8.
3786 And delta on entry points at the modrm byte. */
sewardj118b23e2005-01-29 02:14:44 +00003787
sewardj1d511802005-03-27 17:59:45 +00003788 IRType ty = szToITy(sz);
3789 IRTemp t2 = newTemp(Ity_I64);
3790 IRTemp t2m = newTemp(Ity_I64);
3791 IRTemp t_addr = IRTemp_INVALID;
3792 HChar dis_buf[50];
3793 ULong mask;
sewardj9b967672005-02-08 11:13:09 +00003794
sewardj1d511802005-03-27 17:59:45 +00003795 /* we're optimists :-) */
3796 *decode_OK = True;
3797
sewardj38b1d692013-10-15 17:21:42 +00003798 /* Check whether F2 or F3 are acceptable. */
3799 if (epartIsReg(modrm)) {
3800 /* F2 or F3 are not allowed in the register case. */
3801 if (haveF2orF3(pfx)) {
3802 *decode_OK = False;
3803 return delta;
3804 }
3805 } else {
3806 /* F2 or F3 (but not both) are allowable provided LOCK is also
3807 present. */
3808 if (haveF2orF3(pfx)) {
3809 if (haveF2andF3(pfx) || !haveLOCK(pfx)) {
3810 *decode_OK = False;
3811 return delta;
3812 }
3813 }
3814 }
3815
sewardj1d511802005-03-27 17:59:45 +00003816 /* Limit src_val -- the bit offset -- to something within a word.
3817 The Intel docs say that literal offsets larger than a word are
3818 masked in this way. */
3819 switch (sz) {
3820 case 2: src_val &= 15; break;
3821 case 4: src_val &= 31; break;
sewardj537cab02005-04-07 02:03:52 +00003822 case 8: src_val &= 63; break;
sewardj1d511802005-03-27 17:59:45 +00003823 default: *decode_OK = False; return delta;
3824 }
3825
3826 /* Invent a mask suitable for the operation. */
3827 switch (gregLO3ofRM(modrm)) {
sewardj74b4f892005-05-06 01:43:56 +00003828 case 4: /* BT */ mask = 0; break;
3829 case 5: /* BTS */ mask = 1ULL << src_val; break;
3830 case 6: /* BTR */ mask = ~(1ULL << src_val); break;
3831 case 7: /* BTC */ mask = 1ULL << src_val; break;
sewardj1d511802005-03-27 17:59:45 +00003832 /* If this needs to be extended, probably simplest to make a
3833 new function to handle the other cases (0 .. 3). The
3834 Intel docs do however not indicate any use for 0 .. 3, so
3835 we don't expect this to happen. */
3836 default: *decode_OK = False; return delta;
3837 }
3838
3839 /* Fetch the value to be tested and modified into t2, which is
3840 64-bits wide regardless of sz. */
3841 if (epartIsReg(modrm)) {
3842 vassert(am_sz == 1);
3843 assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) );
3844 delta += (am_sz + 1);
3845 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
3846 nameISize(sz),
3847 src_val, nameIRegE(sz,pfx,modrm));
3848 } else {
3849 Int len;
sewardj2e28ac42008-12-04 00:05:12 +00003850 t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 );
sewardj1d511802005-03-27 17:59:45 +00003851 delta += (len+1);
3852 assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) );
3853 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
3854 nameISize(sz),
3855 src_val, dis_buf);
3856 }
3857
sewardj1d511802005-03-27 17:59:45 +00003858 /* Compute the new value into t2m, if non-BT. */
3859 switch (gregLO3ofRM(modrm)) {
3860 case 4: /* BT */
3861 break;
3862 case 5: /* BTS */
3863 assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) );
3864 break;
3865 case 6: /* BTR */
3866 assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) );
3867 break;
3868 case 7: /* BTC */
3869 assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
3870 break;
3871 default:
sewardje9d8a262009-07-01 08:06:34 +00003872 /*NOTREACHED*/ /*the previous switch guards this*/
sewardj1d511802005-03-27 17:59:45 +00003873 vassert(0);
3874 }
3875
3876 /* Write the result back, if non-BT. */
3877 if (gregLO3ofRM(modrm) != 4 /* BT */) {
3878 if (epartIsReg(modrm)) {
sewardjcc3d2192013-03-27 11:37:33 +00003879 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
sewardj1d511802005-03-27 17:59:45 +00003880 } else {
sewardj6491f862013-10-15 17:29:19 +00003881 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00003882 casLE( mkexpr(t_addr),
3883 narrowTo(ty, mkexpr(t2))/*expd*/,
3884 narrowTo(ty, mkexpr(t2m))/*new*/,
3885 guest_RIP_curr_instr );
3886 } else {
3887 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
3888 }
sewardj1d511802005-03-27 17:59:45 +00003889 }
3890 }
3891
sewardje9d8a262009-07-01 08:06:34 +00003892 /* Copy relevant bit from t2 into the carry flag. */
3893 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
3894 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
3895 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
3896 stmt( IRStmt_Put(
3897 OFFB_CC_DEP1,
3898 binop(Iop_And64,
3899 binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
3900 mkU64(1))
3901 ));
3902 /* Set NDEP even though it isn't used. This makes redundant-PUT
3903 elimination of previous stores to this field work better. */
3904 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
3905
sewardj1d511802005-03-27 17:59:45 +00003906 return delta;
3907}
sewardj9b967672005-02-08 11:13:09 +00003908
3909
3910/* Signed/unsigned widening multiply. Generate IR to multiply the
3911 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
3912 RDX:RAX/EDX:EAX/DX:AX/AX.
3913*/
3914static void codegen_mulL_A_D ( Int sz, Bool syned,
florian55085f82012-11-21 00:36:55 +00003915 IRTemp tmp, const HChar* tmp_txt )
sewardj9b967672005-02-08 11:13:09 +00003916{
3917 IRType ty = szToITy(sz);
3918 IRTemp t1 = newTemp(ty);
3919
sewardj5b470602005-02-27 13:10:48 +00003920 assign( t1, getIRegRAX(sz) );
sewardj9b967672005-02-08 11:13:09 +00003921
3922 switch (ty) {
3923 case Ity_I64: {
3924 IRTemp res128 = newTemp(Ity_I128);
3925 IRTemp resHi = newTemp(Ity_I64);
3926 IRTemp resLo = newTemp(Ity_I64);
3927 IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64;
sewardj8bdb89a2005-05-05 21:46:50 +00003928 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
sewardj9b967672005-02-08 11:13:09 +00003929 setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp );
3930 assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3931 assign( resHi, unop(Iop_128HIto64,mkexpr(res128)));
3932 assign( resLo, unop(Iop_128to64,mkexpr(res128)));
sewardj5b470602005-02-27 13:10:48 +00003933 putIReg64(R_RDX, mkexpr(resHi));
3934 putIReg64(R_RAX, mkexpr(resLo));
sewardj9b967672005-02-08 11:13:09 +00003935 break;
3936 }
sewardj85520e42005-02-19 15:22:38 +00003937 case Ity_I32: {
3938 IRTemp res64 = newTemp(Ity_I64);
3939 IRTemp resHi = newTemp(Ity_I32);
3940 IRTemp resLo = newTemp(Ity_I32);
3941 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32;
3942 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
3943 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
3944 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3945 assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
3946 assign( resLo, unop(Iop_64to32,mkexpr(res64)));
sewardj5b470602005-02-27 13:10:48 +00003947 putIRegRDX(4, mkexpr(resHi));
3948 putIRegRAX(4, mkexpr(resLo));
sewardj85520e42005-02-19 15:22:38 +00003949 break;
3950 }
3951 case Ity_I16: {
3952 IRTemp res32 = newTemp(Ity_I32);
3953 IRTemp resHi = newTemp(Ity_I16);
3954 IRTemp resLo = newTemp(Ity_I16);
3955 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16;
3956 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
3957 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
3958 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3959 assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
3960 assign( resLo, unop(Iop_32to16,mkexpr(res32)));
sewardj5b470602005-02-27 13:10:48 +00003961 putIRegRDX(2, mkexpr(resHi));
3962 putIRegRAX(2, mkexpr(resLo));
sewardj85520e42005-02-19 15:22:38 +00003963 break;
3964 }
3965 case Ity_I8: {
3966 IRTemp res16 = newTemp(Ity_I16);
3967 IRTemp resHi = newTemp(Ity_I8);
3968 IRTemp resLo = newTemp(Ity_I8);
3969 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8;
3970 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
3971 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
3972 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3973 assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
3974 assign( resLo, unop(Iop_16to8,mkexpr(res16)));
sewardj5b470602005-02-27 13:10:48 +00003975 putIRegRAX(2, mkexpr(res16));
sewardj85520e42005-02-19 15:22:38 +00003976 break;
3977 }
sewardj9b967672005-02-08 11:13:09 +00003978 default:
sewardj85520e42005-02-19 15:22:38 +00003979 ppIRType(ty);
sewardj9b967672005-02-08 11:13:09 +00003980 vpanic("codegen_mulL_A_D(amd64)");
3981 }
3982 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
3983}
sewardj32b2bbe2005-01-28 00:50:10 +00003984
3985
sewardj38b1d692013-10-15 17:21:42 +00003986/* Group 3 extended opcodes. We have to decide here whether F2 and F3
3987 might be valid.*/
sewardj32b2bbe2005-01-28 00:50:10 +00003988static
floriancacba8e2014-12-15 18:58:07 +00003989ULong dis_Grp3 ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00003990 Prefix pfx, Int sz, Long delta, Bool* decode_OK )
sewardj32b2bbe2005-01-28 00:50:10 +00003991{
sewardj227458e2005-01-31 19:04:50 +00003992 Long d64;
sewardj32b2bbe2005-01-28 00:50:10 +00003993 UChar modrm;
3994 HChar dis_buf[50];
3995 Int len;
3996 IRTemp addr;
3997 IRType ty = szToITy(sz);
3998 IRTemp t1 = newTemp(ty);
sewardj55dbb262005-01-28 16:36:51 +00003999 IRTemp dst1, src, dst0;
sewardjfd4203c2007-03-21 00:21:56 +00004000 *decode_OK = True;
sewardj8c332e22005-01-28 01:36:56 +00004001 modrm = getUChar(delta);
sewardj32b2bbe2005-01-28 00:50:10 +00004002 if (epartIsReg(modrm)) {
sewardj38b1d692013-10-15 17:21:42 +00004003 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4004 if (haveF2orF3(pfx)) goto unhandled;
sewardj901ed122005-02-27 13:25:31 +00004005 switch (gregLO3ofRM(modrm)) {
sewardj118b23e2005-01-29 02:14:44 +00004006 case 0: { /* TEST */
4007 delta++;
4008 d64 = getSDisp(imin(4,sz), delta);
4009 delta += imin(4,sz);
4010 dst1 = newTemp(ty);
4011 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
sewardj5b470602005-02-27 13:10:48 +00004012 getIRegE(sz,pfx,modrm),
sewardj03b07cc2005-01-31 18:09:43 +00004013 mkU(ty, d64 & mkSizeMask(sz))));
sewardj118b23e2005-01-29 02:14:44 +00004014 setFlags_DEP1( Iop_And8, dst1, ty );
sewardj7eaa7cf2005-01-31 18:55:22 +00004015 DIP("test%c $%lld, %s\n",
sewardj118b23e2005-01-29 02:14:44 +00004016 nameISize(sz), d64,
sewardj5b470602005-02-27 13:10:48 +00004017 nameIRegE(sz, pfx, modrm));
sewardj118b23e2005-01-29 02:14:44 +00004018 break;
4019 }
sewardjfd4203c2007-03-21 00:21:56 +00004020 case 1:
4021 *decode_OK = False;
4022 return delta;
sewardj55dbb262005-01-28 16:36:51 +00004023 case 2: /* NOT */
4024 delta++;
sewardj5b470602005-02-27 13:10:48 +00004025 putIRegE(sz, pfx, modrm,
4026 unop(mkSizedOp(ty,Iop_Not8),
4027 getIRegE(sz, pfx, modrm)));
sewardj55dbb262005-01-28 16:36:51 +00004028 DIP("not%c %s\n", nameISize(sz),
sewardj5b470602005-02-27 13:10:48 +00004029 nameIRegE(sz, pfx, modrm));
sewardj55dbb262005-01-28 16:36:51 +00004030 break;
4031 case 3: /* NEG */
4032 delta++;
4033 dst0 = newTemp(ty);
4034 src = newTemp(ty);
4035 dst1 = newTemp(ty);
4036 assign(dst0, mkU(ty,0));
sewardj5b470602005-02-27 13:10:48 +00004037 assign(src, getIRegE(sz, pfx, modrm));
sewardj2e28ac42008-12-04 00:05:12 +00004038 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
4039 mkexpr(src)));
sewardj55dbb262005-01-28 16:36:51 +00004040 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
sewardj5b470602005-02-27 13:10:48 +00004041 putIRegE(sz, pfx, modrm, mkexpr(dst1));
4042 DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm));
sewardj55dbb262005-01-28 16:36:51 +00004043 break;
sewardj9b967672005-02-08 11:13:09 +00004044 case 4: /* MUL (unsigned widening) */
4045 delta++;
4046 src = newTemp(ty);
sewardj5b470602005-02-27 13:10:48 +00004047 assign(src, getIRegE(sz,pfx,modrm));
sewardj9b967672005-02-08 11:13:09 +00004048 codegen_mulL_A_D ( sz, False, src,
sewardj5b470602005-02-27 13:10:48 +00004049 nameIRegE(sz,pfx,modrm) );
sewardj9b967672005-02-08 11:13:09 +00004050 break;
sewardj85520e42005-02-19 15:22:38 +00004051 case 5: /* IMUL (signed widening) */
4052 delta++;
4053 src = newTemp(ty);
sewardj5b470602005-02-27 13:10:48 +00004054 assign(src, getIRegE(sz,pfx,modrm));
sewardj85520e42005-02-19 15:22:38 +00004055 codegen_mulL_A_D ( sz, True, src,
sewardj5b470602005-02-27 13:10:48 +00004056 nameIRegE(sz,pfx,modrm) );
sewardj85520e42005-02-19 15:22:38 +00004057 break;
sewardj03b07cc2005-01-31 18:09:43 +00004058 case 6: /* DIV */
4059 delta++;
sewardj5b470602005-02-27 13:10:48 +00004060 assign( t1, getIRegE(sz, pfx, modrm) );
sewardj03b07cc2005-01-31 18:09:43 +00004061 codegen_div ( sz, t1, False );
4062 DIP("div%c %s\n", nameISize(sz),
sewardj5b470602005-02-27 13:10:48 +00004063 nameIRegE(sz, pfx, modrm));
sewardj03b07cc2005-01-31 18:09:43 +00004064 break;
sewardj32b2bbe2005-01-28 00:50:10 +00004065 case 7: /* IDIV */
4066 delta++;
sewardj5b470602005-02-27 13:10:48 +00004067 assign( t1, getIRegE(sz, pfx, modrm) );
sewardj32b2bbe2005-01-28 00:50:10 +00004068 codegen_div ( sz, t1, True );
4069 DIP("idiv%c %s\n", nameISize(sz),
sewardj5b470602005-02-27 13:10:48 +00004070 nameIRegE(sz, pfx, modrm));
sewardj32b2bbe2005-01-28 00:50:10 +00004071 break;
4072 default:
sewardjfd4203c2007-03-21 00:21:56 +00004073 /*NOTREACHED*/
4074 vpanic("Grp3(amd64,R)");
sewardj32b2bbe2005-01-28 00:50:10 +00004075 }
4076 } else {
sewardj38b1d692013-10-15 17:21:42 +00004077 /* Decide if F2/XACQ or F3/XREL might be valid. */
4078 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4079 if ((gregLO3ofRM(modrm) == 3/*NEG*/ || gregLO3ofRM(modrm) == 2/*NOT*/)
4080 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4081 validF2orF3 = True;
4082 }
4083 if (!validF2orF3) goto unhandled;
4084 /* */
sewardj2e28ac42008-12-04 00:05:12 +00004085 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
sewardj7de0d3c2005-02-13 02:26:41 +00004086 /* we have to inform disAMode of any immediate
sewardjcc3d2192013-03-27 11:37:33 +00004087 bytes used */
sewardj901ed122005-02-27 13:25:31 +00004088 gregLO3ofRM(modrm)==0/*TEST*/
sewardj7de0d3c2005-02-13 02:26:41 +00004089 ? imin(4,sz)
4090 : 0
4091 );
sewardj32b2bbe2005-01-28 00:50:10 +00004092 t1 = newTemp(ty);
4093 delta += len;
4094 assign(t1, loadLE(ty,mkexpr(addr)));
sewardj901ed122005-02-27 13:25:31 +00004095 switch (gregLO3ofRM(modrm)) {
sewardj03b07cc2005-01-31 18:09:43 +00004096 case 0: { /* TEST */
4097 d64 = getSDisp(imin(4,sz), delta);
4098 delta += imin(4,sz);
4099 dst1 = newTemp(ty);
4100 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
4101 mkexpr(t1),
4102 mkU(ty, d64 & mkSizeMask(sz))));
4103 setFlags_DEP1( Iop_And8, dst1, ty );
4104 DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf);
4105 break;
4106 }
sewardjfd4203c2007-03-21 00:21:56 +00004107 case 1:
4108 *decode_OK = False;
4109 return delta;
sewardj82c9f2f2005-03-02 16:05:13 +00004110 case 2: /* NOT */
sewardje9d8a262009-07-01 08:06:34 +00004111 dst1 = newTemp(ty);
4112 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
sewardj6491f862013-10-15 17:29:19 +00004113 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00004114 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
4115 guest_RIP_curr_instr );
4116 } else {
4117 storeLE( mkexpr(addr), mkexpr(dst1) );
4118 }
sewardj82c9f2f2005-03-02 16:05:13 +00004119 DIP("not%c %s\n", nameISize(sz), dis_buf);
4120 break;
sewardj7de0d3c2005-02-13 02:26:41 +00004121 case 3: /* NEG */
4122 dst0 = newTemp(ty);
4123 src = newTemp(ty);
4124 dst1 = newTemp(ty);
4125 assign(dst0, mkU(ty,0));
4126 assign(src, mkexpr(t1));
sewardj2e28ac42008-12-04 00:05:12 +00004127 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
4128 mkexpr(src)));
sewardj6491f862013-10-15 17:29:19 +00004129 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00004130 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
4131 guest_RIP_curr_instr );
4132 } else {
4133 storeLE( mkexpr(addr), mkexpr(dst1) );
4134 }
sewardj7de0d3c2005-02-13 02:26:41 +00004135 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
sewardj7de0d3c2005-02-13 02:26:41 +00004136 DIP("neg%c %s\n", nameISize(sz), dis_buf);
4137 break;
sewardj31eecde2005-03-23 03:39:55 +00004138 case 4: /* MUL (unsigned widening) */
4139 codegen_mulL_A_D ( sz, False, t1, dis_buf );
4140 break;
sewardj3aba9eb2005-03-30 23:20:47 +00004141 case 5: /* IMUL */
4142 codegen_mulL_A_D ( sz, True, t1, dis_buf );
4143 break;
sewardj1001dc42005-02-21 08:25:55 +00004144 case 6: /* DIV */
4145 codegen_div ( sz, t1, False );
4146 DIP("div%c %s\n", nameISize(sz), dis_buf);
4147 break;
sewardj82c9f2f2005-03-02 16:05:13 +00004148 case 7: /* IDIV */
4149 codegen_div ( sz, t1, True );
4150 DIP("idiv%c %s\n", nameISize(sz), dis_buf);
4151 break;
sewardj32b2bbe2005-01-28 00:50:10 +00004152 default:
sewardjfd4203c2007-03-21 00:21:56 +00004153 /*NOTREACHED*/
4154 vpanic("Grp3(amd64,M)");
sewardj32b2bbe2005-01-28 00:50:10 +00004155 }
4156 }
4157 return delta;
sewardj38b1d692013-10-15 17:21:42 +00004158 unhandled:
4159 *decode_OK = False;
4160 return delta;
sewardj32b2bbe2005-01-28 00:50:10 +00004161}
4162
4163
sewardj38b1d692013-10-15 17:21:42 +00004164/* Group 4 extended opcodes. We have to decide here whether F2 and F3
4165 might be valid. */
sewardj03b07cc2005-01-31 18:09:43 +00004166static
floriancacba8e2014-12-15 18:58:07 +00004167ULong dis_Grp4 ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00004168 Prefix pfx, Long delta, Bool* decode_OK )
sewardj03b07cc2005-01-31 18:09:43 +00004169{
4170 Int alen;
4171 UChar modrm;
4172 HChar dis_buf[50];
4173 IRType ty = Ity_I8;
4174 IRTemp t1 = newTemp(ty);
4175 IRTemp t2 = newTemp(ty);
4176
sewardjfd4203c2007-03-21 00:21:56 +00004177 *decode_OK = True;
4178
sewardj03b07cc2005-01-31 18:09:43 +00004179 modrm = getUChar(delta);
4180 if (epartIsReg(modrm)) {
sewardj38b1d692013-10-15 17:21:42 +00004181 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4182 if (haveF2orF3(pfx)) goto unhandled;
sewardj5b470602005-02-27 13:10:48 +00004183 assign(t1, getIRegE(1, pfx, modrm));
sewardj901ed122005-02-27 13:25:31 +00004184 switch (gregLO3ofRM(modrm)) {
sewardj85520e42005-02-19 15:22:38 +00004185 case 0: /* INC */
4186 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
sewardj5b470602005-02-27 13:10:48 +00004187 putIRegE(1, pfx, modrm, mkexpr(t2));
sewardj85520e42005-02-19 15:22:38 +00004188 setFlags_INC_DEC( True, t2, ty );
4189 break;
sewardj03b07cc2005-01-31 18:09:43 +00004190 case 1: /* DEC */
4191 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
sewardj5b470602005-02-27 13:10:48 +00004192 putIRegE(1, pfx, modrm, mkexpr(t2));
sewardj03b07cc2005-01-31 18:09:43 +00004193 setFlags_INC_DEC( False, t2, ty );
4194 break;
4195 default:
sewardjfd4203c2007-03-21 00:21:56 +00004196 *decode_OK = False;
4197 return delta;
sewardj03b07cc2005-01-31 18:09:43 +00004198 }
4199 delta++;
sewardj901ed122005-02-27 13:25:31 +00004200 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)),
sewardj5b470602005-02-27 13:10:48 +00004201 nameIRegE(1, pfx, modrm));
sewardj03b07cc2005-01-31 18:09:43 +00004202 } else {
sewardj38b1d692013-10-15 17:21:42 +00004203 /* Decide if F2/XACQ or F3/XREL might be valid. */
4204 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4205 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/)
4206 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4207 validF2orF3 = True;
4208 }
4209 if (!validF2orF3) goto unhandled;
4210 /* */
sewardj2e28ac42008-12-04 00:05:12 +00004211 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj03b07cc2005-01-31 18:09:43 +00004212 assign( t1, loadLE(ty, mkexpr(addr)) );
sewardj901ed122005-02-27 13:25:31 +00004213 switch (gregLO3ofRM(modrm)) {
sewardj007e9ec2005-03-23 11:36:48 +00004214 case 0: /* INC */
4215 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
sewardj6491f862013-10-15 17:29:19 +00004216 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00004217 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
4218 guest_RIP_curr_instr );
4219 } else {
4220 storeLE( mkexpr(addr), mkexpr(t2) );
4221 }
sewardj007e9ec2005-03-23 11:36:48 +00004222 setFlags_INC_DEC( True, t2, ty );
4223 break;
4224 case 1: /* DEC */
4225 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
sewardj6491f862013-10-15 17:29:19 +00004226 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00004227 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
4228 guest_RIP_curr_instr );
4229 } else {
4230 storeLE( mkexpr(addr), mkexpr(t2) );
4231 }
sewardj007e9ec2005-03-23 11:36:48 +00004232 setFlags_INC_DEC( False, t2, ty );
4233 break;
sewardj03b07cc2005-01-31 18:09:43 +00004234 default:
sewardjfd4203c2007-03-21 00:21:56 +00004235 *decode_OK = False;
4236 return delta;
sewardj03b07cc2005-01-31 18:09:43 +00004237 }
4238 delta += alen;
sewardj901ed122005-02-27 13:25:31 +00004239 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf);
sewardj03b07cc2005-01-31 18:09:43 +00004240 }
4241 return delta;
sewardj38b1d692013-10-15 17:21:42 +00004242 unhandled:
4243 *decode_OK = False;
4244 return delta;
sewardj03b07cc2005-01-31 18:09:43 +00004245}
sewardj354e5c62005-01-27 20:12:52 +00004246
4247
sewardj38b1d692013-10-15 17:21:42 +00004248/* Group 5 extended opcodes. We have to decide here whether F2 and F3
4249 might be valid. */
sewardj354e5c62005-01-27 20:12:52 +00004250static
floriancacba8e2014-12-15 18:58:07 +00004251ULong dis_Grp5 ( const VexAbiInfo* vbi,
sewardjfd4203c2007-03-21 00:21:56 +00004252 Prefix pfx, Int sz, Long delta,
sewardjc6f970f2012-04-02 21:54:49 +00004253 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
sewardj354e5c62005-01-27 20:12:52 +00004254{
4255 Int len;
4256 UChar modrm;
4257 HChar dis_buf[50];
4258 IRTemp addr = IRTemp_INVALID;
4259 IRType ty = szToITy(sz);
4260 IRTemp t1 = newTemp(ty);
4261 IRTemp t2 = IRTemp_INVALID;
sewardj03b07cc2005-01-31 18:09:43 +00004262 IRTemp t3 = IRTemp_INVALID;
sewardj7eaa7cf2005-01-31 18:55:22 +00004263 Bool showSz = True;
sewardj354e5c62005-01-27 20:12:52 +00004264
sewardjfd4203c2007-03-21 00:21:56 +00004265 *decode_OK = True;
4266
sewardj8c332e22005-01-28 01:36:56 +00004267 modrm = getUChar(delta);
sewardj354e5c62005-01-27 20:12:52 +00004268 if (epartIsReg(modrm)) {
mjw67ac3fd2014-05-09 11:41:06 +00004269 /* F2/XACQ and F3/XREL are always invalid in the non-mem case.
4270 F2/CALL and F2/JMP may have bnd prefix. */
4271 if (haveF2orF3(pfx)
4272 && ! (haveF2(pfx)
4273 && (gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4)))
4274 goto unhandledR;
sewardj5b470602005-02-27 13:10:48 +00004275 assign(t1, getIRegE(sz,pfx,modrm));
sewardj901ed122005-02-27 13:25:31 +00004276 switch (gregLO3ofRM(modrm)) {
sewardj32b2bbe2005-01-28 00:50:10 +00004277 case 0: /* INC */
4278 t2 = newTemp(ty);
4279 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
4280 mkexpr(t1), mkU(ty,1)));
4281 setFlags_INC_DEC( True, t2, ty );
sewardj5b470602005-02-27 13:10:48 +00004282 putIRegE(sz,pfx,modrm, mkexpr(t2));
sewardj32b2bbe2005-01-28 00:50:10 +00004283 break;
4284 case 1: /* DEC */
4285 t2 = newTemp(ty);
4286 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
4287 mkexpr(t1), mkU(ty,1)));
4288 setFlags_INC_DEC( False, t2, ty );
sewardj5b470602005-02-27 13:10:48 +00004289 putIRegE(sz,pfx,modrm, mkexpr(t2));
sewardj32b2bbe2005-01-28 00:50:10 +00004290 break;
sewardj354e5c62005-01-27 20:12:52 +00004291 case 2: /* call Ev */
sewardj03b07cc2005-01-31 18:09:43 +00004292 /* Ignore any sz value and operate as if sz==8. */
sewardj66e40ae2013-10-14 21:47:14 +00004293 if (!(sz == 4 || sz == 8)) goto unhandledR;
mjw67ac3fd2014-05-09 11:41:06 +00004294 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardj03b07cc2005-01-31 18:09:43 +00004295 sz = 8;
4296 t3 = newTemp(Ity_I64);
sewardj5b470602005-02-27 13:10:48 +00004297 assign(t3, getIRegE(sz,pfx,modrm));
sewardj03b07cc2005-01-31 18:09:43 +00004298 t2 = newTemp(Ity_I64);
4299 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
4300 putIReg64(R_RSP, mkexpr(t2));
sewardj9e6491a2005-07-02 19:24:10 +00004301 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1));
sewardj478646f2008-05-01 20:13:04 +00004302 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
sewardjc6f970f2012-04-02 21:54:49 +00004303 jmp_treg(dres, Ijk_Call, t3);
4304 vassert(dres->whatNext == Dis_StopHere);
sewardj7eaa7cf2005-01-31 18:55:22 +00004305 showSz = False;
sewardj354e5c62005-01-27 20:12:52 +00004306 break;
sewardj354e5c62005-01-27 20:12:52 +00004307 case 4: /* jmp Ev */
sewardj03b07cc2005-01-31 18:09:43 +00004308 /* Ignore any sz value and operate as if sz==8. */
sewardj66e40ae2013-10-14 21:47:14 +00004309 if (!(sz == 4 || sz == 8)) goto unhandledR;
mjw67ac3fd2014-05-09 11:41:06 +00004310 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardj03b07cc2005-01-31 18:09:43 +00004311 sz = 8;
4312 t3 = newTemp(Ity_I64);
sewardj5b470602005-02-27 13:10:48 +00004313 assign(t3, getIRegE(sz,pfx,modrm));
sewardjc6f970f2012-04-02 21:54:49 +00004314 jmp_treg(dres, Ijk_Boring, t3);
4315 vassert(dres->whatNext == Dis_StopHere);
sewardj7eaa7cf2005-01-31 18:55:22 +00004316 showSz = False;
sewardj354e5c62005-01-27 20:12:52 +00004317 break;
sewardj66e40ae2013-10-14 21:47:14 +00004318 case 6: /* PUSH Ev */
4319 /* There is no encoding for 32-bit operand size; hence ... */
4320 if (sz == 4) sz = 8;
4321 if (sz == 8 || sz == 2) {
4322 ty = szToITy(sz); /* redo it, since sz might have changed */
4323 t3 = newTemp(ty);
4324 assign(t3, getIRegE(sz,pfx,modrm));
4325 t2 = newTemp(Ity_I64);
4326 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
4327 putIReg64(R_RSP, mkexpr(t2) );
4328 storeLE( mkexpr(t2), mkexpr(t3) );
4329 break;
4330 } else {
4331 goto unhandledR; /* awaiting test case */
4332 }
4333 default:
4334 unhandledR:
sewardjfd4203c2007-03-21 00:21:56 +00004335 *decode_OK = False;
4336 return delta;
sewardj354e5c62005-01-27 20:12:52 +00004337 }
4338 delta++;
sewardj901ed122005-02-27 13:25:31 +00004339 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
sewardj7eaa7cf2005-01-31 18:55:22 +00004340 showSz ? nameISize(sz) : ' ',
sewardj5b470602005-02-27 13:10:48 +00004341 nameIRegE(sz, pfx, modrm));
sewardj354e5c62005-01-27 20:12:52 +00004342 } else {
mjw67ac3fd2014-05-09 11:41:06 +00004343 /* Decide if F2/XACQ, F3/XREL, F2/CALL or F2/JMP might be valid. */
sewardj38b1d692013-10-15 17:21:42 +00004344 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4345 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/)
4346 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4347 validF2orF3 = True;
mjw67ac3fd2014-05-09 11:41:06 +00004348 } else if ((gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4)
4349 && (haveF2(pfx) && !haveF3(pfx))) {
4350 validF2orF3 = True;
sewardj38b1d692013-10-15 17:21:42 +00004351 }
4352 if (!validF2orF3) goto unhandledM;
4353 /* */
sewardj2e28ac42008-12-04 00:05:12 +00004354 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj901ed122005-02-27 13:25:31 +00004355 if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4
4356 && gregLO3ofRM(modrm) != 6) {
sewardj03b07cc2005-01-31 18:09:43 +00004357 assign(t1, loadLE(ty,mkexpr(addr)));
sewardj909c06d2005-02-19 22:47:41 +00004358 }
sewardj901ed122005-02-27 13:25:31 +00004359 switch (gregLO3ofRM(modrm)) {
sewardj354e5c62005-01-27 20:12:52 +00004360 case 0: /* INC */
sewardj354e5c62005-01-27 20:12:52 +00004361 t2 = newTemp(ty);
4362 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
4363 mkexpr(t1), mkU(ty,1)));
sewardj6491f862013-10-15 17:29:19 +00004364 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00004365 casLE( mkexpr(addr),
4366 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4367 } else {
4368 storeLE(mkexpr(addr),mkexpr(t2));
4369 }
sewardj354e5c62005-01-27 20:12:52 +00004370 setFlags_INC_DEC( True, t2, ty );
sewardj354e5c62005-01-27 20:12:52 +00004371 break;
sewardj354e5c62005-01-27 20:12:52 +00004372 case 1: /* DEC */
4373 t2 = newTemp(ty);
4374 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
4375 mkexpr(t1), mkU(ty,1)));
sewardj6491f862013-10-15 17:29:19 +00004376 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00004377 casLE( mkexpr(addr),
4378 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4379 } else {
4380 storeLE(mkexpr(addr),mkexpr(t2));
4381 }
sewardj354e5c62005-01-27 20:12:52 +00004382 setFlags_INC_DEC( False, t2, ty );
sewardj354e5c62005-01-27 20:12:52 +00004383 break;
4384 case 2: /* call Ev */
sewardj03b07cc2005-01-31 18:09:43 +00004385 /* Ignore any sz value and operate as if sz==8. */
sewardj66e40ae2013-10-14 21:47:14 +00004386 if (!(sz == 4 || sz == 8)) goto unhandledM;
mjw67ac3fd2014-05-09 11:41:06 +00004387 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardj7eaa7cf2005-01-31 18:55:22 +00004388 sz = 8;
sewardj03b07cc2005-01-31 18:09:43 +00004389 t3 = newTemp(Ity_I64);
4390 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4391 t2 = newTemp(Ity_I64);
4392 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
4393 putIReg64(R_RSP, mkexpr(t2));
sewardj9e6491a2005-07-02 19:24:10 +00004394 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len));
sewardj478646f2008-05-01 20:13:04 +00004395 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
sewardjc6f970f2012-04-02 21:54:49 +00004396 jmp_treg(dres, Ijk_Call, t3);
4397 vassert(dres->whatNext == Dis_StopHere);
sewardj7eaa7cf2005-01-31 18:55:22 +00004398 showSz = False;
sewardj354e5c62005-01-27 20:12:52 +00004399 break;
sewardj354e5c62005-01-27 20:12:52 +00004400 case 4: /* JMP Ev */
sewardj03b07cc2005-01-31 18:09:43 +00004401 /* Ignore any sz value and operate as if sz==8. */
sewardj66e40ae2013-10-14 21:47:14 +00004402 if (!(sz == 4 || sz == 8)) goto unhandledM;
mjw67ac3fd2014-05-09 11:41:06 +00004403 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardj7eaa7cf2005-01-31 18:55:22 +00004404 sz = 8;
sewardj03b07cc2005-01-31 18:09:43 +00004405 t3 = newTemp(Ity_I64);
4406 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
sewardjc6f970f2012-04-02 21:54:49 +00004407 jmp_treg(dres, Ijk_Boring, t3);
4408 vassert(dres->whatNext == Dis_StopHere);
sewardj7eaa7cf2005-01-31 18:55:22 +00004409 showSz = False;
sewardj354e5c62005-01-27 20:12:52 +00004410 break;
sewardj354e5c62005-01-27 20:12:52 +00004411 case 6: /* PUSH Ev */
sewardja6b93d12005-02-17 09:28:28 +00004412 /* There is no encoding for 32-bit operand size; hence ... */
4413 if (sz == 4) sz = 8;
sewardj66e40ae2013-10-14 21:47:14 +00004414 if (sz == 8 || sz == 2) {
4415 ty = szToITy(sz); /* redo it, since sz might have changed */
4416 t3 = newTemp(ty);
4417 assign(t3, loadLE(ty,mkexpr(addr)));
sewardj909c06d2005-02-19 22:47:41 +00004418 t2 = newTemp(Ity_I64);
4419 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
4420 putIReg64(R_RSP, mkexpr(t2) );
4421 storeLE( mkexpr(t2), mkexpr(t3) );
4422 break;
sewardjcc3d2192013-03-27 11:37:33 +00004423 } else {
sewardj66e40ae2013-10-14 21:47:14 +00004424 goto unhandledM; /* awaiting test case */
sewardjcc3d2192013-03-27 11:37:33 +00004425 }
sewardj354e5c62005-01-27 20:12:52 +00004426 default:
sewardj66e40ae2013-10-14 21:47:14 +00004427 unhandledM:
sewardjfd4203c2007-03-21 00:21:56 +00004428 *decode_OK = False;
4429 return delta;
sewardj354e5c62005-01-27 20:12:52 +00004430 }
4431 delta += len;
sewardj901ed122005-02-27 13:25:31 +00004432 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
sewardj7eaa7cf2005-01-31 18:55:22 +00004433 showSz ? nameISize(sz) : ' ',
4434 dis_buf);
sewardj354e5c62005-01-27 20:12:52 +00004435 }
4436 return delta;
4437}
4438
4439
sewardjd0a12df2005-02-10 02:07:43 +00004440/*------------------------------------------------------------*/
4441/*--- Disassembling string ops (including REP prefixes) ---*/
4442/*------------------------------------------------------------*/
4443
4444/* Code shared by all the string ops */
4445static
4446void dis_string_op_increment ( Int sz, IRTemp t_inc )
4447{
4448 UChar logSz;
4449 if (sz == 8 || sz == 4 || sz == 2) {
4450 logSz = 1;
4451 if (sz == 4) logSz = 2;
4452 if (sz == 8) logSz = 3;
4453 assign( t_inc,
4454 binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
4455 mkU8(logSz) ) );
4456 } else {
4457 assign( t_inc,
4458 IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
4459 }
4460}
4461
sewardj909c06d2005-02-19 22:47:41 +00004462static
tom5cdf4e32011-08-12 15:42:56 +00004463void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ),
florian55085f82012-11-21 00:36:55 +00004464 Int sz, const HChar* name, Prefix pfx )
sewardj909c06d2005-02-19 22:47:41 +00004465{
4466 IRTemp t_inc = newTemp(Ity_I64);
4467 /* Really we ought to inspect the override prefixes, but we don't.
4468 The following assertion catches any resulting sillyness. */
4469 vassert(pfx == clearSegBits(pfx));
4470 dis_string_op_increment(sz, t_inc);
tom5cdf4e32011-08-12 15:42:56 +00004471 dis_OP( sz, t_inc, pfx );
sewardj909c06d2005-02-19 22:47:41 +00004472 DIP("%s%c\n", name, nameISize(sz));
4473}
4474
4475static
tom5cdf4e32011-08-12 15:42:56 +00004476void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx )
sewardj909c06d2005-02-19 22:47:41 +00004477{
4478 IRType ty = szToITy(sz);
4479 IRTemp td = newTemp(Ity_I64); /* RDI */
4480 IRTemp ts = newTemp(Ity_I64); /* RSI */
tom5cdf4e32011-08-12 15:42:56 +00004481 IRExpr *incd, *incs;
sewardj909c06d2005-02-19 22:47:41 +00004482
tom5cdf4e32011-08-12 15:42:56 +00004483 if (haveASO(pfx)) {
4484 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4485 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4486 } else {
4487 assign( td, getIReg64(R_RDI) );
4488 assign( ts, getIReg64(R_RSI) );
4489 }
sewardj909c06d2005-02-19 22:47:41 +00004490
4491 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
4492
tom5cdf4e32011-08-12 15:42:56 +00004493 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4494 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4495 if (haveASO(pfx)) {
4496 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4497 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4498 }
4499 putIReg64( R_RDI, incd );
4500 putIReg64( R_RSI, incs );
sewardj909c06d2005-02-19 22:47:41 +00004501}
4502
sewardj0f99be62009-07-22 09:29:13 +00004503static
tom5cdf4e32011-08-12 15:42:56 +00004504void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx )
sewardj0f99be62009-07-22 09:29:13 +00004505{
4506 IRType ty = szToITy(sz);
4507 IRTemp ts = newTemp(Ity_I64); /* RSI */
tom5cdf4e32011-08-12 15:42:56 +00004508 IRExpr *incs;
sewardj0f99be62009-07-22 09:29:13 +00004509
tom5cdf4e32011-08-12 15:42:56 +00004510 if (haveASO(pfx))
4511 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4512 else
4513 assign( ts, getIReg64(R_RSI) );
sewardj0f99be62009-07-22 09:29:13 +00004514
4515 putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );
4516
tom5cdf4e32011-08-12 15:42:56 +00004517 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4518 if (haveASO(pfx))
4519 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4520 putIReg64( R_RSI, incs );
sewardj0f99be62009-07-22 09:29:13 +00004521}
sewardja6b93d12005-02-17 09:28:28 +00004522
4523static
tom5cdf4e32011-08-12 15:42:56 +00004524void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx )
sewardja6b93d12005-02-17 09:28:28 +00004525{
4526 IRType ty = szToITy(sz);
4527 IRTemp ta = newTemp(ty); /* rAX */
4528 IRTemp td = newTemp(Ity_I64); /* RDI */
tom5cdf4e32011-08-12 15:42:56 +00004529 IRExpr *incd;
sewardja6b93d12005-02-17 09:28:28 +00004530
sewardj5b470602005-02-27 13:10:48 +00004531 assign( ta, getIRegRAX(sz) );
sewardja6b93d12005-02-17 09:28:28 +00004532
tom5cdf4e32011-08-12 15:42:56 +00004533 if (haveASO(pfx))
4534 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4535 else
4536 assign( td, getIReg64(R_RDI) );
sewardja6b93d12005-02-17 09:28:28 +00004537
4538 storeLE( mkexpr(td), mkexpr(ta) );
4539
tom5cdf4e32011-08-12 15:42:56 +00004540 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4541 if (haveASO(pfx))
4542 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4543 putIReg64( R_RDI, incd );
sewardja6b93d12005-02-17 09:28:28 +00004544}
sewardjd0a12df2005-02-10 02:07:43 +00004545
4546static
tom5cdf4e32011-08-12 15:42:56 +00004547void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx )
sewardjd0a12df2005-02-10 02:07:43 +00004548{
4549 IRType ty = szToITy(sz);
4550 IRTemp tdv = newTemp(ty); /* (RDI) */
4551 IRTemp tsv = newTemp(ty); /* (RSI) */
4552 IRTemp td = newTemp(Ity_I64); /* RDI */
4553 IRTemp ts = newTemp(Ity_I64); /* RSI */
tom5cdf4e32011-08-12 15:42:56 +00004554 IRExpr *incd, *incs;
sewardjd0a12df2005-02-10 02:07:43 +00004555
tom5cdf4e32011-08-12 15:42:56 +00004556 if (haveASO(pfx)) {
4557 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4558 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4559 } else {
4560 assign( td, getIReg64(R_RDI) );
4561 assign( ts, getIReg64(R_RSI) );
4562 }
sewardjd0a12df2005-02-10 02:07:43 +00004563
4564 assign( tdv, loadLE(ty,mkexpr(td)) );
4565
4566 assign( tsv, loadLE(ty,mkexpr(ts)) );
4567
4568 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
4569
tom5cdf4e32011-08-12 15:42:56 +00004570 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4571 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4572 if (haveASO(pfx)) {
4573 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4574 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4575 }
4576 putIReg64( R_RDI, incd );
4577 putIReg64( R_RSI, incs );
sewardjd0a12df2005-02-10 02:07:43 +00004578}
4579
sewardj85520e42005-02-19 15:22:38 +00004580static
tom5cdf4e32011-08-12 15:42:56 +00004581void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx )
sewardj85520e42005-02-19 15:22:38 +00004582{
4583 IRType ty = szToITy(sz);
4584 IRTemp ta = newTemp(ty); /* rAX */
4585 IRTemp td = newTemp(Ity_I64); /* RDI */
4586 IRTemp tdv = newTemp(ty); /* (RDI) */
tom5cdf4e32011-08-12 15:42:56 +00004587 IRExpr *incd;
sewardj85520e42005-02-19 15:22:38 +00004588
sewardj5b470602005-02-27 13:10:48 +00004589 assign( ta, getIRegRAX(sz) );
sewardj85520e42005-02-19 15:22:38 +00004590
tom5cdf4e32011-08-12 15:42:56 +00004591 if (haveASO(pfx))
4592 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4593 else
4594 assign( td, getIReg64(R_RDI) );
sewardj85520e42005-02-19 15:22:38 +00004595
4596 assign( tdv, loadLE(ty,mkexpr(td)) );
4597
4598 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
4599
tom5cdf4e32011-08-12 15:42:56 +00004600 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4601 if (haveASO(pfx))
4602 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4603 putIReg64( R_RDI, incd );
sewardj85520e42005-02-19 15:22:38 +00004604}
sewardjd0a12df2005-02-10 02:07:43 +00004605
4606
4607/* Wrap the appropriate string op inside a REP/REPE/REPNE. We assume
4608 the insn is the last one in the basic block, and so emit a jump to
4609 the next insn, rather than just falling through. */
4610static
sewardjc6f970f2012-04-02 21:54:49 +00004611void dis_REP_op ( /*MOD*/DisResult* dres,
4612 AMD64Condcode cond,
tom5cdf4e32011-08-12 15:42:56 +00004613 void (*dis_OP)(Int, IRTemp, Prefix),
florian55085f82012-11-21 00:36:55 +00004614 Int sz, Addr64 rip, Addr64 rip_next, const HChar* name,
sewardj909c06d2005-02-19 22:47:41 +00004615 Prefix pfx )
sewardjd0a12df2005-02-10 02:07:43 +00004616{
4617 IRTemp t_inc = newTemp(Ity_I64);
tom5cdf4e32011-08-12 15:42:56 +00004618 IRTemp tc;
4619 IRExpr* cmp;
sewardjd0a12df2005-02-10 02:07:43 +00004620
sewardj909c06d2005-02-19 22:47:41 +00004621 /* Really we ought to inspect the override prefixes, but we don't.
4622 The following assertion catches any resulting sillyness. */
4623 vassert(pfx == clearSegBits(pfx));
4624
tom5cdf4e32011-08-12 15:42:56 +00004625 if (haveASO(pfx)) {
4626 tc = newTemp(Ity_I32); /* ECX */
4627 assign( tc, getIReg32(R_RCX) );
4628 cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0));
4629 } else {
4630 tc = newTemp(Ity_I64); /* RCX */
4631 assign( tc, getIReg64(R_RCX) );
4632 cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0));
4633 }
sewardjd0a12df2005-02-10 02:07:43 +00004634
sewardjc6f970f2012-04-02 21:54:49 +00004635 stmt( IRStmt_Exit( cmp, Ijk_Boring,
4636 IRConst_U64(rip_next), OFFB_RIP ) );
sewardjd0a12df2005-02-10 02:07:43 +00004637
tom5cdf4e32011-08-12 15:42:56 +00004638 if (haveASO(pfx))
4639 putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
4640 else
4641 putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );
sewardjd0a12df2005-02-10 02:07:43 +00004642
4643 dis_string_op_increment(sz, t_inc);
tom5cdf4e32011-08-12 15:42:56 +00004644 dis_OP (sz, t_inc, pfx);
sewardjd0a12df2005-02-10 02:07:43 +00004645
4646 if (cond == AMD64CondAlways) {
sewardjc6f970f2012-04-02 21:54:49 +00004647 jmp_lit(dres, Ijk_Boring, rip);
4648 vassert(dres->whatNext == Dis_StopHere);
sewardjd0a12df2005-02-10 02:07:43 +00004649 } else {
4650 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
4651 Ijk_Boring,
sewardjc6f970f2012-04-02 21:54:49 +00004652 IRConst_U64(rip),
4653 OFFB_RIP ) );
4654 jmp_lit(dres, Ijk_Boring, rip_next);
4655 vassert(dres->whatNext == Dis_StopHere);
sewardjd0a12df2005-02-10 02:07:43 +00004656 }
4657 DIP("%s%c\n", name, nameISize(sz));
4658}
sewardj32b2bbe2005-01-28 00:50:10 +00004659
4660
4661/*------------------------------------------------------------*/
4662/*--- Arithmetic, etc. ---*/
4663/*------------------------------------------------------------*/
4664
4665/* IMUL E, G. Supplied eip points to the modR/M byte. */
4666static
floriancacba8e2014-12-15 18:58:07 +00004667ULong dis_mul_E_G ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00004668 Prefix pfx,
sewardj32b2bbe2005-01-28 00:50:10 +00004669 Int size,
sewardj270def42005-07-03 01:03:01 +00004670 Long delta0 )
sewardj32b2bbe2005-01-28 00:50:10 +00004671{
4672 Int alen;
4673 HChar dis_buf[50];
sewardj8c332e22005-01-28 01:36:56 +00004674 UChar rm = getUChar(delta0);
sewardj32b2bbe2005-01-28 00:50:10 +00004675 IRType ty = szToITy(size);
4676 IRTemp te = newTemp(ty);
4677 IRTemp tg = newTemp(ty);
4678 IRTemp resLo = newTemp(ty);
4679
sewardj5b470602005-02-27 13:10:48 +00004680 assign( tg, getIRegG(size, pfx, rm) );
sewardj32b2bbe2005-01-28 00:50:10 +00004681 if (epartIsReg(rm)) {
sewardj5b470602005-02-27 13:10:48 +00004682 assign( te, getIRegE(size, pfx, rm) );
sewardj32b2bbe2005-01-28 00:50:10 +00004683 } else {
sewardj2e28ac42008-12-04 00:05:12 +00004684 IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 );
sewardj32b2bbe2005-01-28 00:50:10 +00004685 assign( te, loadLE(ty,mkexpr(addr)) );
4686 }
4687
4688 setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB );
4689
4690 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
4691
sewardj5b470602005-02-27 13:10:48 +00004692 putIRegG(size, pfx, rm, mkexpr(resLo) );
sewardj32b2bbe2005-01-28 00:50:10 +00004693
4694 if (epartIsReg(rm)) {
4695 DIP("imul%c %s, %s\n", nameISize(size),
sewardj901ed122005-02-27 13:25:31 +00004696 nameIRegE(size,pfx,rm),
4697 nameIRegG(size,pfx,rm));
sewardj32b2bbe2005-01-28 00:50:10 +00004698 return 1+delta0;
4699 } else {
4700 DIP("imul%c %s, %s\n", nameISize(size),
4701 dis_buf,
sewardj901ed122005-02-27 13:25:31 +00004702 nameIRegG(size,pfx,rm));
sewardj32b2bbe2005-01-28 00:50:10 +00004703 return alen+delta0;
4704 }
4705}
4706
4707
4708/* IMUL I * E -> G. Supplied rip points to the modR/M byte. */
4709static
floriancacba8e2014-12-15 18:58:07 +00004710ULong dis_imul_I_E_G ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00004711 Prefix pfx,
sewardj32b2bbe2005-01-28 00:50:10 +00004712 Int size,
sewardj270def42005-07-03 01:03:01 +00004713 Long delta,
sewardj32b2bbe2005-01-28 00:50:10 +00004714 Int litsize )
4715{
4716 Long d64;
4717 Int alen;
4718 HChar dis_buf[50];
sewardj8c332e22005-01-28 01:36:56 +00004719 UChar rm = getUChar(delta);
sewardj32b2bbe2005-01-28 00:50:10 +00004720 IRType ty = szToITy(size);
4721 IRTemp te = newTemp(ty);
4722 IRTemp tl = newTemp(ty);
4723 IRTemp resLo = newTemp(ty);
4724
sewardj85520e42005-02-19 15:22:38 +00004725 vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8);
sewardj32b2bbe2005-01-28 00:50:10 +00004726
4727 if (epartIsReg(rm)) {
sewardj5b470602005-02-27 13:10:48 +00004728 assign(te, getIRegE(size, pfx, rm));
sewardj32b2bbe2005-01-28 00:50:10 +00004729 delta++;
4730 } else {
sewardj2e28ac42008-12-04 00:05:12 +00004731 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
sewardj7de0d3c2005-02-13 02:26:41 +00004732 imin(4,litsize) );
sewardj32b2bbe2005-01-28 00:50:10 +00004733 assign(te, loadLE(ty, mkexpr(addr)));
4734 delta += alen;
4735 }
4736 d64 = getSDisp(imin(4,litsize),delta);
4737 delta += imin(4,litsize);
4738
sewardj1389d4d2005-01-28 13:46:29 +00004739 d64 &= mkSizeMask(size);
sewardj32b2bbe2005-01-28 00:50:10 +00004740 assign(tl, mkU(ty,d64));
4741
4742 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));
4743
4744 setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB );
4745
sewardj5b470602005-02-27 13:10:48 +00004746 putIRegG(size, pfx, rm, mkexpr(resLo));
sewardj32b2bbe2005-01-28 00:50:10 +00004747
4748 DIP("imul%c $%lld, %s, %s\n",
4749 nameISize(size), d64,
sewardj5b470602005-02-27 13:10:48 +00004750 ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ),
4751 nameIRegG(size,pfx,rm) );
sewardj32b2bbe2005-01-28 00:50:10 +00004752 return delta;
4753}
4754
4755
sewardjfd181282010-06-14 21:29:35 +00004756/* Generate an IR sequence to do a popcount operation on the supplied
4757 IRTemp, and return a new IRTemp holding the result. 'ty' may be
4758 Ity_I16, Ity_I32 or Ity_I64 only. */
4759static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src )
4760{
4761 Int i;
4762 if (ty == Ity_I16) {
4763 IRTemp old = IRTemp_INVALID;
4764 IRTemp nyu = IRTemp_INVALID;
4765 IRTemp mask[4], shift[4];
4766 for (i = 0; i < 4; i++) {
4767 mask[i] = newTemp(ty);
4768 shift[i] = 1 << i;
4769 }
4770 assign(mask[0], mkU16(0x5555));
4771 assign(mask[1], mkU16(0x3333));
4772 assign(mask[2], mkU16(0x0F0F));
4773 assign(mask[3], mkU16(0x00FF));
4774 old = src;
4775 for (i = 0; i < 4; i++) {
4776 nyu = newTemp(ty);
4777 assign(nyu,
4778 binop(Iop_Add16,
4779 binop(Iop_And16,
4780 mkexpr(old),
4781 mkexpr(mask[i])),
4782 binop(Iop_And16,
4783 binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])),
4784 mkexpr(mask[i]))));
4785 old = nyu;
4786 }
4787 return nyu;
4788 }
4789 if (ty == Ity_I32) {
4790 IRTemp old = IRTemp_INVALID;
4791 IRTemp nyu = IRTemp_INVALID;
4792 IRTemp mask[5], shift[5];
4793 for (i = 0; i < 5; i++) {
4794 mask[i] = newTemp(ty);
4795 shift[i] = 1 << i;
4796 }
4797 assign(mask[0], mkU32(0x55555555));
4798 assign(mask[1], mkU32(0x33333333));
4799 assign(mask[2], mkU32(0x0F0F0F0F));
4800 assign(mask[3], mkU32(0x00FF00FF));
4801 assign(mask[4], mkU32(0x0000FFFF));
4802 old = src;
4803 for (i = 0; i < 5; i++) {
4804 nyu = newTemp(ty);
4805 assign(nyu,
4806 binop(Iop_Add32,
4807 binop(Iop_And32,
4808 mkexpr(old),
4809 mkexpr(mask[i])),
4810 binop(Iop_And32,
4811 binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
4812 mkexpr(mask[i]))));
4813 old = nyu;
4814 }
4815 return nyu;
4816 }
4817 if (ty == Ity_I64) {
4818 IRTemp old = IRTemp_INVALID;
4819 IRTemp nyu = IRTemp_INVALID;
4820 IRTemp mask[6], shift[6];
4821 for (i = 0; i < 6; i++) {
4822 mask[i] = newTemp(ty);
4823 shift[i] = 1 << i;
4824 }
4825 assign(mask[0], mkU64(0x5555555555555555ULL));
4826 assign(mask[1], mkU64(0x3333333333333333ULL));
4827 assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL));
4828 assign(mask[3], mkU64(0x00FF00FF00FF00FFULL));
4829 assign(mask[4], mkU64(0x0000FFFF0000FFFFULL));
4830 assign(mask[5], mkU64(0x00000000FFFFFFFFULL));
4831 old = src;
4832 for (i = 0; i < 6; i++) {
4833 nyu = newTemp(ty);
4834 assign(nyu,
4835 binop(Iop_Add64,
4836 binop(Iop_And64,
4837 mkexpr(old),
4838 mkexpr(mask[i])),
4839 binop(Iop_And64,
4840 binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])),
4841 mkexpr(mask[i]))));
4842 old = nyu;
4843 }
4844 return nyu;
4845 }
4846 /*NOTREACHED*/
4847 vassert(0);
4848}
4849
4850
sewardj9a660ea2010-07-29 11:34:38 +00004851/* Generate an IR sequence to do a count-leading-zeroes operation on
4852 the supplied IRTemp, and return a new IRTemp holding the result.
4853 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
4854 the argument is zero, return the number of bits in the word (the
4855 natural semantics). */
4856static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
4857{
4858 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
4859
4860 IRTemp src64 = newTemp(Ity_I64);
4861 assign(src64, widenUto64( mkexpr(src) ));
4862
4863 IRTemp src64x = newTemp(Ity_I64);
4864 assign(src64x,
4865 binop(Iop_Shl64, mkexpr(src64),
4866 mkU8(64 - 8 * sizeofIRType(ty))));
4867
4868 // Clz64 has undefined semantics when its input is zero, so
4869 // special-case around that.
4870 IRTemp res64 = newTemp(Ity_I64);
4871 assign(res64,
florian99dd03e2013-01-29 03:56:06 +00004872 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00004873 binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0)),
florian99dd03e2013-01-29 03:56:06 +00004874 mkU64(8 * sizeofIRType(ty)),
4875 unop(Iop_Clz64, mkexpr(src64x))
sewardj9a660ea2010-07-29 11:34:38 +00004876 ));
4877
4878 IRTemp res = newTemp(ty);
4879 assign(res, narrowTo(ty, mkexpr(res64)));
4880 return res;
4881}
4882
4883
sewardjcc3d2192013-03-27 11:37:33 +00004884/* Generate an IR sequence to do a count-trailing-zeroes operation on
4885 the supplied IRTemp, and return a new IRTemp holding the result.
4886 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
4887 the argument is zero, return the number of bits in the word (the
4888 natural semantics). */
4889static IRTemp gen_TZCNT ( IRType ty, IRTemp src )
4890{
4891 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
4892
4893 IRTemp src64 = newTemp(Ity_I64);
4894 assign(src64, widenUto64( mkexpr(src) ));
4895
4896 // Ctz64 has undefined semantics when its input is zero, so
4897 // special-case around that.
4898 IRTemp res64 = newTemp(Ity_I64);
4899 assign(res64,
4900 IRExpr_ITE(
4901 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0)),
4902 mkU64(8 * sizeofIRType(ty)),
4903 unop(Iop_Ctz64, mkexpr(src64))
4904 ));
4905
4906 IRTemp res = newTemp(ty);
4907 assign(res, narrowTo(ty, mkexpr(res64)));
4908 return res;
4909}
4910
4911
sewardjbcbb9de2005-03-27 02:22:32 +00004912/*------------------------------------------------------------*/
4913/*--- ---*/
4914/*--- x87 FLOATING POINT INSTRUCTIONS ---*/
4915/*--- ---*/
4916/*------------------------------------------------------------*/
4917
4918/* --- Helper functions for dealing with the register stack. --- */
4919
4920/* --- Set the emulation-warning pseudo-register. --- */
4921
4922static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
4923{
sewardjdd40fdf2006-12-24 02:20:24 +00004924 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
florian6ef84be2012-08-26 03:20:07 +00004925 stmt( IRStmt_Put( OFFB_EMNOTE, e ) );
sewardjbcbb9de2005-03-27 02:22:32 +00004926}
sewardj8d965312005-02-25 02:48:47 +00004927
4928/* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
4929
4930static IRExpr* mkQNaN64 ( void )
4931{
4932 /* QNaN is 0 2047 1 0(51times)
4933 == 0b 11111111111b 1 0(51times)
4934 == 0x7FF8 0000 0000 0000
4935 */
4936 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
4937}
4938
4939/* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */
4940
4941static IRExpr* get_ftop ( void )
4942{
4943 return IRExpr_Get( OFFB_FTOP, Ity_I32 );
4944}
4945
4946static void put_ftop ( IRExpr* e )
4947{
sewardjdd40fdf2006-12-24 02:20:24 +00004948 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
sewardj8d965312005-02-25 02:48:47 +00004949 stmt( IRStmt_Put( OFFB_FTOP, e ) );
4950}
4951
sewardj25a85812005-05-08 23:03:48 +00004952/* --------- Get/put the C3210 bits. --------- */
4953
4954static IRExpr* /* :: Ity_I64 */ get_C3210 ( void )
4955{
4956 return IRExpr_Get( OFFB_FC3210, Ity_I64 );
4957}
4958
4959static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ )
4960{
sewardjdd40fdf2006-12-24 02:20:24 +00004961 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
sewardj25a85812005-05-08 23:03:48 +00004962 stmt( IRStmt_Put( OFFB_FC3210, e ) );
4963}
sewardjc49ce232005-02-25 13:03:03 +00004964
4965/* --------- Get/put the FPU rounding mode. --------- */
4966static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
4967{
4968 return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 ));
4969}
4970
sewardj5e205372005-05-09 02:57:08 +00004971static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
4972{
sewardjdd40fdf2006-12-24 02:20:24 +00004973 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
sewardj5e205372005-05-09 02:57:08 +00004974 stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) );
4975}
sewardjc49ce232005-02-25 13:03:03 +00004976
4977
4978/* --------- Synthesise a 2-bit FPU rounding mode. --------- */
4979/* Produces a value in 0 .. 3, which is encoded as per the type
4980 IRRoundingMode. Since the guest_FPROUND value is also encoded as
4981 per IRRoundingMode, we merely need to get it and mask it for
4982 safety.
4983*/
4984static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
4985{
4986 return binop( Iop_And32, get_fpround(), mkU32(3) );
4987}
sewardj8d965312005-02-25 02:48:47 +00004988
sewardj4796d662006-02-05 16:06:26 +00004989static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
4990{
4991 return mkU32(Irrm_NEAREST);
4992}
4993
sewardj8d965312005-02-25 02:48:47 +00004994
4995/* --------- Get/set FP register tag bytes. --------- */
4996
4997/* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
4998
4999static void put_ST_TAG ( Int i, IRExpr* value )
5000{
sewardjdd40fdf2006-12-24 02:20:24 +00005001 IRRegArray* descr;
5002 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
5003 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
floriand6f38b32012-05-31 15:46:18 +00005004 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
sewardj8d965312005-02-25 02:48:47 +00005005}
5006
5007/* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
5008 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
5009
5010static IRExpr* get_ST_TAG ( Int i )
5011{
sewardjdd40fdf2006-12-24 02:20:24 +00005012 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
sewardj8d965312005-02-25 02:48:47 +00005013 return IRExpr_GetI( descr, get_ftop(), i );
5014}
5015
5016
5017/* --------- Get/set FP registers. --------- */
5018
5019/* Given i, and some expression e, emit 'ST(i) = e' and set the
5020 register's tag to indicate the register is full. The previous
5021 state of the register is not checked. */
5022
5023static void put_ST_UNCHECKED ( Int i, IRExpr* value )
5024{
sewardjdd40fdf2006-12-24 02:20:24 +00005025 IRRegArray* descr;
5026 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
5027 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
floriand6f38b32012-05-31 15:46:18 +00005028 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
sewardj8d965312005-02-25 02:48:47 +00005029 /* Mark the register as in-use. */
5030 put_ST_TAG(i, mkU8(1));
5031}
5032
5033/* Given i, and some expression e, emit
5034 ST(i) = is_full(i) ? NaN : e
5035 and set the tag accordingly.
5036*/
5037
5038static void put_ST ( Int i, IRExpr* value )
5039{
sewardj009230b2013-01-26 11:47:55 +00005040 put_ST_UNCHECKED(
5041 i,
florian99dd03e2013-01-29 03:56:06 +00005042 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
5043 /* non-0 means full */
5044 mkQNaN64(),
5045 /* 0 means empty */
5046 value
sewardj009230b2013-01-26 11:47:55 +00005047 )
sewardj8d965312005-02-25 02:48:47 +00005048 );
5049}
5050
5051
5052/* Given i, generate an expression yielding 'ST(i)'. */
5053
5054static IRExpr* get_ST_UNCHECKED ( Int i )
5055{
sewardjdd40fdf2006-12-24 02:20:24 +00005056 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
sewardj8d965312005-02-25 02:48:47 +00005057 return IRExpr_GetI( descr, get_ftop(), i );
5058}
5059
5060
5061/* Given i, generate an expression yielding
5062 is_full(i) ? ST(i) : NaN
5063*/
5064
5065static IRExpr* get_ST ( Int i )
5066{
5067 return
florian99dd03e2013-01-29 03:56:06 +00005068 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
5069 /* non-0 means full */
5070 get_ST_UNCHECKED(i),
5071 /* 0 means empty */
5072 mkQNaN64());
sewardj8d965312005-02-25 02:48:47 +00005073}
5074
5075
sewardje9c51c92014-04-30 22:50:34 +00005076/* Given i, and some expression e, and a condition cond, generate IR
5077 which has the same effect as put_ST(i,e) when cond is true and has
5078 no effect when cond is false. Given the lack of proper
5079 if-then-else in the IR, this is pretty tricky.
5080*/
5081
5082static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value )
5083{
5084 // new_tag = if cond then FULL else old_tag
5085 // new_val = if cond then (if old_tag==FULL then NaN else val)
5086 // else old_val
5087
5088 IRTemp old_tag = newTemp(Ity_I8);
5089 assign(old_tag, get_ST_TAG(i));
5090 IRTemp new_tag = newTemp(Ity_I8);
5091 assign(new_tag,
5092 IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag)));
5093
5094 IRTemp old_val = newTemp(Ity_F64);
5095 assign(old_val, get_ST_UNCHECKED(i));
5096 IRTemp new_val = newTemp(Ity_F64);
5097 assign(new_val,
5098 IRExpr_ITE(mkexpr(cond),
5099 IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)),
5100 /* non-0 means full */
5101 mkQNaN64(),
5102 /* 0 means empty */
5103 value),
5104 mkexpr(old_val)));
5105
5106 put_ST_UNCHECKED(i, mkexpr(new_val));
5107 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So
5108 // now set it to new_tag instead.
5109 put_ST_TAG(i, mkexpr(new_tag));
5110}
5111
sewardj8d965312005-02-25 02:48:47 +00005112/* Adjust FTOP downwards by one register. */
5113
5114static void fp_push ( void )
5115{
5116 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
5117}
5118
sewardje9c51c92014-04-30 22:50:34 +00005119/* Adjust FTOP downwards by one register when COND is 1:I1. Else
5120 don't change it. */
5121
5122static void maybe_fp_push ( IRTemp cond )
5123{
5124 put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) );
5125}
5126
sewardj8d965312005-02-25 02:48:47 +00005127/* Adjust FTOP upwards by one register, and mark the vacated register
5128 as empty. */
5129
5130static void fp_pop ( void )
5131{
5132 put_ST_TAG(0, mkU8(0));
5133 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
5134}
5135
sewardje9c51c92014-04-30 22:50:34 +00005136/* Set the C2 bit of the FPU status register to e[0]. Assumes that
5137 e[31:1] == 0.
5138*/
5139static void set_C2 ( IRExpr* e )
sewardj25a85812005-05-08 23:03:48 +00005140{
sewardje9c51c92014-04-30 22:50:34 +00005141 IRExpr* cleared = binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2));
5142 put_C3210( binop(Iop_Or64,
5143 cleared,
5144 binop(Iop_Shl64, e, mkU8(AMD64G_FC_SHIFT_C2))) );
5145}
5146
5147/* Generate code to check that abs(d64) < 2^63 and is finite. This is
5148 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The
5149 test is simple, but the derivation of it is not so simple.
5150
5151 The exponent field for an IEEE754 double is 11 bits. That means it
5152 can take values 0 through 0x7FF. If the exponent has value 0x7FF,
5153 the number is either a NaN or an Infinity and so is not finite.
5154 Furthermore, a finite value of exactly 2^63 is the smallest value
5155 that has exponent value 0x43E. Hence, what we need to do is
5156 extract the exponent, ignoring the sign bit and mantissa, and check
5157 it is < 0x43E, or <= 0x43D.
5158
5159 To make this easily applicable to 32- and 64-bit targets, a
5160 roundabout approach is used. First the number is converted to I64,
5161 then the top 32 bits are taken. Shifting them right by 20 bits
5162 places the sign bit and exponent in the bottom 12 bits. Anding
5163 with 0x7FF gets rid of the sign bit, leaving just the exponent
5164 available for comparison.
5165*/
5166static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 )
5167{
5168 IRTemp i64 = newTemp(Ity_I64);
5169 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) );
5170 IRTemp exponent = newTemp(Ity_I32);
5171 assign(exponent,
5172 binop(Iop_And32,
5173 binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)),
5174 mkU32(0x7FF)));
5175 IRTemp in_range_and_finite = newTemp(Ity_I1);
5176 assign(in_range_and_finite,
5177 binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D)));
5178 return in_range_and_finite;
sewardj25a85812005-05-08 23:03:48 +00005179}
sewardj48a89d82005-05-06 11:50:13 +00005180
sewardj7c2d2822006-03-07 00:22:02 +00005181/* Invent a plausible-looking FPU status word value:
5182 ((ftop & 7) << 11) | (c3210 & 0x4700)
5183 */
5184static IRExpr* get_FPU_sw ( void )
5185{
5186 return
5187 unop(Iop_32to16,
5188 binop(Iop_Or32,
5189 binop(Iop_Shl32,
5190 binop(Iop_And32, get_ftop(), mkU32(7)),
5191 mkU8(11)),
5192 binop(Iop_And32, unop(Iop_64to32, get_C3210()),
5193 mkU32(0x4700))
5194 ));
5195}
5196
sewardj48a89d82005-05-06 11:50:13 +00005197
5198/* ------------------------------------------------------- */
5199/* Given all that stack-mangling junk, we can now go ahead
5200 and describe FP instructions.
5201*/
5202
5203/* ST(0) = ST(0) `op` mem64/32(addr)
5204 Need to check ST(0)'s tag on read, but not on write.
5205*/
5206static
florian55085f82012-11-21 00:36:55 +00005207void fp_do_op_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
sewardj48a89d82005-05-06 11:50:13 +00005208 IROp op, Bool dbl )
5209{
5210 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
5211 if (dbl) {
5212 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00005213 triop( op,
5214 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj48a89d82005-05-06 11:50:13 +00005215 get_ST(0),
5216 loadLE(Ity_F64,mkexpr(addr))
5217 ));
5218 } else {
5219 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00005220 triop( op,
5221 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj48a89d82005-05-06 11:50:13 +00005222 get_ST(0),
5223 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
5224 ));
5225 }
5226}
sewardj7bc00082005-03-27 05:08:32 +00005227
5228
5229/* ST(0) = mem64/32(addr) `op` ST(0)
5230 Need to check ST(0)'s tag on read, but not on write.
5231*/
5232static
florian55085f82012-11-21 00:36:55 +00005233void fp_do_oprev_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
sewardj7bc00082005-03-27 05:08:32 +00005234 IROp op, Bool dbl )
5235{
5236 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
5237 if (dbl) {
5238 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00005239 triop( op,
5240 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj7bc00082005-03-27 05:08:32 +00005241 loadLE(Ity_F64,mkexpr(addr)),
5242 get_ST(0)
5243 ));
5244 } else {
5245 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00005246 triop( op,
5247 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj7bc00082005-03-27 05:08:32 +00005248 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
5249 get_ST(0)
5250 ));
5251 }
5252}
sewardj37d52572005-02-25 14:22:12 +00005253
5254
5255/* ST(dst) = ST(dst) `op` ST(src).
5256 Check dst and src tags when reading but not on write.
5257*/
5258static
florian55085f82012-11-21 00:36:55 +00005259void fp_do_op_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
sewardj37d52572005-02-25 14:22:12 +00005260 Bool pop_after )
5261{
sewardj1027dc22005-02-26 01:55:02 +00005262 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
sewardj37d52572005-02-25 14:22:12 +00005263 put_ST_UNCHECKED(
5264 st_dst,
sewardj4796d662006-02-05 16:06:26 +00005265 triop( op,
5266 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5267 get_ST(st_dst),
5268 get_ST(st_src) )
sewardj37d52572005-02-25 14:22:12 +00005269 );
5270 if (pop_after)
5271 fp_pop();
5272}
5273
sewardj137015d2005-03-27 04:01:15 +00005274/* ST(dst) = ST(src) `op` ST(dst).
5275 Check dst and src tags when reading but not on write.
5276*/
5277static
florian55085f82012-11-21 00:36:55 +00005278void fp_do_oprev_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
sewardj137015d2005-03-27 04:01:15 +00005279 Bool pop_after )
5280{
5281 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
5282 put_ST_UNCHECKED(
5283 st_dst,
sewardj4796d662006-02-05 16:06:26 +00005284 triop( op,
5285 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5286 get_ST(st_src),
5287 get_ST(st_dst) )
sewardj137015d2005-03-27 04:01:15 +00005288 );
5289 if (pop_after)
5290 fp_pop();
5291}
sewardjc49ce232005-02-25 13:03:03 +00005292
5293/* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */
5294static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
5295{
sewardj1027dc22005-02-26 01:55:02 +00005296 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
sewardjc49ce232005-02-25 13:03:03 +00005297 /* This is a bit of a hack (and isn't really right). It sets
5298 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
5299 documentation implies A and S are unchanged.
5300 */
5301 /* It's also fishy in that it is used both for COMIP and
5302 UCOMIP, and they aren't the same (although similar). */
5303 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
5304 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
5305 stmt( IRStmt_Put(
5306 OFFB_CC_DEP1,
5307 binop( Iop_And64,
5308 unop( Iop_32Uto64,
5309 binop(Iop_CmpF64, get_ST(0), get_ST(i))),
5310 mkU64(0x45)
5311 )));
5312 if (pop_after)
5313 fp_pop();
5314}
sewardj8d965312005-02-25 02:48:47 +00005315
5316
sewardjb707d102007-07-11 22:49:26 +00005317/* returns
5318 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 )
5319*/
5320static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 )
5321{
5322 IRTemp t32 = newTemp(Ity_I32);
5323 assign( t32, e32 );
5324 return
florian99dd03e2013-01-29 03:56:06 +00005325 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00005326 binop(Iop_CmpLT64U,
5327 unop(Iop_32Uto64,
5328 binop(Iop_Add32, mkexpr(t32), mkU32(32768))),
5329 mkU64(65536)),
florian99dd03e2013-01-29 03:56:06 +00005330 unop(Iop_32to16, mkexpr(t32)),
5331 mkU16( 0x8000 ) );
sewardjb707d102007-07-11 22:49:26 +00005332}
5333
5334
sewardj8d965312005-02-25 02:48:47 +00005335static
sewardjb4fd2e72005-03-23 13:34:11 +00005336ULong dis_FPU ( /*OUT*/Bool* decode_ok,
floriancacba8e2014-12-15 18:58:07 +00005337 const VexAbiInfo* vbi, Prefix pfx, Long delta )
sewardj8d965312005-02-25 02:48:47 +00005338{
5339 Int len;
5340 UInt r_src, r_dst;
5341 HChar dis_buf[50];
5342 IRTemp t1, t2;
5343
5344 /* On entry, delta points at the second byte of the insn (the modrm
5345 byte).*/
5346 UChar first_opcode = getUChar(delta-1);
5347 UChar modrm = getUChar(delta+0);
5348
sewardj37d52572005-02-25 14:22:12 +00005349 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
5350
5351 if (first_opcode == 0xD8) {
5352 if (modrm < 0xC0) {
5353
5354 /* bits 5,4,3 are an opcode extension, and the modRM also
5355 specifies an address. */
sewardj2e28ac42008-12-04 00:05:12 +00005356 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj7bc00082005-03-27 05:08:32 +00005357 delta += len;
sewardj37d52572005-02-25 14:22:12 +00005358
sewardj901ed122005-02-27 13:25:31 +00005359 switch (gregLO3ofRM(modrm)) {
sewardj37d52572005-02-25 14:22:12 +00005360
sewardj48a89d82005-05-06 11:50:13 +00005361 case 0: /* FADD single-real */
5362 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
5363 break;
5364
sewardje6939f02005-05-07 01:01:24 +00005365 case 1: /* FMUL single-real */
5366 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
5367 break;
5368
sewardjdb855502012-05-21 21:51:36 +00005369 case 2: /* FCOM single-real */
5370 DIP("fcoms %s\n", dis_buf);
5371 /* This forces C1 to zero, which isn't right. */
5372 /* The AMD documentation suggests that forcing C1 to
5373 zero is correct (Eliot Moss) */
5374 put_C3210(
5375 unop( Iop_32Uto64,
5376 binop( Iop_And32,
5377 binop(Iop_Shl32,
5378 binop(Iop_CmpF64,
5379 get_ST(0),
5380 unop(Iop_F32toF64,
5381 loadLE(Ity_F32,mkexpr(addr)))),
5382 mkU8(8)),
5383 mkU32(0x4500)
5384 )));
5385 break;
5386
5387 case 3: /* FCOMP single-real */
5388 /* The AMD documentation suggests that forcing C1 to
5389 zero is correct (Eliot Moss) */
5390 DIP("fcomps %s\n", dis_buf);
5391 /* This forces C1 to zero, which isn't right. */
5392 put_C3210(
5393 unop( Iop_32Uto64,
5394 binop( Iop_And32,
5395 binop(Iop_Shl32,
5396 binop(Iop_CmpF64,
5397 get_ST(0),
5398 unop(Iop_F32toF64,
5399 loadLE(Ity_F32,mkexpr(addr)))),
5400 mkU8(8)),
5401 mkU32(0x4500)
5402 )));
5403 fp_pop();
5404 break;
sewardje6939f02005-05-07 01:01:24 +00005405
5406 case 4: /* FSUB single-real */
5407 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
5408 break;
sewardj7bc00082005-03-27 05:08:32 +00005409
5410 case 5: /* FSUBR single-real */
5411 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
5412 break;
5413
sewardje6939f02005-05-07 01:01:24 +00005414 case 6: /* FDIV single-real */
5415 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
5416 break;
5417
5418 case 7: /* FDIVR single-real */
5419 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
5420 break;
sewardj37d52572005-02-25 14:22:12 +00005421
5422 default:
sewardj901ed122005-02-27 13:25:31 +00005423 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
sewardj37d52572005-02-25 14:22:12 +00005424 vex_printf("first_opcode == 0xD8\n");
5425 goto decode_fail;
5426 }
5427 } else {
5428 delta++;
5429 switch (modrm) {
5430
5431 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
5432 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
5433 break;
5434
sewardj137015d2005-03-27 04:01:15 +00005435 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
5436 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
5437 break;
5438
sewardj1859ecd2007-02-23 08:48:22 +00005439 /* Dunno if this is right */
5440 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
5441 r_dst = (UInt)modrm - 0xD0;
5442 DIP("fcom %%st(0),%%st(%d)\n", r_dst);
5443 /* This forces C1 to zero, which isn't right. */
5444 put_C3210(
5445 unop(Iop_32Uto64,
5446 binop( Iop_And32,
5447 binop(Iop_Shl32,
5448 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5449 mkU8(8)),
5450 mkU32(0x4500)
5451 )));
5452 break;
5453
sewardj90e2e4b2007-05-04 09:41:24 +00005454 /* Dunno if this is right */
5455 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
5456 r_dst = (UInt)modrm - 0xD8;
5457 DIP("fcomp %%st(0),%%st(%d)\n", r_dst);
5458 /* This forces C1 to zero, which isn't right. */
5459 put_C3210(
5460 unop(Iop_32Uto64,
5461 binop( Iop_And32,
5462 binop(Iop_Shl32,
5463 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5464 mkU8(8)),
5465 mkU32(0x4500)
5466 )));
5467 fp_pop();
5468 break;
5469
sewardj137015d2005-03-27 04:01:15 +00005470 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
5471 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
5472 break;
5473
sewardje6939f02005-05-07 01:01:24 +00005474 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
5475 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
5476 break;
sewardj137015d2005-03-27 04:01:15 +00005477
5478 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
5479 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
5480 break;
5481
sewardj48a89d82005-05-06 11:50:13 +00005482 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
5483 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
5484 break;
sewardj37d52572005-02-25 14:22:12 +00005485
5486 default:
5487 goto decode_fail;
5488 }
5489 }
5490 }
sewardj8d965312005-02-25 02:48:47 +00005491
5492 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
sewardj37d52572005-02-25 14:22:12 +00005493 else
sewardj8d965312005-02-25 02:48:47 +00005494 if (first_opcode == 0xD9) {
5495 if (modrm < 0xC0) {
5496
5497 /* bits 5,4,3 are an opcode extension, and the modRM also
5498 specifies an address. */
sewardj2e28ac42008-12-04 00:05:12 +00005499 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj8d965312005-02-25 02:48:47 +00005500 delta += len;
5501
sewardj901ed122005-02-27 13:25:31 +00005502 switch (gregLO3ofRM(modrm)) {
sewardj8d965312005-02-25 02:48:47 +00005503
sewardjc49ce232005-02-25 13:03:03 +00005504 case 0: /* FLD single-real */
5505 DIP("flds %s\n", dis_buf);
5506 fp_push();
5507 put_ST(0, unop(Iop_F32toF64,
5508 loadLE(Ity_F32, mkexpr(addr))));
5509 break;
5510
5511 case 2: /* FST single-real */
5512 DIP("fsts %s\n", dis_buf);
5513 storeLE(mkexpr(addr),
5514 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5515 break;
5516
5517 case 3: /* FSTP single-real */
5518 DIP("fstps %s\n", dis_buf);
5519 storeLE(mkexpr(addr),
5520 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5521 fp_pop();
5522 break;
5523
sewardj4017a3b2005-06-13 12:17:27 +00005524 case 4: { /* FLDENV m28 */
5525 /* Uses dirty helper:
florian6ef84be2012-08-26 03:20:07 +00005526 VexEmNote amd64g_do_FLDENV ( VexGuestAMD64State*, HWord ) */
sewardj4017a3b2005-06-13 12:17:27 +00005527 IRTemp ew = newTemp(Ity_I32);
5528 IRTemp w64 = newTemp(Ity_I64);
5529 IRDirty* d = unsafeIRDirty_0_N (
5530 0/*regparms*/,
5531 "amd64g_dirtyhelper_FLDENV",
5532 &amd64g_dirtyhelper_FLDENV,
florian90419562013-08-15 20:54:52 +00005533 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
sewardj4017a3b2005-06-13 12:17:27 +00005534 );
sewardj74142b82013-08-08 10:28:59 +00005535 d->tmp = w64;
sewardj4017a3b2005-06-13 12:17:27 +00005536 /* declare we're reading memory */
5537 d->mFx = Ifx_Read;
5538 d->mAddr = mkexpr(addr);
5539 d->mSize = 28;
5540
5541 /* declare we're writing guest state */
5542 d->nFxState = 4;
sewardjc9069f22012-06-01 16:09:50 +00005543 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj4017a3b2005-06-13 12:17:27 +00005544
5545 d->fxState[0].fx = Ifx_Write;
5546 d->fxState[0].offset = OFFB_FTOP;
5547 d->fxState[0].size = sizeof(UInt);
5548
5549 d->fxState[1].fx = Ifx_Write;
5550 d->fxState[1].offset = OFFB_FPTAGS;
5551 d->fxState[1].size = 8 * sizeof(UChar);
5552
5553 d->fxState[2].fx = Ifx_Write;
5554 d->fxState[2].offset = OFFB_FPROUND;
5555 d->fxState[2].size = sizeof(ULong);
5556
5557 d->fxState[3].fx = Ifx_Write;
5558 d->fxState[3].offset = OFFB_FC3210;
5559 d->fxState[3].size = sizeof(ULong);
5560
5561 stmt( IRStmt_Dirty(d) );
5562
5563 /* ew contains any emulation warning we may need to
5564 issue. If needed, side-exit to the next insn,
5565 reporting the warning, so that Valgrind's dispatcher
5566 sees the warning. */
sewardjcc3d2192013-03-27 11:37:33 +00005567 assign(ew, unop(Iop_64to32,mkexpr(w64)) );
sewardj4017a3b2005-06-13 12:17:27 +00005568 put_emwarn( mkexpr(ew) );
5569 stmt(
5570 IRStmt_Exit(
5571 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5572 Ijk_EmWarn,
sewardjc6f970f2012-04-02 21:54:49 +00005573 IRConst_U64( guest_RIP_bbstart+delta ),
5574 OFFB_RIP
sewardj4017a3b2005-06-13 12:17:27 +00005575 )
5576 );
5577
5578 DIP("fldenv %s\n", dis_buf);
5579 break;
5580 }
sewardj5e205372005-05-09 02:57:08 +00005581
5582 case 5: {/* FLDCW */
5583 /* The only thing we observe in the control word is the
5584 rounding mode. Therefore, pass the 16-bit value
5585 (x87 native-format control word) to a clean helper,
5586 getting back a 64-bit value, the lower half of which
5587 is the FPROUND value to store, and the upper half of
5588 which is the emulation-warning token which may be
5589 generated.
5590 */
5591 /* ULong amd64g_check_fldcw ( ULong ); */
5592 IRTemp t64 = newTemp(Ity_I64);
5593 IRTemp ew = newTemp(Ity_I32);
5594 DIP("fldcw %s\n", dis_buf);
5595 assign( t64, mkIRExprCCall(
5596 Ity_I64, 0/*regparms*/,
5597 "amd64g_check_fldcw",
5598 &amd64g_check_fldcw,
5599 mkIRExprVec_1(
5600 unop( Iop_16Uto64,
5601 loadLE(Ity_I16, mkexpr(addr)))
5602 )
5603 )
5604 );
5605
5606 put_fpround( unop(Iop_64to32, mkexpr(t64)) );
5607 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
5608 put_emwarn( mkexpr(ew) );
5609 /* Finally, if an emulation warning was reported,
5610 side-exit to the next insn, reporting the warning,
5611 so that Valgrind's dispatcher sees the warning. */
5612 stmt(
5613 IRStmt_Exit(
5614 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5615 Ijk_EmWarn,
sewardjc6f970f2012-04-02 21:54:49 +00005616 IRConst_U64( guest_RIP_bbstart+delta ),
5617 OFFB_RIP
sewardj5e205372005-05-09 02:57:08 +00005618 )
5619 );
5620 break;
5621 }
5622
sewardj4017a3b2005-06-13 12:17:27 +00005623 case 6: { /* FNSTENV m28 */
5624 /* Uses dirty helper:
5625 void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */
5626 IRDirty* d = unsafeIRDirty_0_N (
5627 0/*regparms*/,
5628 "amd64g_dirtyhelper_FSTENV",
5629 &amd64g_dirtyhelper_FSTENV,
florian90419562013-08-15 20:54:52 +00005630 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
sewardj4017a3b2005-06-13 12:17:27 +00005631 );
sewardj4017a3b2005-06-13 12:17:27 +00005632 /* declare we're writing memory */
5633 d->mFx = Ifx_Write;
5634 d->mAddr = mkexpr(addr);
5635 d->mSize = 28;
5636
5637 /* declare we're reading guest state */
5638 d->nFxState = 4;
sewardjc9069f22012-06-01 16:09:50 +00005639 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj4017a3b2005-06-13 12:17:27 +00005640
5641 d->fxState[0].fx = Ifx_Read;
5642 d->fxState[0].offset = OFFB_FTOP;
5643 d->fxState[0].size = sizeof(UInt);
5644
5645 d->fxState[1].fx = Ifx_Read;
5646 d->fxState[1].offset = OFFB_FPTAGS;
5647 d->fxState[1].size = 8 * sizeof(UChar);
5648
5649 d->fxState[2].fx = Ifx_Read;
5650 d->fxState[2].offset = OFFB_FPROUND;
5651 d->fxState[2].size = sizeof(ULong);
5652
5653 d->fxState[3].fx = Ifx_Read;
5654 d->fxState[3].offset = OFFB_FC3210;
5655 d->fxState[3].size = sizeof(ULong);
5656
5657 stmt( IRStmt_Dirty(d) );
5658
5659 DIP("fnstenv %s\n", dis_buf);
5660 break;
5661 }
sewardj5e205372005-05-09 02:57:08 +00005662
5663 case 7: /* FNSTCW */
5664 /* Fake up a native x87 FPU control word. The only
5665 thing it depends on is FPROUND[1:0], so call a clean
5666 helper to cook it up. */
sewardj4017a3b2005-06-13 12:17:27 +00005667 /* ULong amd64g_create_fpucw ( ULong fpround ) */
sewardj5e205372005-05-09 02:57:08 +00005668 DIP("fnstcw %s\n", dis_buf);
5669 storeLE(
5670 mkexpr(addr),
5671 unop( Iop_64to16,
5672 mkIRExprCCall(
5673 Ity_I64, 0/*regp*/,
5674 "amd64g_create_fpucw", &amd64g_create_fpucw,
5675 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) )
5676 )
5677 )
5678 );
5679 break;
sewardj8d965312005-02-25 02:48:47 +00005680
5681 default:
sewardj901ed122005-02-27 13:25:31 +00005682 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
sewardj8d965312005-02-25 02:48:47 +00005683 vex_printf("first_opcode == 0xD9\n");
5684 goto decode_fail;
5685 }
5686
5687 } else {
5688 delta++;
5689 switch (modrm) {
5690
sewardjc49ce232005-02-25 13:03:03 +00005691 case 0xC0 ... 0xC7: /* FLD %st(?) */
5692 r_src = (UInt)modrm - 0xC0;
sewardj1027dc22005-02-26 01:55:02 +00005693 DIP("fld %%st(%u)\n", r_src);
sewardjc49ce232005-02-25 13:03:03 +00005694 t1 = newTemp(Ity_F64);
5695 assign(t1, get_ST(r_src));
5696 fp_push();
5697 put_ST(0, mkexpr(t1));
5698 break;
sewardj8d965312005-02-25 02:48:47 +00005699
5700 case 0xC8 ... 0xCF: /* FXCH %st(?) */
5701 r_src = (UInt)modrm - 0xC8;
sewardj1027dc22005-02-26 01:55:02 +00005702 DIP("fxch %%st(%u)\n", r_src);
sewardj8d965312005-02-25 02:48:47 +00005703 t1 = newTemp(Ity_F64);
5704 t2 = newTemp(Ity_F64);
5705 assign(t1, get_ST(0));
5706 assign(t2, get_ST(r_src));
5707 put_ST_UNCHECKED(0, mkexpr(t2));
5708 put_ST_UNCHECKED(r_src, mkexpr(t1));
5709 break;
5710
5711 case 0xE0: /* FCHS */
5712 DIP("fchs\n");
5713 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
5714 break;
5715
sewardj137015d2005-03-27 04:01:15 +00005716 case 0xE1: /* FABS */
5717 DIP("fabs\n");
5718 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
5719 break;
5720
sewardj4f9847d2005-07-25 11:58:34 +00005721 case 0xE5: { /* FXAM */
5722 /* This is an interesting one. It examines %st(0),
5723 regardless of whether the tag says it's empty or not.
5724 Here, just pass both the tag (in our format) and the
5725 value (as a double, actually a ULong) to a helper
5726 function. */
5727 IRExpr** args
5728 = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)),
5729 unop(Iop_ReinterpF64asI64,
5730 get_ST_UNCHECKED(0)) );
5731 put_C3210(mkIRExprCCall(
5732 Ity_I64,
5733 0/*regparm*/,
5734 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM,
5735 args
5736 ));
5737 DIP("fxam\n");
5738 break;
5739 }
sewardjc49ce232005-02-25 13:03:03 +00005740
5741 case 0xE8: /* FLD1 */
5742 DIP("fld1\n");
5743 fp_push();
5744 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
5745 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
5746 break;
5747
sewardj6847d8c2005-05-12 19:21:55 +00005748 case 0xE9: /* FLDL2T */
5749 DIP("fldl2t\n");
5750 fp_push();
5751 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
5752 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
5753 break;
5754
5755 case 0xEA: /* FLDL2E */
5756 DIP("fldl2e\n");
5757 fp_push();
5758 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
5759 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
5760 break;
5761
5762 case 0xEB: /* FLDPI */
5763 DIP("fldpi\n");
5764 fp_push();
5765 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
5766 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
5767 break;
5768
5769 case 0xEC: /* FLDLG2 */
5770 DIP("fldlg2\n");
5771 fp_push();
5772 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
5773 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
5774 break;
5775
5776 case 0xED: /* FLDLN2 */
5777 DIP("fldln2\n");
5778 fp_push();
5779 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
5780 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
5781 break;
sewardjc49ce232005-02-25 13:03:03 +00005782
5783 case 0xEE: /* FLDZ */
5784 DIP("fldz\n");
5785 fp_push();
5786 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
5787 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
5788 break;
5789
sewardj25a85812005-05-08 23:03:48 +00005790 case 0xF0: /* F2XM1 */
5791 DIP("f2xm1\n");
sewardj4796d662006-02-05 16:06:26 +00005792 put_ST_UNCHECKED(0,
5793 binop(Iop_2xm1F64,
5794 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5795 get_ST(0)));
sewardj25a85812005-05-08 23:03:48 +00005796 break;
5797
5798 case 0xF1: /* FYL2X */
5799 DIP("fyl2x\n");
sewardj4796d662006-02-05 16:06:26 +00005800 put_ST_UNCHECKED(1,
5801 triop(Iop_Yl2xF64,
5802 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5803 get_ST(1),
5804 get_ST(0)));
sewardj25a85812005-05-08 23:03:48 +00005805 fp_pop();
5806 break;
5807
sewardje9c51c92014-04-30 22:50:34 +00005808 case 0xF2: { /* FPTAN */
5809 DIP("fptan\n");
5810 IRTemp argD = newTemp(Ity_F64);
5811 assign(argD, get_ST(0));
5812 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
5813 IRTemp resD = newTemp(Ity_F64);
5814 assign(resD,
5815 IRExpr_ITE(
5816 mkexpr(argOK),
5817 binop(Iop_TanF64,
5818 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5819 mkexpr(argD)),
5820 mkexpr(argD))
5821 );
5822 put_ST_UNCHECKED(0, mkexpr(resD));
5823 /* Conditionally push 1.0 on the stack, if the arg is
5824 in range */
5825 maybe_fp_push(argOK);
5826 maybe_put_ST(argOK, 0,
5827 IRExpr_Const(IRConst_F64(1.0)));
5828 set_C2( binop(Iop_Xor64,
5829 unop(Iop_1Uto64, mkexpr(argOK)),
5830 mkU64(1)) );
sewardj5e205372005-05-09 02:57:08 +00005831 break;
sewardje9c51c92014-04-30 22:50:34 +00005832 }
sewardj25a85812005-05-08 23:03:48 +00005833
5834 case 0xF3: /* FPATAN */
5835 DIP("fpatan\n");
sewardj4796d662006-02-05 16:06:26 +00005836 put_ST_UNCHECKED(1,
5837 triop(Iop_AtanF64,
5838 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5839 get_ST(1),
5840 get_ST(0)));
sewardj25a85812005-05-08 23:03:48 +00005841 fp_pop();
5842 break;
5843
sewardj879cee02006-03-07 01:15:50 +00005844 case 0xF4: { /* FXTRACT */
5845 IRTemp argF = newTemp(Ity_F64);
5846 IRTemp sigF = newTemp(Ity_F64);
5847 IRTemp expF = newTemp(Ity_F64);
5848 IRTemp argI = newTemp(Ity_I64);
5849 IRTemp sigI = newTemp(Ity_I64);
5850 IRTemp expI = newTemp(Ity_I64);
5851 DIP("fxtract\n");
5852 assign( argF, get_ST(0) );
5853 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
5854 assign( sigI,
5855 mkIRExprCCall(
5856 Ity_I64, 0/*regparms*/,
5857 "x86amd64g_calculate_FXTRACT",
5858 &x86amd64g_calculate_FXTRACT,
5859 mkIRExprVec_2( mkexpr(argI),
5860 mkIRExpr_HWord(0)/*sig*/ ))
5861 );
5862 assign( expI,
5863 mkIRExprCCall(
5864 Ity_I64, 0/*regparms*/,
5865 "x86amd64g_calculate_FXTRACT",
5866 &x86amd64g_calculate_FXTRACT,
5867 mkIRExprVec_2( mkexpr(argI),
5868 mkIRExpr_HWord(1)/*exp*/ ))
5869 );
5870 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
5871 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
5872 /* exponent */
5873 put_ST_UNCHECKED(0, mkexpr(expF) );
5874 fp_push();
5875 /* significand */
5876 put_ST(0, mkexpr(sigF) );
5877 break;
5878 }
5879
sewardj4970e4e2008-10-11 10:07:55 +00005880 case 0xF5: { /* FPREM1 -- IEEE compliant */
5881 IRTemp a1 = newTemp(Ity_F64);
5882 IRTemp a2 = newTemp(Ity_F64);
5883 DIP("fprem1\n");
5884 /* Do FPREM1 twice, once to get the remainder, and once
5885 to get the C3210 flag values. */
5886 assign( a1, get_ST(0) );
5887 assign( a2, get_ST(1) );
5888 put_ST_UNCHECKED(0,
5889 triop(Iop_PRem1F64,
5890 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5891 mkexpr(a1),
5892 mkexpr(a2)));
5893 put_C3210(
5894 unop(Iop_32Uto64,
5895 triop(Iop_PRem1C3210F64,
5896 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5897 mkexpr(a1),
5898 mkexpr(a2)) ));
5899 break;
5900 }
sewardj6847d8c2005-05-12 19:21:55 +00005901
5902 case 0xF7: /* FINCSTP */
5903 DIP("fincstp\n");
5904 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
5905 break;
5906
sewardjf4c803b2006-09-11 11:07:34 +00005907 case 0xF8: { /* FPREM -- not IEEE compliant */
5908 IRTemp a1 = newTemp(Ity_F64);
5909 IRTemp a2 = newTemp(Ity_F64);
5910 DIP("fprem\n");
5911 /* Do FPREM twice, once to get the remainder, and once
5912 to get the C3210 flag values. */
5913 assign( a1, get_ST(0) );
5914 assign( a2, get_ST(1) );
5915 put_ST_UNCHECKED(0,
5916 triop(Iop_PRemF64,
5917 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5918 mkexpr(a1),
5919 mkexpr(a2)));
5920 put_C3210(
5921 unop(Iop_32Uto64,
5922 triop(Iop_PRemC3210F64,
5923 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5924 mkexpr(a1),
5925 mkexpr(a2)) ));
5926 break;
5927 }
5928
sewardj5e205372005-05-09 02:57:08 +00005929 case 0xF9: /* FYL2XP1 */
5930 DIP("fyl2xp1\n");
sewardj4796d662006-02-05 16:06:26 +00005931 put_ST_UNCHECKED(1,
5932 triop(Iop_Yl2xp1F64,
5933 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5934 get_ST(1),
5935 get_ST(0)));
sewardj5e205372005-05-09 02:57:08 +00005936 fp_pop();
5937 break;
sewardje6939f02005-05-07 01:01:24 +00005938
5939 case 0xFA: /* FSQRT */
5940 DIP("fsqrt\n");
sewardj4796d662006-02-05 16:06:26 +00005941 put_ST_UNCHECKED(0,
5942 binop(Iop_SqrtF64,
5943 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5944 get_ST(0)));
sewardje6939f02005-05-07 01:01:24 +00005945 break;
5946
sewardj25a85812005-05-08 23:03:48 +00005947 case 0xFB: { /* FSINCOS */
sewardj25a85812005-05-08 23:03:48 +00005948 DIP("fsincos\n");
sewardje9c51c92014-04-30 22:50:34 +00005949 IRTemp argD = newTemp(Ity_F64);
5950 assign(argD, get_ST(0));
5951 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
5952 IRTemp resD = newTemp(Ity_F64);
5953 assign(resD,
5954 IRExpr_ITE(
5955 mkexpr(argOK),
5956 binop(Iop_SinF64,
5957 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5958 mkexpr(argD)),
5959 mkexpr(argD))
5960 );
5961 put_ST_UNCHECKED(0, mkexpr(resD));
5962 /* Conditionally push the cos value on the stack, if
5963 the arg is in range */
5964 maybe_fp_push(argOK);
5965 maybe_put_ST(argOK, 0,
5966 binop(Iop_CosF64,
sewardj4796d662006-02-05 16:06:26 +00005967 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardje9c51c92014-04-30 22:50:34 +00005968 mkexpr(argD)));
5969 set_C2( binop(Iop_Xor64,
5970 unop(Iop_1Uto64, mkexpr(argOK)),
5971 mkU64(1)) );
sewardj25a85812005-05-08 23:03:48 +00005972 break;
5973 }
5974
5975 case 0xFC: /* FRNDINT */
5976 DIP("frndint\n");
5977 put_ST_UNCHECKED(0,
sewardjb183b852006-02-03 16:08:03 +00005978 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
sewardj25a85812005-05-08 23:03:48 +00005979 break;
5980
5981 case 0xFD: /* FSCALE */
5982 DIP("fscale\n");
sewardj4796d662006-02-05 16:06:26 +00005983 put_ST_UNCHECKED(0,
5984 triop(Iop_ScaleF64,
5985 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5986 get_ST(0),
5987 get_ST(1)));
sewardj25a85812005-05-08 23:03:48 +00005988 break;
5989
sewardje9c51c92014-04-30 22:50:34 +00005990 case 0xFE: /* FSIN */
5991 case 0xFF: { /* FCOS */
5992 Bool isSIN = modrm == 0xFE;
5993 DIP("%s\n", isSIN ? "fsin" : "fcos");
5994 IRTemp argD = newTemp(Ity_F64);
5995 assign(argD, get_ST(0));
5996 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
5997 IRTemp resD = newTemp(Ity_F64);
5998 assign(resD,
5999 IRExpr_ITE(
6000 mkexpr(argOK),
6001 binop(isSIN ? Iop_SinF64 : Iop_CosF64,
6002 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6003 mkexpr(argD)),
6004 mkexpr(argD))
6005 );
6006 put_ST_UNCHECKED(0, mkexpr(resD));
6007 set_C2( binop(Iop_Xor64,
6008 unop(Iop_1Uto64, mkexpr(argOK)),
6009 mkU64(1)) );
sewardj25a85812005-05-08 23:03:48 +00006010 break;
sewardje9c51c92014-04-30 22:50:34 +00006011 }
sewardj8d965312005-02-25 02:48:47 +00006012
6013 default:
6014 goto decode_fail;
6015 }
6016 }
6017 }
6018
6019 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
6020 else
6021 if (first_opcode == 0xDA) {
6022
6023 if (modrm < 0xC0) {
6024
6025 /* bits 5,4,3 are an opcode extension, and the modRM also
6026 specifies an address. */
sewardj6847d8c2005-05-12 19:21:55 +00006027 IROp fop;
sewardj2e28ac42008-12-04 00:05:12 +00006028 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj8d965312005-02-25 02:48:47 +00006029 delta += len;
sewardj901ed122005-02-27 13:25:31 +00006030 switch (gregLO3ofRM(modrm)) {
sewardj8d965312005-02-25 02:48:47 +00006031
sewardj6847d8c2005-05-12 19:21:55 +00006032 case 0: /* FIADD m32int */ /* ST(0) += m32int */
6033 DIP("fiaddl %s\n", dis_buf);
6034 fop = Iop_AddF64;
6035 goto do_fop_m32;
6036
6037 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
6038 DIP("fimull %s\n", dis_buf);
6039 fop = Iop_MulF64;
6040 goto do_fop_m32;
6041
6042 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
6043 DIP("fisubl %s\n", dis_buf);
6044 fop = Iop_SubF64;
6045 goto do_fop_m32;
6046
6047 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
6048 DIP("fisubrl %s\n", dis_buf);
6049 fop = Iop_SubF64;
6050 goto do_foprev_m32;
6051
6052 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
6053 DIP("fisubl %s\n", dis_buf);
6054 fop = Iop_DivF64;
6055 goto do_fop_m32;
6056
6057 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
6058 DIP("fidivrl %s\n", dis_buf);
6059 fop = Iop_DivF64;
6060 goto do_foprev_m32;
6061
6062 do_fop_m32:
6063 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00006064 triop(fop,
6065 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj6847d8c2005-05-12 19:21:55 +00006066 get_ST(0),
sewardj6c299f32009-12-31 18:00:12 +00006067 unop(Iop_I32StoF64,
sewardj6847d8c2005-05-12 19:21:55 +00006068 loadLE(Ity_I32, mkexpr(addr)))));
6069 break;
6070
6071 do_foprev_m32:
6072 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00006073 triop(fop,
6074 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj6c299f32009-12-31 18:00:12 +00006075 unop(Iop_I32StoF64,
sewardj6847d8c2005-05-12 19:21:55 +00006076 loadLE(Ity_I32, mkexpr(addr))),
6077 get_ST(0)));
6078 break;
sewardj8d965312005-02-25 02:48:47 +00006079
6080 default:
sewardj901ed122005-02-27 13:25:31 +00006081 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
sewardj8d965312005-02-25 02:48:47 +00006082 vex_printf("first_opcode == 0xDA\n");
6083 goto decode_fail;
6084 }
6085
6086 } else {
6087
6088 delta++;
6089 switch (modrm) {
6090
sewardj48a89d82005-05-06 11:50:13 +00006091 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
6092 r_src = (UInt)modrm - 0xC0;
sewardjca673ab2005-05-11 10:03:08 +00006093 DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
sewardj48a89d82005-05-06 11:50:13 +00006094 put_ST_UNCHECKED(0,
florian99dd03e2013-01-29 03:56:06 +00006095 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00006096 mk_amd64g_calculate_condition(AMD64CondB),
florian99dd03e2013-01-29 03:56:06 +00006097 get_ST(r_src), get_ST(0)) );
sewardj48a89d82005-05-06 11:50:13 +00006098 break;
sewardj8d965312005-02-25 02:48:47 +00006099
6100 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
6101 r_src = (UInt)modrm - 0xC8;
sewardj1027dc22005-02-26 01:55:02 +00006102 DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
sewardj8d965312005-02-25 02:48:47 +00006103 put_ST_UNCHECKED(0,
florian99dd03e2013-01-29 03:56:06 +00006104 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00006105 mk_amd64g_calculate_condition(AMD64CondZ),
florian99dd03e2013-01-29 03:56:06 +00006106 get_ST(r_src), get_ST(0)) );
sewardj8d965312005-02-25 02:48:47 +00006107 break;
6108
sewardj37d52572005-02-25 14:22:12 +00006109 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
6110 r_src = (UInt)modrm - 0xD0;
sewardj1027dc22005-02-26 01:55:02 +00006111 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
sewardj37d52572005-02-25 14:22:12 +00006112 put_ST_UNCHECKED(0,
florian99dd03e2013-01-29 03:56:06 +00006113 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00006114 mk_amd64g_calculate_condition(AMD64CondBE),
florian99dd03e2013-01-29 03:56:06 +00006115 get_ST(r_src), get_ST(0)) );
sewardj37d52572005-02-25 14:22:12 +00006116 break;
6117
sewardj25a85812005-05-08 23:03:48 +00006118 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
6119 r_src = (UInt)modrm - 0xD8;
6120 DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
6121 put_ST_UNCHECKED(0,
florian99dd03e2013-01-29 03:56:06 +00006122 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00006123 mk_amd64g_calculate_condition(AMD64CondP),
florian99dd03e2013-01-29 03:56:06 +00006124 get_ST(r_src), get_ST(0)) );
sewardj25a85812005-05-08 23:03:48 +00006125 break;
6126
sewardje7f277a2008-04-28 21:05:33 +00006127 case 0xE9: /* FUCOMPP %st(0),%st(1) */
6128 DIP("fucompp %%st(0),%%st(1)\n");
6129 /* This forces C1 to zero, which isn't right. */
6130 put_C3210(
6131 unop(Iop_32Uto64,
6132 binop( Iop_And32,
6133 binop(Iop_Shl32,
6134 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
6135 mkU8(8)),
6136 mkU32(0x4500)
6137 )));
6138 fp_pop();
6139 fp_pop();
6140 break;
sewardj8d965312005-02-25 02:48:47 +00006141
6142 default:
6143 goto decode_fail;
6144 }
6145
6146 }
6147 }
6148
sewardjc49ce232005-02-25 13:03:03 +00006149 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
6150 else
6151 if (first_opcode == 0xDB) {
6152 if (modrm < 0xC0) {
6153
6154 /* bits 5,4,3 are an opcode extension, and the modRM also
6155 specifies an address. */
sewardj2e28ac42008-12-04 00:05:12 +00006156 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardjc49ce232005-02-25 13:03:03 +00006157 delta += len;
6158
sewardj901ed122005-02-27 13:25:31 +00006159 switch (gregLO3ofRM(modrm)) {
sewardjc49ce232005-02-25 13:03:03 +00006160
sewardj5cc00ff2005-03-27 04:48:32 +00006161 case 0: /* FILD m32int */
6162 DIP("fildl %s\n", dis_buf);
6163 fp_push();
sewardj6c299f32009-12-31 18:00:12 +00006164 put_ST(0, unop(Iop_I32StoF64,
sewardj5cc00ff2005-03-27 04:48:32 +00006165 loadLE(Ity_I32, mkexpr(addr))));
6166 break;
6167
sewardjfcf21f32006-08-04 14:51:19 +00006168 case 1: /* FISTTPL m32 (SSE3) */
6169 DIP("fisttpl %s\n", dis_buf);
6170 storeLE( mkexpr(addr),
sewardj6c299f32009-12-31 18:00:12 +00006171 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
sewardjfcf21f32006-08-04 14:51:19 +00006172 fp_pop();
6173 break;
6174
sewardj6847d8c2005-05-12 19:21:55 +00006175 case 2: /* FIST m32 */
6176 DIP("fistl %s\n", dis_buf);
6177 storeLE( mkexpr(addr),
sewardj6c299f32009-12-31 18:00:12 +00006178 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
sewardj6847d8c2005-05-12 19:21:55 +00006179 break;
sewardj37d52572005-02-25 14:22:12 +00006180
6181 case 3: /* FISTP m32 */
6182 DIP("fistpl %s\n", dis_buf);
6183 storeLE( mkexpr(addr),
sewardj6c299f32009-12-31 18:00:12 +00006184 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
sewardj37d52572005-02-25 14:22:12 +00006185 fp_pop();
6186 break;
6187
sewardj924215b2005-03-26 21:50:31 +00006188 case 5: { /* FLD extended-real */
6189 /* Uses dirty helper:
6190 ULong amd64g_loadF80le ( ULong )
6191 addr holds the address. First, do a dirty call to
6192 get hold of the data. */
6193 IRTemp val = newTemp(Ity_I64);
6194 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
6195
6196 IRDirty* d = unsafeIRDirty_1_N (
6197 val,
6198 0/*regparms*/,
sewardj8707fef2005-08-23 23:26:37 +00006199 "amd64g_dirtyhelper_loadF80le",
6200 &amd64g_dirtyhelper_loadF80le,
sewardj924215b2005-03-26 21:50:31 +00006201 args
6202 );
6203 /* declare that we're reading memory */
6204 d->mFx = Ifx_Read;
6205 d->mAddr = mkexpr(addr);
6206 d->mSize = 10;
6207
6208 /* execute the dirty call, dumping the result in val. */
6209 stmt( IRStmt_Dirty(d) );
6210 fp_push();
6211 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
6212
6213 DIP("fldt %s\n", dis_buf);
6214 break;
6215 }
6216
6217 case 7: { /* FSTP extended-real */
6218 /* Uses dirty helper:
6219 void amd64g_storeF80le ( ULong addr, ULong data )
6220 */
6221 IRExpr** args
6222 = mkIRExprVec_2( mkexpr(addr),
6223 unop(Iop_ReinterpF64asI64, get_ST(0)) );
6224
6225 IRDirty* d = unsafeIRDirty_0_N (
6226 0/*regparms*/,
sewardj8707fef2005-08-23 23:26:37 +00006227 "amd64g_dirtyhelper_storeF80le",
6228 &amd64g_dirtyhelper_storeF80le,
sewardj924215b2005-03-26 21:50:31 +00006229 args
6230 );
6231 /* declare we're writing memory */
6232 d->mFx = Ifx_Write;
6233 d->mAddr = mkexpr(addr);
6234 d->mSize = 10;
6235
6236 /* execute the dirty call. */
6237 stmt( IRStmt_Dirty(d) );
6238 fp_pop();
6239
6240 DIP("fstpt\n %s", dis_buf);
6241 break;
6242 }
sewardjc49ce232005-02-25 13:03:03 +00006243
6244 default:
sewardj901ed122005-02-27 13:25:31 +00006245 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
sewardjc49ce232005-02-25 13:03:03 +00006246 vex_printf("first_opcode == 0xDB\n");
6247 goto decode_fail;
6248 }
6249
6250 } else {
6251
6252 delta++;
6253 switch (modrm) {
6254
sewardj48a89d82005-05-06 11:50:13 +00006255 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
6256 r_src = (UInt)modrm - 0xC0;
sewardjca673ab2005-05-11 10:03:08 +00006257 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
sewardj48a89d82005-05-06 11:50:13 +00006258 put_ST_UNCHECKED(0,
florian99dd03e2013-01-29 03:56:06 +00006259 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00006260 mk_amd64g_calculate_condition(AMD64CondNB),
florian99dd03e2013-01-29 03:56:06 +00006261 get_ST(r_src), get_ST(0)) );
sewardj48a89d82005-05-06 11:50:13 +00006262 break;
sewardj924215b2005-03-26 21:50:31 +00006263
6264 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
6265 r_src = (UInt)modrm - 0xC8;
sewardj40e144d2005-03-28 00:46:27 +00006266 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
sewardj137015d2005-03-27 04:01:15 +00006267 put_ST_UNCHECKED(
6268 0,
florian99dd03e2013-01-29 03:56:06 +00006269 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00006270 mk_amd64g_calculate_condition(AMD64CondNZ),
florian99dd03e2013-01-29 03:56:06 +00006271 get_ST(r_src),
6272 get_ST(0)
sewardj137015d2005-03-27 04:01:15 +00006273 )
6274 );
sewardj924215b2005-03-26 21:50:31 +00006275 break;
6276
sewardj137015d2005-03-27 04:01:15 +00006277 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
6278 r_src = (UInt)modrm - 0xD0;
sewardj40e144d2005-03-28 00:46:27 +00006279 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
sewardj137015d2005-03-27 04:01:15 +00006280 put_ST_UNCHECKED(
6281 0,
florian99dd03e2013-01-29 03:56:06 +00006282 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00006283 mk_amd64g_calculate_condition(AMD64CondNBE),
florian99dd03e2013-01-29 03:56:06 +00006284 get_ST(r_src),
6285 get_ST(0)
sewardj137015d2005-03-27 04:01:15 +00006286 )
6287 );
6288 break;
6289
sewardj3368e102006-03-06 19:05:07 +00006290 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
6291 r_src = (UInt)modrm - 0xD8;
6292 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src);
6293 put_ST_UNCHECKED(
6294 0,
florian99dd03e2013-01-29 03:56:06 +00006295 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00006296 mk_amd64g_calculate_condition(AMD64CondNP),
florian99dd03e2013-01-29 03:56:06 +00006297 get_ST(r_src),
6298 get_ST(0)
sewardj3368e102006-03-06 19:05:07 +00006299 )
6300 );
6301 break;
6302
sewardj4e1a1e92005-05-25 00:44:13 +00006303 case 0xE2:
6304 DIP("fnclex\n");
6305 break;
6306
sewardj0585a032005-11-05 02:55:06 +00006307 case 0xE3: {
6308 /* Uses dirty helper:
6309 void amd64g_do_FINIT ( VexGuestAMD64State* ) */
6310 IRDirty* d = unsafeIRDirty_0_N (
6311 0/*regparms*/,
6312 "amd64g_dirtyhelper_FINIT",
6313 &amd64g_dirtyhelper_FINIT,
florian90419562013-08-15 20:54:52 +00006314 mkIRExprVec_1( IRExpr_BBPTR() )
sewardj0585a032005-11-05 02:55:06 +00006315 );
sewardj0585a032005-11-05 02:55:06 +00006316
6317 /* declare we're writing guest state */
6318 d->nFxState = 5;
sewardjc9069f22012-06-01 16:09:50 +00006319 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj0585a032005-11-05 02:55:06 +00006320
6321 d->fxState[0].fx = Ifx_Write;
6322 d->fxState[0].offset = OFFB_FTOP;
6323 d->fxState[0].size = sizeof(UInt);
6324
6325 d->fxState[1].fx = Ifx_Write;
6326 d->fxState[1].offset = OFFB_FPREGS;
6327 d->fxState[1].size = 8 * sizeof(ULong);
6328
6329 d->fxState[2].fx = Ifx_Write;
6330 d->fxState[2].offset = OFFB_FPTAGS;
6331 d->fxState[2].size = 8 * sizeof(UChar);
6332
6333 d->fxState[3].fx = Ifx_Write;
6334 d->fxState[3].offset = OFFB_FPROUND;
6335 d->fxState[3].size = sizeof(ULong);
6336
6337 d->fxState[4].fx = Ifx_Write;
6338 d->fxState[4].offset = OFFB_FC3210;
6339 d->fxState[4].size = sizeof(ULong);
6340
6341 stmt( IRStmt_Dirty(d) );
6342
6343 DIP("fninit\n");
6344 break;
6345 }
sewardjc49ce232005-02-25 13:03:03 +00006346
6347 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
6348 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
6349 break;
6350
sewardj48a89d82005-05-06 11:50:13 +00006351 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
6352 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
6353 break;
sewardjc49ce232005-02-25 13:03:03 +00006354
6355 default:
6356 goto decode_fail;
6357 }
6358 }
6359 }
6360
sewardj137015d2005-03-27 04:01:15 +00006361 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
6362 else
6363 if (first_opcode == 0xDC) {
6364 if (modrm < 0xC0) {
6365
sewardj434e0692005-03-27 17:36:08 +00006366 /* bits 5,4,3 are an opcode extension, and the modRM also
6367 specifies an address. */
sewardj2e28ac42008-12-04 00:05:12 +00006368 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj434e0692005-03-27 17:36:08 +00006369 delta += len;
6370
6371 switch (gregLO3ofRM(modrm)) {
6372
sewardje6939f02005-05-07 01:01:24 +00006373 case 0: /* FADD double-real */
6374 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
6375 break;
6376
6377 case 1: /* FMUL double-real */
6378 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
6379 break;
6380
sewardjd20c8852005-01-20 20:04:07 +00006381//.. case 2: /* FCOM double-real */
6382//.. DIP("fcoml %s\n", dis_buf);
6383//.. /* This forces C1 to zero, which isn't right. */
6384//.. put_C3210(
6385//.. binop( Iop_And32,
6386//.. binop(Iop_Shl32,
6387//.. binop(Iop_CmpF64,
6388//.. get_ST(0),
6389//.. loadLE(Ity_F64,mkexpr(addr))),
6390//.. mkU8(8)),
6391//.. mkU32(0x4500)
6392//.. ));
6393//.. break;
sewardj566d2c72005-08-10 11:43:42 +00006394
6395 case 3: /* FCOMP double-real */
6396 DIP("fcompl %s\n", dis_buf);
6397 /* This forces C1 to zero, which isn't right. */
6398 put_C3210(
6399 unop(Iop_32Uto64,
6400 binop( Iop_And32,
6401 binop(Iop_Shl32,
6402 binop(Iop_CmpF64,
6403 get_ST(0),
6404 loadLE(Ity_F64,mkexpr(addr))),
6405 mkU8(8)),
6406 mkU32(0x4500)
6407 )));
6408 fp_pop();
6409 break;
sewardje6939f02005-05-07 01:01:24 +00006410
6411 case 4: /* FSUB double-real */
6412 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
6413 break;
sewardj434e0692005-03-27 17:36:08 +00006414
6415 case 5: /* FSUBR double-real */
6416 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
6417 break;
6418
sewardje6939f02005-05-07 01:01:24 +00006419 case 6: /* FDIV double-real */
6420 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
6421 break;
6422
6423 case 7: /* FDIVR double-real */
6424 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
6425 break;
sewardj434e0692005-03-27 17:36:08 +00006426
6427 default:
6428 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
6429 vex_printf("first_opcode == 0xDC\n");
6430 goto decode_fail;
6431 }
sewardj137015d2005-03-27 04:01:15 +00006432
6433 } else {
6434
6435 delta++;
6436 switch (modrm) {
6437
6438 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
6439 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
6440 break;
6441
sewardj7bc00082005-03-27 05:08:32 +00006442 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
6443 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
6444 break;
6445
sewardj434e0692005-03-27 17:36:08 +00006446 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
6447 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
6448 break;
6449
sewardje6939f02005-05-07 01:01:24 +00006450 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
6451 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
6452 break;
6453
6454 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
6455 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
6456 break;
sewardj137015d2005-03-27 04:01:15 +00006457
6458 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
6459 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
6460 break;
6461
6462 default:
6463 goto decode_fail;
6464 }
6465
6466 }
6467 }
sewardj8d965312005-02-25 02:48:47 +00006468
6469 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
6470 else
6471 if (first_opcode == 0xDD) {
6472
6473 if (modrm < 0xC0) {
6474
6475 /* bits 5,4,3 are an opcode extension, and the modRM also
6476 specifies an address. */
sewardj2e28ac42008-12-04 00:05:12 +00006477 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj8d965312005-02-25 02:48:47 +00006478 delta += len;
6479
sewardj901ed122005-02-27 13:25:31 +00006480 switch (gregLO3ofRM(modrm)) {
sewardj8d965312005-02-25 02:48:47 +00006481
6482 case 0: /* FLD double-real */
6483 DIP("fldl %s\n", dis_buf);
6484 fp_push();
sewardjaf1ceca2005-06-30 23:31:27 +00006485 put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
sewardj8d965312005-02-25 02:48:47 +00006486 break;
6487
sewardjfcf21f32006-08-04 14:51:19 +00006488 case 1: /* FISTTPQ m64 (SSE3) */
6489 DIP("fistppll %s\n", dis_buf);
6490 storeLE( mkexpr(addr),
sewardj6c299f32009-12-31 18:00:12 +00006491 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
sewardjfcf21f32006-08-04 14:51:19 +00006492 fp_pop();
6493 break;
6494
sewardjc49ce232005-02-25 13:03:03 +00006495 case 2: /* FST double-real */
6496 DIP("fstl %s\n", dis_buf);
6497 storeLE(mkexpr(addr), get_ST(0));
6498 break;
sewardj8d965312005-02-25 02:48:47 +00006499
6500 case 3: /* FSTP double-real */
6501 DIP("fstpl %s\n", dis_buf);
6502 storeLE(mkexpr(addr), get_ST(0));
6503 fp_pop();
6504 break;
6505
sewardj9ae42a72012-02-16 14:18:56 +00006506 case 4: { /* FRSTOR m94/m108 */
6507 IRTemp ew = newTemp(Ity_I32);
6508 IRTemp w64 = newTemp(Ity_I64);
6509 IRDirty* d;
6510 if ( have66(pfx) ) {
6511 /* Uses dirty helper:
florian6ef84be2012-08-26 03:20:07 +00006512 VexEmNote amd64g_dirtyhelper_FRSTORS
sewardj9ae42a72012-02-16 14:18:56 +00006513 ( VexGuestAMD64State*, HWord ) */
6514 d = unsafeIRDirty_0_N (
6515 0/*regparms*/,
6516 "amd64g_dirtyhelper_FRSTORS",
6517 &amd64g_dirtyhelper_FRSTORS,
6518 mkIRExprVec_1( mkexpr(addr) )
6519 );
6520 d->mSize = 94;
6521 } else {
6522 /* Uses dirty helper:
florian6ef84be2012-08-26 03:20:07 +00006523 VexEmNote amd64g_dirtyhelper_FRSTOR
sewardj9ae42a72012-02-16 14:18:56 +00006524 ( VexGuestAMD64State*, HWord ) */
6525 d = unsafeIRDirty_0_N (
6526 0/*regparms*/,
6527 "amd64g_dirtyhelper_FRSTOR",
6528 &amd64g_dirtyhelper_FRSTOR,
florian90419562013-08-15 20:54:52 +00006529 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
sewardj9ae42a72012-02-16 14:18:56 +00006530 );
6531 d->mSize = 108;
6532 }
6533
sewardj74142b82013-08-08 10:28:59 +00006534 d->tmp = w64;
sewardj9ae42a72012-02-16 14:18:56 +00006535 /* declare we're reading memory */
6536 d->mFx = Ifx_Read;
6537 d->mAddr = mkexpr(addr);
6538 /* d->mSize set above */
6539
6540 /* declare we're writing guest state */
6541 d->nFxState = 5;
sewardjc9069f22012-06-01 16:09:50 +00006542 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj9ae42a72012-02-16 14:18:56 +00006543
6544 d->fxState[0].fx = Ifx_Write;
6545 d->fxState[0].offset = OFFB_FTOP;
6546 d->fxState[0].size = sizeof(UInt);
6547
6548 d->fxState[1].fx = Ifx_Write;
6549 d->fxState[1].offset = OFFB_FPREGS;
6550 d->fxState[1].size = 8 * sizeof(ULong);
6551
6552 d->fxState[2].fx = Ifx_Write;
6553 d->fxState[2].offset = OFFB_FPTAGS;
6554 d->fxState[2].size = 8 * sizeof(UChar);
6555
6556 d->fxState[3].fx = Ifx_Write;
6557 d->fxState[3].offset = OFFB_FPROUND;
6558 d->fxState[3].size = sizeof(ULong);
6559
6560 d->fxState[4].fx = Ifx_Write;
6561 d->fxState[4].offset = OFFB_FC3210;
6562 d->fxState[4].size = sizeof(ULong);
6563
6564 stmt( IRStmt_Dirty(d) );
6565
6566 /* ew contains any emulation warning we may need to
6567 issue. If needed, side-exit to the next insn,
6568 reporting the warning, so that Valgrind's dispatcher
6569 sees the warning. */
6570 assign(ew, unop(Iop_64to32,mkexpr(w64)) );
6571 put_emwarn( mkexpr(ew) );
6572 stmt(
6573 IRStmt_Exit(
6574 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
6575 Ijk_EmWarn,
sewardjc6f970f2012-04-02 21:54:49 +00006576 IRConst_U64( guest_RIP_bbstart+delta ),
6577 OFFB_RIP
sewardj9ae42a72012-02-16 14:18:56 +00006578 )
6579 );
6580
6581 if ( have66(pfx) ) {
6582 DIP("frstors %s\n", dis_buf);
6583 } else {
6584 DIP("frstor %s\n", dis_buf);
6585 }
6586 break;
6587 }
6588
6589 case 6: { /* FNSAVE m94/m108 */
6590 IRDirty *d;
6591 if ( have66(pfx) ) {
6592 /* Uses dirty helper:
sewardj74142b82013-08-08 10:28:59 +00006593 void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*,
6594 HWord ) */
sewardj9ae42a72012-02-16 14:18:56 +00006595 d = unsafeIRDirty_0_N (
6596 0/*regparms*/,
6597 "amd64g_dirtyhelper_FNSAVES",
6598 &amd64g_dirtyhelper_FNSAVES,
6599 mkIRExprVec_1( mkexpr(addr) )
6600 );
6601 d->mSize = 94;
6602 } else {
6603 /* Uses dirty helper:
sewardj74142b82013-08-08 10:28:59 +00006604 void amd64g_dirtyhelper_FNSAVE ( VexGuestAMD64State*,
6605 HWord ) */
sewardj9ae42a72012-02-16 14:18:56 +00006606 d = unsafeIRDirty_0_N (
6607 0/*regparms*/,
6608 "amd64g_dirtyhelper_FNSAVE",
6609 &amd64g_dirtyhelper_FNSAVE,
florian90419562013-08-15 20:54:52 +00006610 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
sewardj74142b82013-08-08 10:28:59 +00006611 );
sewardj9ae42a72012-02-16 14:18:56 +00006612 d->mSize = 108;
6613 }
sewardj74142b82013-08-08 10:28:59 +00006614
sewardj9ae42a72012-02-16 14:18:56 +00006615 /* declare we're writing memory */
6616 d->mFx = Ifx_Write;
6617 d->mAddr = mkexpr(addr);
6618 /* d->mSize set above */
6619
6620 /* declare we're reading guest state */
6621 d->nFxState = 5;
sewardjc9069f22012-06-01 16:09:50 +00006622 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj9ae42a72012-02-16 14:18:56 +00006623
6624 d->fxState[0].fx = Ifx_Read;
6625 d->fxState[0].offset = OFFB_FTOP;
6626 d->fxState[0].size = sizeof(UInt);
6627
6628 d->fxState[1].fx = Ifx_Read;
6629 d->fxState[1].offset = OFFB_FPREGS;
6630 d->fxState[1].size = 8 * sizeof(ULong);
6631
6632 d->fxState[2].fx = Ifx_Read;
6633 d->fxState[2].offset = OFFB_FPTAGS;
6634 d->fxState[2].size = 8 * sizeof(UChar);
6635
6636 d->fxState[3].fx = Ifx_Read;
6637 d->fxState[3].offset = OFFB_FPROUND;
6638 d->fxState[3].size = sizeof(ULong);
6639
6640 d->fxState[4].fx = Ifx_Read;
6641 d->fxState[4].offset = OFFB_FC3210;
6642 d->fxState[4].size = sizeof(ULong);
6643
6644 stmt( IRStmt_Dirty(d) );
6645
6646 if ( have66(pfx) ) {
6647 DIP("fnsaves %s\n", dis_buf);
6648 } else {
6649 DIP("fnsave %s\n", dis_buf);
6650 }
6651 break;
6652 }
sewardj8d965312005-02-25 02:48:47 +00006653
sewardj7c2d2822006-03-07 00:22:02 +00006654 case 7: { /* FNSTSW m16 */
6655 IRExpr* sw = get_FPU_sw();
sewardjdd40fdf2006-12-24 02:20:24 +00006656 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
sewardj7c2d2822006-03-07 00:22:02 +00006657 storeLE( mkexpr(addr), sw );
6658 DIP("fnstsw %s\n", dis_buf);
6659 break;
6660 }
6661
sewardj8d965312005-02-25 02:48:47 +00006662 default:
sewardj901ed122005-02-27 13:25:31 +00006663 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
sewardj8d965312005-02-25 02:48:47 +00006664 vex_printf("first_opcode == 0xDD\n");
6665 goto decode_fail;
6666 }
6667 } else {
6668 delta++;
6669 switch (modrm) {
6670
sewardj6847d8c2005-05-12 19:21:55 +00006671 case 0xC0 ... 0xC7: /* FFREE %st(?) */
6672 r_dst = (UInt)modrm - 0xC0;
6673 DIP("ffree %%st(%u)\n", r_dst);
6674 put_ST_TAG ( r_dst, mkU8(0) );
6675 break;
6676
sewardjbfabcc42005-08-08 09:58:05 +00006677 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
6678 r_dst = (UInt)modrm - 0xD0;
sewardjc7cd2142005-09-09 22:31:49 +00006679 DIP("fst %%st(0),%%st(%u)\n", r_dst);
sewardjbfabcc42005-08-08 09:58:05 +00006680 /* P4 manual says: "If the destination operand is a
6681 non-empty register, the invalid-operation exception
6682 is not generated. Hence put_ST_UNCHECKED. */
6683 put_ST_UNCHECKED(r_dst, get_ST(0));
6684 break;
sewardj8d965312005-02-25 02:48:47 +00006685
6686 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
6687 r_dst = (UInt)modrm - 0xD8;
sewardj1027dc22005-02-26 01:55:02 +00006688 DIP("fstp %%st(0),%%st(%u)\n", r_dst);
sewardj8d965312005-02-25 02:48:47 +00006689 /* P4 manual says: "If the destination operand is a
6690 non-empty register, the invalid-operation exception
6691 is not generated. Hence put_ST_UNCHECKED. */
6692 put_ST_UNCHECKED(r_dst, get_ST(0));
6693 fp_pop();
6694 break;
6695
sewardjfb6c1792005-10-05 17:58:32 +00006696 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
6697 r_dst = (UInt)modrm - 0xE0;
sewardj62d05432005-10-29 22:30:47 +00006698 DIP("fucom %%st(0),%%st(%u)\n", r_dst);
sewardjfb6c1792005-10-05 17:58:32 +00006699 /* This forces C1 to zero, which isn't right. */
6700 put_C3210(
6701 unop(Iop_32Uto64,
6702 binop( Iop_And32,
6703 binop(Iop_Shl32,
6704 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6705 mkU8(8)),
6706 mkU32(0x4500)
6707 )));
6708 break;
6709
sewardj9fb2f472005-11-05 01:12:18 +00006710 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
6711 r_dst = (UInt)modrm - 0xE8;
sewardj43f45732005-11-05 13:04:34 +00006712 DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
sewardj9fb2f472005-11-05 01:12:18 +00006713 /* This forces C1 to zero, which isn't right. */
6714 put_C3210(
6715 unop(Iop_32Uto64,
6716 binop( Iop_And32,
6717 binop(Iop_Shl32,
6718 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6719 mkU8(8)),
6720 mkU32(0x4500)
6721 )));
6722 fp_pop();
6723 break;
sewardj8d965312005-02-25 02:48:47 +00006724
6725 default:
6726 goto decode_fail;
6727 }
6728 }
6729 }
6730
sewardj137015d2005-03-27 04:01:15 +00006731 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
6732 else
6733 if (first_opcode == 0xDE) {
6734
6735 if (modrm < 0xC0) {
6736
sewardj6847d8c2005-05-12 19:21:55 +00006737 /* bits 5,4,3 are an opcode extension, and the modRM also
6738 specifies an address. */
6739 IROp fop;
sewardj2e28ac42008-12-04 00:05:12 +00006740 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj6847d8c2005-05-12 19:21:55 +00006741 delta += len;
6742
6743 switch (gregLO3ofRM(modrm)) {
6744
6745 case 0: /* FIADD m16int */ /* ST(0) += m16int */
6746 DIP("fiaddw %s\n", dis_buf);
6747 fop = Iop_AddF64;
6748 goto do_fop_m16;
6749
6750 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
6751 DIP("fimulw %s\n", dis_buf);
6752 fop = Iop_MulF64;
6753 goto do_fop_m16;
6754
6755 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
6756 DIP("fisubw %s\n", dis_buf);
6757 fop = Iop_SubF64;
6758 goto do_fop_m16;
6759
6760 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
6761 DIP("fisubrw %s\n", dis_buf);
6762 fop = Iop_SubF64;
6763 goto do_foprev_m16;
6764
6765 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
6766 DIP("fisubw %s\n", dis_buf);
6767 fop = Iop_DivF64;
6768 goto do_fop_m16;
6769
6770 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
6771 DIP("fidivrw %s\n", dis_buf);
6772 fop = Iop_DivF64;
6773 goto do_foprev_m16;
6774
6775 do_fop_m16:
6776 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00006777 triop(fop,
6778 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj6847d8c2005-05-12 19:21:55 +00006779 get_ST(0),
sewardj6c299f32009-12-31 18:00:12 +00006780 unop(Iop_I32StoF64,
sewardj6847d8c2005-05-12 19:21:55 +00006781 unop(Iop_16Sto32,
6782 loadLE(Ity_I16, mkexpr(addr))))));
6783 break;
6784
6785 do_foprev_m16:
6786 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00006787 triop(fop,
6788 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj6c299f32009-12-31 18:00:12 +00006789 unop(Iop_I32StoF64,
sewardj6847d8c2005-05-12 19:21:55 +00006790 unop(Iop_16Sto32,
6791 loadLE(Ity_I16, mkexpr(addr)))),
6792 get_ST(0)));
6793 break;
6794
6795 default:
6796 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
6797 vex_printf("first_opcode == 0xDE\n");
6798 goto decode_fail;
6799 }
sewardj137015d2005-03-27 04:01:15 +00006800
6801 } else {
6802
6803 delta++;
6804 switch (modrm) {
6805
6806 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
6807 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
6808 break;
6809
6810 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
6811 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
6812 break;
6813
sewardj1859ecd2007-02-23 08:48:22 +00006814 case 0xD9: /* FCOMPP %st(0),%st(1) */
6815 DIP("fcompp %%st(0),%%st(1)\n");
6816 /* This forces C1 to zero, which isn't right. */
6817 put_C3210(
6818 unop(Iop_32Uto64,
6819 binop( Iop_And32,
6820 binop(Iop_Shl32,
6821 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
6822 mkU8(8)),
6823 mkU32(0x4500)
6824 )));
6825 fp_pop();
6826 fp_pop();
6827 break;
sewardj137015d2005-03-27 04:01:15 +00006828
6829 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
6830 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
6831 break;
6832
6833 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
6834 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
6835 break;
6836
6837 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
6838 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
6839 break;
6840
6841 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
6842 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
6843 break;
6844
6845 default:
6846 goto decode_fail;
6847 }
6848
6849 }
6850 }
sewardjc49ce232005-02-25 13:03:03 +00006851
6852 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
6853 else
6854 if (first_opcode == 0xDF) {
6855
6856 if (modrm < 0xC0) {
6857
6858 /* bits 5,4,3 are an opcode extension, and the modRM also
6859 specifies an address. */
sewardj2e28ac42008-12-04 00:05:12 +00006860 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardjc49ce232005-02-25 13:03:03 +00006861 delta += len;
6862
sewardj901ed122005-02-27 13:25:31 +00006863 switch (gregLO3ofRM(modrm)) {
sewardjc49ce232005-02-25 13:03:03 +00006864
sewardj434e0692005-03-27 17:36:08 +00006865 case 0: /* FILD m16int */
6866 DIP("fildw %s\n", dis_buf);
6867 fp_push();
sewardj6c299f32009-12-31 18:00:12 +00006868 put_ST(0, unop(Iop_I32StoF64,
sewardj434e0692005-03-27 17:36:08 +00006869 unop(Iop_16Sto32,
6870 loadLE(Ity_I16, mkexpr(addr)))));
6871 break;
6872
sewardjfcf21f32006-08-04 14:51:19 +00006873 case 1: /* FISTTPS m16 (SSE3) */
6874 DIP("fisttps %s\n", dis_buf);
6875 storeLE( mkexpr(addr),
sewardjb707d102007-07-11 22:49:26 +00006876 x87ishly_qnarrow_32_to_16(
sewardj6c299f32009-12-31 18:00:12 +00006877 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ));
sewardjfcf21f32006-08-04 14:51:19 +00006878 fp_pop();
6879 break;
6880
sewardj9f5c8fd2010-05-10 20:08:12 +00006881 case 2: /* FIST m16 */
6882 DIP("fists %s\n", dis_buf);
6883 storeLE( mkexpr(addr),
6884 x87ishly_qnarrow_32_to_16(
6885 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
6886 break;
sewardj6847d8c2005-05-12 19:21:55 +00006887
sewardjb707d102007-07-11 22:49:26 +00006888 case 3: /* FISTP m16 */
6889 DIP("fistps %s\n", dis_buf);
6890 storeLE( mkexpr(addr),
6891 x87ishly_qnarrow_32_to_16(
sewardj6c299f32009-12-31 18:00:12 +00006892 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
sewardjb707d102007-07-11 22:49:26 +00006893 fp_pop();
6894 break;
sewardj37d52572005-02-25 14:22:12 +00006895
6896 case 5: /* FILD m64 */
6897 DIP("fildll %s\n", dis_buf);
6898 fp_push();
sewardj6c299f32009-12-31 18:00:12 +00006899 put_ST(0, binop(Iop_I64StoF64,
sewardj37d52572005-02-25 14:22:12 +00006900 get_roundingmode(),
6901 loadLE(Ity_I64, mkexpr(addr))));
6902 break;
6903
sewardj6847d8c2005-05-12 19:21:55 +00006904 case 7: /* FISTP m64 */
6905 DIP("fistpll %s\n", dis_buf);
6906 storeLE( mkexpr(addr),
sewardj6c299f32009-12-31 18:00:12 +00006907 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
sewardj6847d8c2005-05-12 19:21:55 +00006908 fp_pop();
6909 break;
sewardjc49ce232005-02-25 13:03:03 +00006910
6911 default:
sewardj901ed122005-02-27 13:25:31 +00006912 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
sewardjc49ce232005-02-25 13:03:03 +00006913 vex_printf("first_opcode == 0xDF\n");
6914 goto decode_fail;
6915 }
6916
6917 } else {
6918
6919 delta++;
6920 switch (modrm) {
6921
6922 case 0xC0: /* FFREEP %st(0) */
6923 DIP("ffreep %%st(%d)\n", 0);
6924 put_ST_TAG ( 0, mkU8(0) );
6925 fp_pop();
6926 break;
6927
sewardj4f9847d2005-07-25 11:58:34 +00006928 case 0xE0: /* FNSTSW %ax */
6929 DIP("fnstsw %%ax\n");
6930 /* Invent a plausible-looking FPU status word value and
6931 dump it in %AX:
6932 ((ftop & 7) << 11) | (c3210 & 0x4700)
6933 */
6934 putIRegRAX(
6935 2,
6936 unop(Iop_32to16,
6937 binop(Iop_Or32,
6938 binop(Iop_Shl32,
6939 binop(Iop_And32, get_ftop(), mkU32(7)),
6940 mkU8(11)),
6941 binop(Iop_And32,
6942 unop(Iop_64to32, get_C3210()),
6943 mkU32(0x4700))
6944 )));
6945 break;
sewardj924215b2005-03-26 21:50:31 +00006946
6947 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
6948 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
6949 break;
6950
sewardj48a89d82005-05-06 11:50:13 +00006951 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
6952 /* not really right since COMIP != UCOMIP */
6953 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
6954 break;
sewardjc49ce232005-02-25 13:03:03 +00006955
6956 default:
6957 goto decode_fail;
6958 }
6959 }
6960
6961 }
sewardj8d965312005-02-25 02:48:47 +00006962
6963 else
sewardj137015d2005-03-27 04:01:15 +00006964 goto decode_fail;
sewardj8d965312005-02-25 02:48:47 +00006965
6966 *decode_ok = True;
6967 return delta;
6968
6969 decode_fail:
6970 *decode_ok = False;
6971 return delta;
6972}
6973
6974
sewardj8711f662005-05-09 17:52:56 +00006975/*------------------------------------------------------------*/
6976/*--- ---*/
6977/*--- MMX INSTRUCTIONS ---*/
6978/*--- ---*/
6979/*------------------------------------------------------------*/
6980
6981/* Effect of MMX insns on x87 FPU state (table 11-2 of
6982 IA32 arch manual, volume 3):
6983
6984 Read from, or write to MMX register (viz, any insn except EMMS):
6985 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
6986 * FP stack pointer set to zero
6987
6988 EMMS:
6989 * All tags set to Invalid (empty) -- FPTAGS[i] := zero
6990 * FP stack pointer set to zero
6991*/
6992
6993static void do_MMX_preamble ( void )
6994{
sewardjdd40fdf2006-12-24 02:20:24 +00006995 Int i;
6996 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
6997 IRExpr* zero = mkU32(0);
6998 IRExpr* tag1 = mkU8(1);
sewardj8711f662005-05-09 17:52:56 +00006999 put_ftop(zero);
7000 for (i = 0; i < 8; i++)
floriand6f38b32012-05-31 15:46:18 +00007001 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
sewardj8711f662005-05-09 17:52:56 +00007002}
7003
7004static void do_EMMS_preamble ( void )
7005{
sewardjdd40fdf2006-12-24 02:20:24 +00007006 Int i;
7007 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
7008 IRExpr* zero = mkU32(0);
7009 IRExpr* tag0 = mkU8(0);
sewardj8711f662005-05-09 17:52:56 +00007010 put_ftop(zero);
7011 for (i = 0; i < 8; i++)
floriand6f38b32012-05-31 15:46:18 +00007012 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
sewardj8711f662005-05-09 17:52:56 +00007013}
7014
7015
7016static IRExpr* getMMXReg ( UInt archreg )
7017{
7018 vassert(archreg < 8);
7019 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
7020}
7021
7022
7023static void putMMXReg ( UInt archreg, IRExpr* e )
7024{
7025 vassert(archreg < 8);
sewardjdd40fdf2006-12-24 02:20:24 +00007026 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
sewardj8711f662005-05-09 17:52:56 +00007027 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
7028}
7029
7030
7031/* Helper for non-shift MMX insns. Note this is incomplete in the
7032 sense that it does not first call do_MMX_preamble() -- that is the
7033 responsibility of its caller. */
7034
7035static
floriancacba8e2014-12-15 18:58:07 +00007036ULong dis_MMXop_regmem_to_reg ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00007037 Prefix pfx,
7038 Long delta,
7039 UChar opc,
florian55085f82012-11-21 00:36:55 +00007040 const HChar* name,
sewardj2e28ac42008-12-04 00:05:12 +00007041 Bool show_granularity )
sewardj8711f662005-05-09 17:52:56 +00007042{
7043 HChar dis_buf[50];
7044 UChar modrm = getUChar(delta);
7045 Bool isReg = epartIsReg(modrm);
7046 IRExpr* argL = NULL;
7047 IRExpr* argR = NULL;
7048 IRExpr* argG = NULL;
7049 IRExpr* argE = NULL;
7050 IRTemp res = newTemp(Ity_I64);
7051
7052 Bool invG = False;
7053 IROp op = Iop_INVALID;
7054 void* hAddr = NULL;
florian55085f82012-11-21 00:36:55 +00007055 const HChar* hName = NULL;
sewardj8711f662005-05-09 17:52:56 +00007056 Bool eLeft = False;
7057
7058# define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
7059
7060 switch (opc) {
7061 /* Original MMX ones */
7062 case 0xFC: op = Iop_Add8x8; break;
7063 case 0xFD: op = Iop_Add16x4; break;
7064 case 0xFE: op = Iop_Add32x2; break;
7065
7066 case 0xEC: op = Iop_QAdd8Sx8; break;
7067 case 0xED: op = Iop_QAdd16Sx4; break;
7068
7069 case 0xDC: op = Iop_QAdd8Ux8; break;
7070 case 0xDD: op = Iop_QAdd16Ux4; break;
7071
7072 case 0xF8: op = Iop_Sub8x8; break;
7073 case 0xF9: op = Iop_Sub16x4; break;
7074 case 0xFA: op = Iop_Sub32x2; break;
7075
7076 case 0xE8: op = Iop_QSub8Sx8; break;
7077 case 0xE9: op = Iop_QSub16Sx4; break;
7078
7079 case 0xD8: op = Iop_QSub8Ux8; break;
7080 case 0xD9: op = Iop_QSub16Ux4; break;
7081
7082 case 0xE5: op = Iop_MulHi16Sx4; break;
7083 case 0xD5: op = Iop_Mul16x4; break;
7084 case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;
7085
7086 case 0x74: op = Iop_CmpEQ8x8; break;
7087 case 0x75: op = Iop_CmpEQ16x4; break;
7088 case 0x76: op = Iop_CmpEQ32x2; break;
7089
7090 case 0x64: op = Iop_CmpGT8Sx8; break;
7091 case 0x65: op = Iop_CmpGT16Sx4; break;
7092 case 0x66: op = Iop_CmpGT32Sx2; break;
7093
sewardj5f438dd2011-06-16 11:36:23 +00007094 case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
7095 case 0x63: op = Iop_QNarrowBin16Sto8Sx8; eLeft = True; break;
7096 case 0x67: op = Iop_QNarrowBin16Sto8Ux8; eLeft = True; break;
sewardj8711f662005-05-09 17:52:56 +00007097
7098 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break;
7099 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
7100 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;
7101
7102 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break;
7103 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
7104 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;
7105
7106 case 0xDB: op = Iop_And64; break;
7107 case 0xDF: op = Iop_And64; invG = True; break;
7108 case 0xEB: op = Iop_Or64; break;
7109 case 0xEF: /* Possibly do better here if argL and argR are the
7110 same reg */
7111 op = Iop_Xor64; break;
7112
7113 /* Introduced in SSE1 */
7114 case 0xE0: op = Iop_Avg8Ux8; break;
7115 case 0xE3: op = Iop_Avg16Ux4; break;
7116 case 0xEE: op = Iop_Max16Sx4; break;
7117 case 0xDE: op = Iop_Max8Ux8; break;
7118 case 0xEA: op = Iop_Min16Sx4; break;
7119 case 0xDA: op = Iop_Min8Ux8; break;
7120 case 0xE4: op = Iop_MulHi16Ux4; break;
sewardja7ba8c42005-05-10 20:08:34 +00007121 case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;
sewardj8711f662005-05-09 17:52:56 +00007122
7123 /* Introduced in SSE2 */
7124 case 0xD4: op = Iop_Add64; break;
7125 case 0xFB: op = Iop_Sub64; break;
7126
7127 default:
7128 vex_printf("\n0x%x\n", (Int)opc);
7129 vpanic("dis_MMXop_regmem_to_reg");
7130 }
7131
7132# undef XXX
7133
7134 argG = getMMXReg(gregLO3ofRM(modrm));
7135 if (invG)
7136 argG = unop(Iop_Not64, argG);
7137
7138 if (isReg) {
7139 delta++;
7140 argE = getMMXReg(eregLO3ofRM(modrm));
7141 } else {
7142 Int len;
sewardj2e28ac42008-12-04 00:05:12 +00007143 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj8711f662005-05-09 17:52:56 +00007144 delta += len;
7145 argE = loadLE(Ity_I64, mkexpr(addr));
7146 }
7147
7148 if (eLeft) {
7149 argL = argE;
7150 argR = argG;
7151 } else {
7152 argL = argG;
7153 argR = argE;
7154 }
7155
7156 if (op != Iop_INVALID) {
7157 vassert(hName == NULL);
7158 vassert(hAddr == NULL);
7159 assign(res, binop(op, argL, argR));
7160 } else {
7161 vassert(hName != NULL);
7162 vassert(hAddr != NULL);
7163 assign( res,
7164 mkIRExprCCall(
7165 Ity_I64,
7166 0/*regparms*/, hName, hAddr,
7167 mkIRExprVec_2( argL, argR )
7168 )
7169 );
7170 }
7171
7172 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
7173
7174 DIP("%s%s %s, %s\n",
7175 name, show_granularity ? nameMMXGran(opc & 3) : "",
7176 ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ),
7177 nameMMXReg(gregLO3ofRM(modrm)) );
7178
7179 return delta;
7180}
7181
7182
7183/* Vector by scalar shift of G by the amount specified at the bottom
7184 of E. This is a straight copy of dis_SSE_shiftG_byE. */
7185
floriancacba8e2014-12-15 18:58:07 +00007186static ULong dis_MMX_shiftG_byE ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00007187 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00007188 const HChar* opname, IROp op )
sewardj8711f662005-05-09 17:52:56 +00007189{
7190 HChar dis_buf[50];
7191 Int alen, size;
7192 IRTemp addr;
7193 Bool shl, shr, sar;
7194 UChar rm = getUChar(delta);
7195 IRTemp g0 = newTemp(Ity_I64);
7196 IRTemp g1 = newTemp(Ity_I64);
7197 IRTemp amt = newTemp(Ity_I64);
7198 IRTemp amt8 = newTemp(Ity_I8);
7199
7200 if (epartIsReg(rm)) {
7201 assign( amt, getMMXReg(eregLO3ofRM(rm)) );
7202 DIP("%s %s,%s\n", opname,
7203 nameMMXReg(eregLO3ofRM(rm)),
7204 nameMMXReg(gregLO3ofRM(rm)) );
7205 delta++;
7206 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007207 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj8711f662005-05-09 17:52:56 +00007208 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
7209 DIP("%s %s,%s\n", opname,
7210 dis_buf,
7211 nameMMXReg(gregLO3ofRM(rm)) );
7212 delta += alen;
7213 }
7214 assign( g0, getMMXReg(gregLO3ofRM(rm)) );
7215 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
7216
7217 shl = shr = sar = False;
7218 size = 0;
7219 switch (op) {
7220 case Iop_ShlN16x4: shl = True; size = 32; break;
7221 case Iop_ShlN32x2: shl = True; size = 32; break;
7222 case Iop_Shl64: shl = True; size = 64; break;
7223 case Iop_ShrN16x4: shr = True; size = 16; break;
7224 case Iop_ShrN32x2: shr = True; size = 32; break;
7225 case Iop_Shr64: shr = True; size = 64; break;
7226 case Iop_SarN16x4: sar = True; size = 16; break;
7227 case Iop_SarN32x2: sar = True; size = 32; break;
7228 default: vassert(0);
7229 }
7230
7231 if (shl || shr) {
7232 assign(
7233 g1,
florian99dd03e2013-01-29 03:56:06 +00007234 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00007235 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
florian99dd03e2013-01-29 03:56:06 +00007236 binop(op, mkexpr(g0), mkexpr(amt8)),
7237 mkU64(0)
sewardj8711f662005-05-09 17:52:56 +00007238 )
7239 );
7240 } else
7241 if (sar) {
7242 assign(
7243 g1,
florian99dd03e2013-01-29 03:56:06 +00007244 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00007245 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
florian99dd03e2013-01-29 03:56:06 +00007246 binop(op, mkexpr(g0), mkexpr(amt8)),
7247 binop(op, mkexpr(g0), mkU8(size-1))
sewardj8711f662005-05-09 17:52:56 +00007248 )
7249 );
7250 } else {
7251 vassert(0);
7252 }
7253
7254 putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
7255 return delta;
7256}
7257
7258
sewardj3d8107c2005-05-09 22:23:38 +00007259/* Vector by scalar shift of E by an immediate byte. This is a
7260 straight copy of dis_SSE_shiftE_imm. */
7261
7262static
florian55085f82012-11-21 00:36:55 +00007263ULong dis_MMX_shiftE_imm ( Long delta, const HChar* opname, IROp op )
sewardj3d8107c2005-05-09 22:23:38 +00007264{
7265 Bool shl, shr, sar;
7266 UChar rm = getUChar(delta);
7267 IRTemp e0 = newTemp(Ity_I64);
7268 IRTemp e1 = newTemp(Ity_I64);
7269 UChar amt, size;
7270 vassert(epartIsReg(rm));
7271 vassert(gregLO3ofRM(rm) == 2
7272 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
sewardjca673ab2005-05-11 10:03:08 +00007273 amt = getUChar(delta+1);
sewardj3d8107c2005-05-09 22:23:38 +00007274 delta += 2;
7275 DIP("%s $%d,%s\n", opname,
7276 (Int)amt,
7277 nameMMXReg(eregLO3ofRM(rm)) );
7278
7279 assign( e0, getMMXReg(eregLO3ofRM(rm)) );
7280
7281 shl = shr = sar = False;
7282 size = 0;
7283 switch (op) {
7284 case Iop_ShlN16x4: shl = True; size = 16; break;
7285 case Iop_ShlN32x2: shl = True; size = 32; break;
7286 case Iop_Shl64: shl = True; size = 64; break;
7287 case Iop_SarN16x4: sar = True; size = 16; break;
7288 case Iop_SarN32x2: sar = True; size = 32; break;
7289 case Iop_ShrN16x4: shr = True; size = 16; break;
7290 case Iop_ShrN32x2: shr = True; size = 32; break;
7291 case Iop_Shr64: shr = True; size = 64; break;
7292 default: vassert(0);
7293 }
7294
7295 if (shl || shr) {
7296 assign( e1, amt >= size
7297 ? mkU64(0)
7298 : binop(op, mkexpr(e0), mkU8(amt))
7299 );
7300 } else
7301 if (sar) {
7302 assign( e1, amt >= size
7303 ? binop(op, mkexpr(e0), mkU8(size-1))
7304 : binop(op, mkexpr(e0), mkU8(amt))
7305 );
7306 } else {
7307 vassert(0);
7308 }
7309
7310 putMMXReg( eregLO3ofRM(rm), mkexpr(e1) );
7311 return delta;
7312}
sewardj8711f662005-05-09 17:52:56 +00007313
7314
7315/* Completely handle all MMX instructions except emms. */
7316
7317static
sewardj2e28ac42008-12-04 00:05:12 +00007318ULong dis_MMX ( Bool* decode_ok,
floriancacba8e2014-12-15 18:58:07 +00007319 const VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta )
sewardj8711f662005-05-09 17:52:56 +00007320{
7321 Int len;
7322 UChar modrm;
7323 HChar dis_buf[50];
7324 UChar opc = getUChar(delta);
7325 delta++;
7326
7327 /* dis_MMX handles all insns except emms. */
7328 do_MMX_preamble();
7329
7330 switch (opc) {
7331
sewardj3d8107c2005-05-09 22:23:38 +00007332 case 0x6E:
7333 if (sz == 4) {
7334 /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
7335 modrm = getUChar(delta);
7336 if (epartIsReg(modrm)) {
7337 delta++;
7338 putMMXReg(
7339 gregLO3ofRM(modrm),
7340 binop( Iop_32HLto64,
7341 mkU32(0),
7342 getIReg32(eregOfRexRM(pfx,modrm)) ) );
7343 DIP("movd %s, %s\n",
7344 nameIReg32(eregOfRexRM(pfx,modrm)),
7345 nameMMXReg(gregLO3ofRM(modrm)));
7346 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007347 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj3d8107c2005-05-09 22:23:38 +00007348 delta += len;
7349 putMMXReg(
7350 gregLO3ofRM(modrm),
7351 binop( Iop_32HLto64,
7352 mkU32(0),
7353 loadLE(Ity_I32, mkexpr(addr)) ) );
7354 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7355 }
7356 }
7357 else
7358 if (sz == 8) {
7359 /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
7360 modrm = getUChar(delta);
7361 if (epartIsReg(modrm)) {
7362 delta++;
7363 putMMXReg( gregLO3ofRM(modrm),
7364 getIReg64(eregOfRexRM(pfx,modrm)) );
7365 DIP("movd %s, %s\n",
7366 nameIReg64(eregOfRexRM(pfx,modrm)),
7367 nameMMXReg(gregLO3ofRM(modrm)));
7368 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007369 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj3d8107c2005-05-09 22:23:38 +00007370 delta += len;
7371 putMMXReg( gregLO3ofRM(modrm),
7372 loadLE(Ity_I64, mkexpr(addr)) );
7373 DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7374 }
7375 }
7376 else {
7377 goto mmx_decode_failure;
7378 }
7379 break;
7380
7381 case 0x7E:
7382 if (sz == 4) {
7383 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */
7384 modrm = getUChar(delta);
7385 if (epartIsReg(modrm)) {
7386 delta++;
7387 putIReg32( eregOfRexRM(pfx,modrm),
7388 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
7389 DIP("movd %s, %s\n",
7390 nameMMXReg(gregLO3ofRM(modrm)),
7391 nameIReg32(eregOfRexRM(pfx,modrm)));
7392 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007393 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj3d8107c2005-05-09 22:23:38 +00007394 delta += len;
7395 storeLE( mkexpr(addr),
7396 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
7397 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7398 }
7399 }
7400 else
7401 if (sz == 8) {
7402 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */
7403 modrm = getUChar(delta);
7404 if (epartIsReg(modrm)) {
7405 delta++;
7406 putIReg64( eregOfRexRM(pfx,modrm),
7407 getMMXReg(gregLO3ofRM(modrm)) );
7408 DIP("movd %s, %s\n",
7409 nameMMXReg(gregLO3ofRM(modrm)),
7410 nameIReg64(eregOfRexRM(pfx,modrm)));
7411 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007412 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj3d8107c2005-05-09 22:23:38 +00007413 delta += len;
7414 storeLE( mkexpr(addr),
7415 getMMXReg(gregLO3ofRM(modrm)) );
7416 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7417 }
7418 } else {
7419 goto mmx_decode_failure;
7420 }
7421 break;
sewardj8711f662005-05-09 17:52:56 +00007422
7423 case 0x6F:
7424 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
sewardj7bdd1bc2008-12-13 16:49:46 +00007425 if (sz != 4
7426 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
sewardj8711f662005-05-09 17:52:56 +00007427 goto mmx_decode_failure;
7428 modrm = getUChar(delta);
7429 if (epartIsReg(modrm)) {
7430 delta++;
7431 putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) );
7432 DIP("movq %s, %s\n",
7433 nameMMXReg(eregLO3ofRM(modrm)),
7434 nameMMXReg(gregLO3ofRM(modrm)));
7435 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007436 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj8711f662005-05-09 17:52:56 +00007437 delta += len;
7438 putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
7439 DIP("movq %s, %s\n",
7440 dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7441 }
7442 break;
7443
7444 case 0x7F:
7445 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
sewardj7bdd1bc2008-12-13 16:49:46 +00007446 if (sz != 4
7447 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
sewardj8711f662005-05-09 17:52:56 +00007448 goto mmx_decode_failure;
7449 modrm = getUChar(delta);
7450 if (epartIsReg(modrm)) {
sewardjf2d61c42012-08-23 19:00:06 +00007451 delta++;
7452 putMMXReg( eregLO3ofRM(modrm), getMMXReg(gregLO3ofRM(modrm)) );
7453 DIP("movq %s, %s\n",
7454 nameMMXReg(gregLO3ofRM(modrm)),
7455 nameMMXReg(eregLO3ofRM(modrm)));
sewardj8711f662005-05-09 17:52:56 +00007456 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007457 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj8711f662005-05-09 17:52:56 +00007458 delta += len;
7459 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
7460 DIP("mov(nt)q %s, %s\n",
7461 nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7462 }
7463 break;
7464
7465 case 0xFC:
7466 case 0xFD:
7467 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
7468 if (sz != 4)
7469 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007470 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True );
sewardj8711f662005-05-09 17:52:56 +00007471 break;
7472
7473 case 0xEC:
7474 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
sewardj7bdd1bc2008-12-13 16:49:46 +00007475 if (sz != 4
7476 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
sewardj8711f662005-05-09 17:52:56 +00007477 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007478 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True );
sewardj8711f662005-05-09 17:52:56 +00007479 break;
7480
7481 case 0xDC:
7482 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7483 if (sz != 4)
7484 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007485 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True );
sewardj8711f662005-05-09 17:52:56 +00007486 break;
7487
7488 case 0xF8:
7489 case 0xF9:
7490 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
7491 if (sz != 4)
7492 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007493 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True );
sewardj8711f662005-05-09 17:52:56 +00007494 break;
7495
7496 case 0xE8:
7497 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
7498 if (sz != 4)
7499 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007500 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True );
sewardj8711f662005-05-09 17:52:56 +00007501 break;
7502
7503 case 0xD8:
7504 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7505 if (sz != 4)
7506 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007507 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True );
sewardj8711f662005-05-09 17:52:56 +00007508 break;
7509
7510 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
7511 if (sz != 4)
7512 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007513 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False );
sewardj8711f662005-05-09 17:52:56 +00007514 break;
7515
7516 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
7517 if (sz != 4)
7518 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007519 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False );
sewardj8711f662005-05-09 17:52:56 +00007520 break;
7521
7522 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
7523 vassert(sz == 4);
sewardj2e28ac42008-12-04 00:05:12 +00007524 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False );
sewardj8711f662005-05-09 17:52:56 +00007525 break;
7526
7527 case 0x74:
7528 case 0x75:
7529 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
7530 if (sz != 4)
7531 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007532 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True );
sewardj8711f662005-05-09 17:52:56 +00007533 break;
7534
7535 case 0x64:
7536 case 0x65:
7537 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
7538 if (sz != 4)
7539 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007540 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True );
sewardj8711f662005-05-09 17:52:56 +00007541 break;
7542
7543 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
7544 if (sz != 4)
7545 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007546 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False );
sewardj8711f662005-05-09 17:52:56 +00007547 break;
7548
7549 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
7550 if (sz != 4)
7551 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007552 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False );
sewardj8711f662005-05-09 17:52:56 +00007553 break;
7554
7555 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
7556 if (sz != 4)
7557 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007558 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packuswb", False );
sewardj8711f662005-05-09 17:52:56 +00007559 break;
7560
7561 case 0x68:
7562 case 0x69:
7563 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
sewardj23063322009-01-24 10:34:19 +00007564 if (sz != 4
7565 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
sewardj8711f662005-05-09 17:52:56 +00007566 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007567 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True );
sewardj8711f662005-05-09 17:52:56 +00007568 break;
7569
7570 case 0x60:
7571 case 0x61:
7572 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
sewardj7bdd1bc2008-12-13 16:49:46 +00007573 if (sz != 4
7574 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
sewardj8711f662005-05-09 17:52:56 +00007575 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007576 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True );
sewardj8711f662005-05-09 17:52:56 +00007577 break;
7578
7579 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
7580 if (sz != 4)
7581 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007582 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False );
sewardj8711f662005-05-09 17:52:56 +00007583 break;
7584
7585 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
7586 if (sz != 4)
7587 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007588 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False );
sewardj8711f662005-05-09 17:52:56 +00007589 break;
7590
7591 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
7592 if (sz != 4)
7593 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007594 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False );
sewardj8711f662005-05-09 17:52:56 +00007595 break;
7596
7597 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
7598 if (sz != 4)
7599 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007600 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False );
sewardj8711f662005-05-09 17:52:56 +00007601 break;
7602
sewardj2e28ac42008-12-04 00:05:12 +00007603# define SHIFT_BY_REG(_name,_op) \
7604 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \
sewardj8711f662005-05-09 17:52:56 +00007605 break;
7606
7607 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
7608 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
7609 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
7610 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
7611
7612 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
7613 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
7614 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
7615 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
7616
7617 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
7618 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
7619 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
7620
7621# undef SHIFT_BY_REG
sewardj3d8107c2005-05-09 22:23:38 +00007622
7623 case 0x71:
7624 case 0x72:
7625 case 0x73: {
7626 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
sewardjca673ab2005-05-11 10:03:08 +00007627 UChar byte2, subopc;
sewardj3d8107c2005-05-09 22:23:38 +00007628 if (sz != 4)
7629 goto mmx_decode_failure;
sewardjca673ab2005-05-11 10:03:08 +00007630 byte2 = getUChar(delta); /* amode / sub-opcode */
7631 subopc = toUChar( (byte2 >> 3) & 7 );
sewardj3d8107c2005-05-09 22:23:38 +00007632
7633# define SHIFT_BY_IMM(_name,_op) \
7634 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
7635 } while (0)
7636
7637 if (subopc == 2 /*SRL*/ && opc == 0x71)
7638 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
7639 else if (subopc == 2 /*SRL*/ && opc == 0x72)
7640 SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
7641 else if (subopc == 2 /*SRL*/ && opc == 0x73)
7642 SHIFT_BY_IMM("psrlq", Iop_Shr64);
7643
7644 else if (subopc == 4 /*SAR*/ && opc == 0x71)
7645 SHIFT_BY_IMM("psraw", Iop_SarN16x4);
7646 else if (subopc == 4 /*SAR*/ && opc == 0x72)
7647 SHIFT_BY_IMM("psrad", Iop_SarN32x2);
7648
7649 else if (subopc == 6 /*SHL*/ && opc == 0x71)
7650 SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
7651 else if (subopc == 6 /*SHL*/ && opc == 0x72)
7652 SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
7653 else if (subopc == 6 /*SHL*/ && opc == 0x73)
7654 SHIFT_BY_IMM("psllq", Iop_Shl64);
7655
7656 else goto mmx_decode_failure;
7657
7658# undef SHIFT_BY_IMM
7659 break;
7660 }
sewardj8711f662005-05-09 17:52:56 +00007661
sewardj02f79f12007-09-01 18:59:53 +00007662 case 0xF7: {
7663 IRTemp addr = newTemp(Ity_I64);
7664 IRTemp regD = newTemp(Ity_I64);
7665 IRTemp regM = newTemp(Ity_I64);
7666 IRTemp mask = newTemp(Ity_I64);
7667 IRTemp olddata = newTemp(Ity_I64);
7668 IRTemp newdata = newTemp(Ity_I64);
7669
7670 modrm = getUChar(delta);
7671 if (sz != 4 || (!epartIsReg(modrm)))
7672 goto mmx_decode_failure;
7673 delta++;
7674
sewardj2e28ac42008-12-04 00:05:12 +00007675 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
sewardj02f79f12007-09-01 18:59:53 +00007676 assign( regM, getMMXReg( eregLO3ofRM(modrm) ));
7677 assign( regD, getMMXReg( gregLO3ofRM(modrm) ));
7678 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
7679 assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
7680 assign( newdata,
7681 binop(Iop_Or64,
7682 binop(Iop_And64,
7683 mkexpr(regD),
7684 mkexpr(mask) ),
7685 binop(Iop_And64,
7686 mkexpr(olddata),
7687 unop(Iop_Not64, mkexpr(mask)))) );
7688 storeLE( mkexpr(addr), mkexpr(newdata) );
7689 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ),
7690 nameMMXReg( gregLO3ofRM(modrm) ) );
7691 break;
7692 }
7693
sewardj8711f662005-05-09 17:52:56 +00007694 /* --- MMX decode failure --- */
7695 default:
7696 mmx_decode_failure:
7697 *decode_ok = False;
7698 return delta; /* ignored */
7699
7700 }
7701
7702 *decode_ok = True;
7703 return delta;
7704}
7705
7706
sewardj33ef9c22005-11-04 20:05:57 +00007707/*------------------------------------------------------------*/
7708/*--- More misc arithmetic and other obscure insns. ---*/
7709/*------------------------------------------------------------*/
7710
7711/* Generate base << amt with vacated places filled with stuff
7712 from xtra. amt guaranteed in 0 .. 63. */
7713static
7714IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt )
7715{
7716 /* if amt == 0
7717 then base
7718 else (base << amt) | (xtra >>u (64-amt))
7719 */
7720 return
florian99dd03e2013-01-29 03:56:06 +00007721 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00007722 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
sewardj33ef9c22005-11-04 20:05:57 +00007723 binop(Iop_Or64,
7724 binop(Iop_Shl64, mkexpr(base), mkexpr(amt)),
7725 binop(Iop_Shr64, mkexpr(xtra),
7726 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
florian99dd03e2013-01-29 03:56:06 +00007727 ),
7728 mkexpr(base)
sewardj33ef9c22005-11-04 20:05:57 +00007729 );
7730}
7731
7732/* Generate base >>u amt with vacated places filled with stuff
7733 from xtra. amt guaranteed in 0 .. 63. */
7734static
7735IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt )
7736{
7737 /* if amt == 0
7738 then base
7739 else (base >>u amt) | (xtra << (64-amt))
7740 */
7741 return
florian99dd03e2013-01-29 03:56:06 +00007742 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00007743 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
sewardj33ef9c22005-11-04 20:05:57 +00007744 binop(Iop_Or64,
7745 binop(Iop_Shr64, mkexpr(base), mkexpr(amt)),
7746 binop(Iop_Shl64, mkexpr(xtra),
7747 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
florian99dd03e2013-01-29 03:56:06 +00007748 ),
7749 mkexpr(base)
sewardj33ef9c22005-11-04 20:05:57 +00007750 );
7751}
7752
7753/* Double length left and right shifts. Apparently only required in
7754 v-size (no b- variant). */
7755static
floriancacba8e2014-12-15 18:58:07 +00007756ULong dis_SHLRD_Gv_Ev ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00007757 Prefix pfx,
sewardj33ef9c22005-11-04 20:05:57 +00007758 Long delta, UChar modrm,
7759 Int sz,
7760 IRExpr* shift_amt,
7761 Bool amt_is_literal,
florian55085f82012-11-21 00:36:55 +00007762 const HChar* shift_amt_txt,
sewardj33ef9c22005-11-04 20:05:57 +00007763 Bool left_shift )
7764{
7765 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
7766 for printing it. And eip on entry points at the modrm byte. */
7767 Int len;
7768 HChar dis_buf[50];
7769
7770 IRType ty = szToITy(sz);
7771 IRTemp gsrc = newTemp(ty);
7772 IRTemp esrc = newTemp(ty);
7773 IRTemp addr = IRTemp_INVALID;
7774 IRTemp tmpSH = newTemp(Ity_I8);
7775 IRTemp tmpSS = newTemp(Ity_I8);
7776 IRTemp tmp64 = IRTemp_INVALID;
7777 IRTemp res64 = IRTemp_INVALID;
7778 IRTemp rss64 = IRTemp_INVALID;
7779 IRTemp resTy = IRTemp_INVALID;
7780 IRTemp rssTy = IRTemp_INVALID;
7781 Int mask = sz==8 ? 63 : 31;
7782
7783 vassert(sz == 2 || sz == 4 || sz == 8);
7784
7785 /* The E-part is the destination; this is shifted. The G-part
7786 supplies bits to be shifted into the E-part, but is not
7787 changed.
7788
7789 If shifting left, form a double-length word with E at the top
7790 and G at the bottom, and shift this left. The result is then in
7791 the high part.
7792
7793 If shifting right, form a double-length word with G at the top
7794 and E at the bottom, and shift this right. The result is then
7795 at the bottom. */
7796
7797 /* Fetch the operands. */
7798
7799 assign( gsrc, getIRegG(sz, pfx, modrm) );
7800
7801 if (epartIsReg(modrm)) {
7802 delta++;
7803 assign( esrc, getIRegE(sz, pfx, modrm) );
7804 DIP("sh%cd%c %s, %s, %s\n",
7805 ( left_shift ? 'l' : 'r' ), nameISize(sz),
7806 shift_amt_txt,
7807 nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm));
7808 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007809 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
sewardj75ce3652005-11-04 20:49:36 +00007810 /* # bytes following amode */
7811 amt_is_literal ? 1 : 0 );
sewardj33ef9c22005-11-04 20:05:57 +00007812 delta += len;
7813 assign( esrc, loadLE(ty, mkexpr(addr)) );
7814 DIP("sh%cd%c %s, %s, %s\n",
7815 ( left_shift ? 'l' : 'r' ), nameISize(sz),
7816 shift_amt_txt,
7817 nameIRegG(sz, pfx, modrm), dis_buf);
7818 }
7819
7820 /* Calculate the masked shift amount (tmpSH), the masked subshift
7821 amount (tmpSS), the shifted value (res64) and the subshifted
7822 value (rss64). */
7823
7824 assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) );
7825 assign( tmpSS, binop(Iop_And8,
7826 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
7827 mkU8(mask)));
7828
7829 tmp64 = newTemp(Ity_I64);
7830 res64 = newTemp(Ity_I64);
7831 rss64 = newTemp(Ity_I64);
7832
7833 if (sz == 2 || sz == 4) {
7834
7835 /* G is xtra; E is data */
7836 /* what a freaking nightmare: */
7837 if (sz == 4 && left_shift) {
7838 assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) );
7839 assign( res64,
7840 binop(Iop_Shr64,
7841 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
7842 mkU8(32)) );
7843 assign( rss64,
7844 binop(Iop_Shr64,
7845 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)),
7846 mkU8(32)) );
7847 }
7848 else
7849 if (sz == 4 && !left_shift) {
7850 assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) );
7851 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
7852 assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) );
7853 }
7854 else
7855 if (sz == 2 && left_shift) {
7856 assign( tmp64,
7857 binop(Iop_32HLto64,
7858 binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)),
7859 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc))
7860 ));
sewardjcc3d2192013-03-27 11:37:33 +00007861 /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
sewardj33ef9c22005-11-04 20:05:57 +00007862 assign( res64,
7863 binop(Iop_Shr64,
7864 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
7865 mkU8(48)) );
7866 /* subshift formed by shifting [esrc'0000'0000'0000] */
7867 assign( rss64,
7868 binop(Iop_Shr64,
7869 binop(Iop_Shl64,
7870 binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)),
7871 mkU8(48)),
7872 mkexpr(tmpSS)),
7873 mkU8(48)) );
7874 }
7875 else
7876 if (sz == 2 && !left_shift) {
7877 assign( tmp64,
7878 binop(Iop_32HLto64,
7879 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)),
7880 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc))
7881 ));
7882 /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
7883 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
7884 /* subshift formed by shifting [0000'0000'0000'esrc] */
7885 assign( rss64, binop(Iop_Shr64,
7886 unop(Iop_16Uto64, mkexpr(esrc)),
7887 mkexpr(tmpSS)) );
7888 }
7889
7890 } else {
7891
7892 vassert(sz == 8);
7893 if (left_shift) {
7894 assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH ));
7895 assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS ));
7896 } else {
7897 assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH ));
7898 assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS ));
7899 }
7900
7901 }
7902
7903 resTy = newTemp(ty);
7904 rssTy = newTemp(ty);
7905 assign( resTy, narrowTo(ty, mkexpr(res64)) );
7906 assign( rssTy, narrowTo(ty, mkexpr(rss64)) );
7907
7908 /* Put result back and write the flags thunk. */
7909 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64,
7910 resTy, rssTy, ty, tmpSH );
7911
7912 if (epartIsReg(modrm)) {
7913 putIRegE(sz, pfx, modrm, mkexpr(resTy));
7914 } else {
7915 storeLE( mkexpr(addr), mkexpr(resTy) );
7916 }
7917
7918 if (amt_is_literal) delta++;
7919 return delta;
7920}
sewardj9ed16802005-08-24 10:46:19 +00007921
7922
7923/* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
7924 required. */
7925
7926typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;
7927
florian55085f82012-11-21 00:36:55 +00007928static const HChar* nameBtOp ( BtOp op )
sewardj9ed16802005-08-24 10:46:19 +00007929{
7930 switch (op) {
7931 case BtOpNone: return "";
7932 case BtOpSet: return "s";
7933 case BtOpReset: return "r";
7934 case BtOpComp: return "c";
7935 default: vpanic("nameBtOp(amd64)");
7936 }
7937}
7938
7939
7940static
floriancacba8e2014-12-15 18:58:07 +00007941ULong dis_bt_G_E ( const VexAbiInfo* vbi,
sewardj38b1d692013-10-15 17:21:42 +00007942 Prefix pfx, Int sz, Long delta, BtOp op,
7943 /*OUT*/Bool* decode_OK )
sewardj9ed16802005-08-24 10:46:19 +00007944{
7945 HChar dis_buf[50];
7946 UChar modrm;
7947 Int len;
7948 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
sewardj38b1d692013-10-15 17:21:42 +00007949 t_addr1, t_rsp, t_mask, t_new;
sewardj9ed16802005-08-24 10:46:19 +00007950
7951 vassert(sz == 2 || sz == 4 || sz == 8);
7952
7953 t_fetched = t_bitno0 = t_bitno1 = t_bitno2
sewardje9d8a262009-07-01 08:06:34 +00007954 = t_addr0 = t_addr1 = t_rsp
7955 = t_mask = t_new = IRTemp_INVALID;
sewardj9ed16802005-08-24 10:46:19 +00007956
7957 t_fetched = newTemp(Ity_I8);
sewardje9d8a262009-07-01 08:06:34 +00007958 t_new = newTemp(Ity_I8);
sewardj9ed16802005-08-24 10:46:19 +00007959 t_bitno0 = newTemp(Ity_I64);
7960 t_bitno1 = newTemp(Ity_I64);
7961 t_bitno2 = newTemp(Ity_I8);
7962 t_addr1 = newTemp(Ity_I64);
7963 modrm = getUChar(delta);
7964
sewardj38b1d692013-10-15 17:21:42 +00007965 *decode_OK = True;
7966 if (epartIsReg(modrm)) {
7967 /* F2 and F3 are never acceptable. */
7968 if (haveF2orF3(pfx)) {
7969 *decode_OK = False;
7970 return delta;
7971 }
7972 } else {
7973 /* F2 or F3 (but not both) are allowed, provided LOCK is also
7974 present, and only for the BTC/BTS/BTR cases (not BT). */
7975 if (haveF2orF3(pfx)) {
7976 if (haveF2andF3(pfx) || !haveLOCK(pfx) || op == BtOpNone) {
7977 *decode_OK = False;
7978 return delta;
7979 }
7980 }
7981 }
7982
sewardj9ed16802005-08-24 10:46:19 +00007983 assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) );
7984
7985 if (epartIsReg(modrm)) {
7986 delta++;
sewardj02834302010-07-29 18:10:51 +00007987 /* Get it onto the client's stack. Oh, this is a horrible
7988 kludge. See https://bugs.kde.org/show_bug.cgi?id=245925.
7989 Because of the ELF ABI stack redzone, there may be live data
7990 up to 128 bytes below %RSP. So we can't just push it on the
7991 stack, else we may wind up trashing live data, and causing
7992 impossible-to-find simulation errors. (Yes, this did
7993 happen.) So we need to drop RSP before at least 128 before
7994 pushing it. That unfortunately means hitting Memcheck's
7995 fast-case painting code. Ideally we should drop more than
7996 128, to reduce the chances of breaking buggy programs that
7997 have live data below -128(%RSP). Memcheck fast-cases moves
7998 of 288 bytes due to the need to handle ppc64-linux quickly,
7999 so let's use 288. Of course the real fix is to get rid of
8000 this kludge entirely. */
sewardj9ed16802005-08-24 10:46:19 +00008001 t_rsp = newTemp(Ity_I64);
8002 t_addr0 = newTemp(Ity_I64);
8003
sewardj02834302010-07-29 18:10:51 +00008004 vassert(vbi->guest_stack_redzone_size == 128);
8005 assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) );
sewardj9ed16802005-08-24 10:46:19 +00008006 putIReg64(R_RSP, mkexpr(t_rsp));
8007
8008 storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) );
8009
8010 /* Make t_addr0 point at it. */
8011 assign( t_addr0, mkexpr(t_rsp) );
8012
8013 /* Mask out upper bits of the shift amount, since we're doing a
8014 reg. */
8015 assign( t_bitno1, binop(Iop_And64,
8016 mkexpr(t_bitno0),
8017 mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) );
8018
8019 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008020 t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj9ed16802005-08-24 10:46:19 +00008021 delta += len;
8022 assign( t_bitno1, mkexpr(t_bitno0) );
8023 }
8024
8025 /* At this point: t_addr0 is the address being operated on. If it
8026 was a reg, we will have pushed it onto the client's stack.
8027 t_bitno1 is the bit number, suitably masked in the case of a
8028 reg. */
8029
8030 /* Now the main sequence. */
8031 assign( t_addr1,
8032 binop(Iop_Add64,
8033 mkexpr(t_addr0),
8034 binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) );
8035
8036 /* t_addr1 now holds effective address */
8037
8038 assign( t_bitno2,
8039 unop(Iop_64to8,
8040 binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) );
8041
8042 /* t_bitno2 contains offset of bit within byte */
8043
8044 if (op != BtOpNone) {
8045 t_mask = newTemp(Ity_I8);
8046 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
8047 }
8048
8049 /* t_mask is now a suitable byte mask */
8050
8051 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );
8052
8053 if (op != BtOpNone) {
8054 switch (op) {
sewardje9d8a262009-07-01 08:06:34 +00008055 case BtOpSet:
8056 assign( t_new,
8057 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
sewardj9ed16802005-08-24 10:46:19 +00008058 break;
sewardje9d8a262009-07-01 08:06:34 +00008059 case BtOpComp:
8060 assign( t_new,
8061 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
sewardj9ed16802005-08-24 10:46:19 +00008062 break;
sewardje9d8a262009-07-01 08:06:34 +00008063 case BtOpReset:
8064 assign( t_new,
8065 binop(Iop_And8, mkexpr(t_fetched),
8066 unop(Iop_Not8, mkexpr(t_mask))) );
sewardj9ed16802005-08-24 10:46:19 +00008067 break;
8068 default:
8069 vpanic("dis_bt_G_E(amd64)");
8070 }
sewardj6491f862013-10-15 17:29:19 +00008071 if ((haveLOCK(pfx)) && !epartIsReg(modrm)) {
sewardje9d8a262009-07-01 08:06:34 +00008072 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
8073 mkexpr(t_new)/*new*/,
8074 guest_RIP_curr_instr );
8075 } else {
8076 storeLE( mkexpr(t_addr1), mkexpr(t_new) );
8077 }
sewardj9ed16802005-08-24 10:46:19 +00008078 }
sewardje9d8a262009-07-01 08:06:34 +00008079
sewardj9ed16802005-08-24 10:46:19 +00008080 /* Side effect done; now get selected bit into Carry flag */
8081 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
8082 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8083 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8084 stmt( IRStmt_Put(
8085 OFFB_CC_DEP1,
8086 binop(Iop_And64,
8087 binop(Iop_Shr64,
8088 unop(Iop_8Uto64, mkexpr(t_fetched)),
8089 mkexpr(t_bitno2)),
8090 mkU64(1)))
8091 );
8092 /* Set NDEP even though it isn't used. This makes redundant-PUT
8093 elimination of previous stores to this field work better. */
8094 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8095
8096 /* Move reg operand from stack back to reg */
8097 if (epartIsReg(modrm)) {
sewardje9d8a262009-07-01 08:06:34 +00008098 /* t_rsp still points at it. */
sewardj25d23862006-05-12 17:47:21 +00008099 /* only write the reg if actually modifying it; doing otherwise
8100 zeroes the top half erroneously when doing btl due to
8101 standard zero-extend rule */
sewardje9d8a262009-07-01 08:06:34 +00008102 if (op != BtOpNone)
sewardj25d23862006-05-12 17:47:21 +00008103 putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) );
sewardj02834302010-07-29 18:10:51 +00008104 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) );
sewardj9ed16802005-08-24 10:46:19 +00008105 }
8106
8107 DIP("bt%s%c %s, %s\n",
8108 nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm),
8109 ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) );
8110
8111 return delta;
8112}
sewardjf53b7352005-04-06 20:01:56 +00008113
8114
8115
8116/* Handle BSF/BSR. Only v-size seems necessary. */
8117static
floriancacba8e2014-12-15 18:58:07 +00008118ULong dis_bs_E_G ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00008119 Prefix pfx, Int sz, Long delta, Bool fwds )
sewardjf53b7352005-04-06 20:01:56 +00008120{
8121 Bool isReg;
8122 UChar modrm;
8123 HChar dis_buf[50];
8124
8125 IRType ty = szToITy(sz);
8126 IRTemp src = newTemp(ty);
8127 IRTemp dst = newTemp(ty);
8128 IRTemp src64 = newTemp(Ity_I64);
8129 IRTemp dst64 = newTemp(Ity_I64);
sewardj009230b2013-01-26 11:47:55 +00008130 IRTemp srcB = newTemp(Ity_I1);
sewardjf53b7352005-04-06 20:01:56 +00008131
8132 vassert(sz == 8 || sz == 4 || sz == 2);
8133
8134 modrm = getUChar(delta);
8135 isReg = epartIsReg(modrm);
8136 if (isReg) {
8137 delta++;
8138 assign( src, getIRegE(sz, pfx, modrm) );
8139 } else {
8140 Int len;
sewardj2e28ac42008-12-04 00:05:12 +00008141 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardjf53b7352005-04-06 20:01:56 +00008142 delta += len;
8143 assign( src, loadLE(ty, mkexpr(addr)) );
8144 }
8145
8146 DIP("bs%c%c %s, %s\n",
8147 fwds ? 'f' : 'r', nameISize(sz),
8148 ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ),
8149 nameIRegG(sz, pfx, modrm));
8150
8151 /* First, widen src to 64 bits if it is not already. */
8152 assign( src64, widenUto64(mkexpr(src)) );
8153
sewardj009230b2013-01-26 11:47:55 +00008154 /* Generate a bool expression which is zero iff the original is
sewardje13074c2012-11-08 10:57:08 +00008155 zero, and nonzero otherwise. Ask for a CmpNE version which, if
8156 instrumented by Memcheck, is instrumented expensively, since
8157 this may be used on the output of a preceding movmskb insn,
8158 which has been known to be partially defined, and in need of
8159 careful handling. */
sewardj009230b2013-01-26 11:47:55 +00008160 assign( srcB, binop(Iop_ExpCmpNE64, mkexpr(src64), mkU64(0)) );
sewardjf53b7352005-04-06 20:01:56 +00008161
8162 /* Flags: Z is 1 iff source value is zero. All others
8163 are undefined -- we force them to zero. */
8164 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8165 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8166 stmt( IRStmt_Put(
8167 OFFB_CC_DEP1,
florian99dd03e2013-01-29 03:56:06 +00008168 IRExpr_ITE( mkexpr(srcB),
8169 /* src!=0 */
8170 mkU64(0),
8171 /* src==0 */
8172 mkU64(AMD64G_CC_MASK_Z)
sewardjf53b7352005-04-06 20:01:56 +00008173 )
8174 ));
8175 /* Set NDEP even though it isn't used. This makes redundant-PUT
8176 elimination of previous stores to this field work better. */
8177 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8178
8179 /* Result: iff source value is zero, we can't use
8180 Iop_Clz64/Iop_Ctz64 as they have no defined result in that case.
8181 But anyway, amd64 semantics say the result is undefined in
8182 such situations. Hence handle the zero case specially. */
8183
8184 /* Bleh. What we compute:
8185
8186 bsf64: if src == 0 then {dst is unchanged}
8187 else Ctz64(src)
8188
8189 bsr64: if src == 0 then {dst is unchanged}
8190 else 63 - Clz64(src)
8191
8192 bsf32: if src == 0 then {dst is unchanged}
8193 else Ctz64(32Uto64(src))
8194
8195 bsr32: if src == 0 then {dst is unchanged}
8196 else 63 - Clz64(32Uto64(src))
8197
8198 bsf16: if src == 0 then {dst is unchanged}
8199 else Ctz64(32Uto64(16Uto32(src)))
8200
8201 bsr16: if src == 0 then {dst is unchanged}
8202 else 63 - Clz64(32Uto64(16Uto32(src)))
8203 */
8204
8205 /* The main computation, guarding against zero. */
8206 assign( dst64,
florian99dd03e2013-01-29 03:56:06 +00008207 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00008208 mkexpr(srcB),
sewardjf53b7352005-04-06 20:01:56 +00008209 /* src != 0 */
8210 fwds ? unop(Iop_Ctz64, mkexpr(src64))
8211 : binop(Iop_Sub64,
8212 mkU64(63),
florian99dd03e2013-01-29 03:56:06 +00008213 unop(Iop_Clz64, mkexpr(src64))),
8214 /* src == 0 -- leave dst unchanged */
8215 widenUto64( getIRegG( sz, pfx, modrm ) )
sewardjf53b7352005-04-06 20:01:56 +00008216 )
8217 );
8218
8219 if (sz == 2)
sewardje58967e2005-04-27 11:50:56 +00008220 assign( dst, unop(Iop_64to16, mkexpr(dst64)) );
sewardjf53b7352005-04-06 20:01:56 +00008221 else
8222 if (sz == 4)
8223 assign( dst, unop(Iop_64to32, mkexpr(dst64)) );
8224 else
8225 assign( dst, mkexpr(dst64) );
8226
8227 /* dump result back */
8228 putIRegG( sz, pfx, modrm, mkexpr(dst) );
8229
8230 return delta;
8231}
sewardja6b93d12005-02-17 09:28:28 +00008232
8233
8234/* swap rAX with the reg specified by reg and REX.B */
8235static
sewardj5b470602005-02-27 13:10:48 +00008236void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 )
sewardja6b93d12005-02-17 09:28:28 +00008237{
8238 IRType ty = szToITy(sz);
8239 IRTemp t1 = newTemp(ty);
8240 IRTemp t2 = newTemp(ty);
tom0fb4cbd2011-08-10 12:58:03 +00008241 vassert(sz == 2 || sz == 4 || sz == 8);
sewardj5b470602005-02-27 13:10:48 +00008242 vassert(regLo3 < 8);
sewardj2d4fcd52005-05-18 11:47:47 +00008243 if (sz == 8) {
8244 assign( t1, getIReg64(R_RAX) );
8245 assign( t2, getIRegRexB(8, pfx, regLo3) );
8246 putIReg64( R_RAX, mkexpr(t2) );
8247 putIRegRexB(8, pfx, regLo3, mkexpr(t1) );
tom0fb4cbd2011-08-10 12:58:03 +00008248 } else if (sz == 4) {
sewardj2d4fcd52005-05-18 11:47:47 +00008249 assign( t1, getIReg32(R_RAX) );
8250 assign( t2, getIRegRexB(4, pfx, regLo3) );
8251 putIReg32( R_RAX, mkexpr(t2) );
8252 putIRegRexB(4, pfx, regLo3, mkexpr(t1) );
tom0fb4cbd2011-08-10 12:58:03 +00008253 } else {
8254 assign( t1, getIReg16(R_RAX) );
8255 assign( t2, getIRegRexB(2, pfx, regLo3) );
8256 putIReg16( R_RAX, mkexpr(t2) );
8257 putIRegRexB(2, pfx, regLo3, mkexpr(t1) );
sewardj2d4fcd52005-05-18 11:47:47 +00008258 }
sewardja6b93d12005-02-17 09:28:28 +00008259 DIP("xchg%c %s, %s\n",
sewardj5b470602005-02-27 13:10:48 +00008260 nameISize(sz), nameIRegRAX(sz),
sewardj2d4fcd52005-05-18 11:47:47 +00008261 nameIRegRexB(sz,pfx, regLo3));
sewardja6b93d12005-02-17 09:28:28 +00008262}
8263
8264
sewardj905edbd2007-04-07 12:25:37 +00008265static
8266void codegen_SAHF ( void )
8267{
8268 /* Set the flags to:
8269 (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O)
8270 -- retain the old O flag
8271 | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8272 |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C)
8273 */
8274 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8275 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
8276 IRTemp oldflags = newTemp(Ity_I64);
8277 assign( oldflags, mk_amd64g_calculate_rflags_all() );
8278 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8279 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8280 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8281 stmt( IRStmt_Put( OFFB_CC_DEP1,
8282 binop(Iop_Or64,
8283 binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)),
8284 binop(Iop_And64,
8285 binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)),
8286 mkU64(mask_SZACP))
8287 )
8288 ));
8289}
8290
8291
8292static
8293void codegen_LAHF ( void )
8294{
8295 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
8296 IRExpr* rax_with_hole;
8297 IRExpr* new_byte;
8298 IRExpr* new_rax;
8299 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8300 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
8301
8302 IRTemp flags = newTemp(Ity_I64);
8303 assign( flags, mk_amd64g_calculate_rflags_all() );
8304
8305 rax_with_hole
8306 = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL));
8307 new_byte
8308 = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)),
8309 mkU64(1<<1));
8310 new_rax
8311 = binop(Iop_Or64, rax_with_hole,
8312 binop(Iop_Shl64, new_byte, mkU8(8)));
8313 putIReg64(R_RAX, new_rax);
8314}
8315
sewardja6b93d12005-02-17 09:28:28 +00008316
8317static
sewardjd0aa0a52006-08-17 01:20:01 +00008318ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok,
floriancacba8e2014-12-15 18:58:07 +00008319 const VexAbiInfo* vbi,
sewardjd0aa0a52006-08-17 01:20:01 +00008320 Prefix pfx,
8321 Int size,
8322 Long delta0 )
sewardja6b93d12005-02-17 09:28:28 +00008323{
8324 HChar dis_buf[50];
8325 Int len;
8326
8327 IRType ty = szToITy(size);
8328 IRTemp acc = newTemp(ty);
8329 IRTemp src = newTemp(ty);
8330 IRTemp dest = newTemp(ty);
8331 IRTemp dest2 = newTemp(ty);
8332 IRTemp acc2 = newTemp(ty);
sewardj009230b2013-01-26 11:47:55 +00008333 IRTemp cond = newTemp(Ity_I1);
sewardja6b93d12005-02-17 09:28:28 +00008334 IRTemp addr = IRTemp_INVALID;
8335 UChar rm = getUChar(delta0);
8336
sewardje9d8a262009-07-01 08:06:34 +00008337 /* There are 3 cases to consider:
8338
8339 reg-reg: ignore any lock prefix, generate sequence based
florian99dd03e2013-01-29 03:56:06 +00008340 on ITE
sewardje9d8a262009-07-01 08:06:34 +00008341
8342 reg-mem, not locked: ignore any lock prefix, generate sequence
florian99dd03e2013-01-29 03:56:06 +00008343 based on ITE
sewardje9d8a262009-07-01 08:06:34 +00008344
8345 reg-mem, locked: use IRCAS
8346 */
8347
sewardj38b1d692013-10-15 17:21:42 +00008348 /* Decide whether F2 or F3 are acceptable. Never for register
8349 case, but for the memory case, one or the other is OK provided
8350 LOCK is also present. */
8351 if (epartIsReg(rm)) {
8352 if (haveF2orF3(pfx)) {
8353 *ok = False;
8354 return delta0;
8355 }
8356 } else {
8357 if (haveF2orF3(pfx)) {
8358 if (haveF2andF3(pfx) || !haveLOCK(pfx)) {
8359 *ok = False;
8360 return delta0;
8361 }
8362 }
8363 }
8364
sewardja6b93d12005-02-17 09:28:28 +00008365 if (epartIsReg(rm)) {
sewardje9d8a262009-07-01 08:06:34 +00008366 /* case 1 */
sewardj5b470602005-02-27 13:10:48 +00008367 assign( dest, getIRegE(size, pfx, rm) );
sewardja6b93d12005-02-17 09:28:28 +00008368 delta0++;
sewardje9d8a262009-07-01 08:06:34 +00008369 assign( src, getIRegG(size, pfx, rm) );
8370 assign( acc, getIRegRAX(size) );
8371 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
sewardj009230b2013-01-26 11:47:55 +00008372 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
florian99dd03e2013-01-29 03:56:06 +00008373 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
8374 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
sewardje9d8a262009-07-01 08:06:34 +00008375 putIRegRAX(size, mkexpr(acc2));
8376 putIRegE(size, pfx, rm, mkexpr(dest2));
sewardja6b93d12005-02-17 09:28:28 +00008377 DIP("cmpxchg%c %s,%s\n", nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00008378 nameIRegG(size,pfx,rm),
8379 nameIRegE(size,pfx,rm) );
sewardje9d8a262009-07-01 08:06:34 +00008380 }
sewardj6491f862013-10-15 17:29:19 +00008381 else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00008382 /* case 2 */
sewardj2e28ac42008-12-04 00:05:12 +00008383 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardja6b93d12005-02-17 09:28:28 +00008384 assign( dest, loadLE(ty, mkexpr(addr)) );
8385 delta0 += len;
sewardje9d8a262009-07-01 08:06:34 +00008386 assign( src, getIRegG(size, pfx, rm) );
8387 assign( acc, getIRegRAX(size) );
8388 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
sewardj009230b2013-01-26 11:47:55 +00008389 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
florian99dd03e2013-01-29 03:56:06 +00008390 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
8391 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
sewardje9d8a262009-07-01 08:06:34 +00008392 putIRegRAX(size, mkexpr(acc2));
8393 storeLE( mkexpr(addr), mkexpr(dest2) );
sewardja6b93d12005-02-17 09:28:28 +00008394 DIP("cmpxchg%c %s,%s\n", nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00008395 nameIRegG(size,pfx,rm), dis_buf);
sewardja6b93d12005-02-17 09:28:28 +00008396 }
sewardj6491f862013-10-15 17:29:19 +00008397 else if (!epartIsReg(rm) && haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00008398 /* case 3 */
8399 /* src is new value. acc is expected value. dest is old value.
8400 Compute success from the output of the IRCAS, and steer the
8401 new value for RAX accordingly: in case of success, RAX is
8402 unchanged. */
8403 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8404 delta0 += len;
8405 assign( src, getIRegG(size, pfx, rm) );
8406 assign( acc, getIRegRAX(size) );
8407 stmt( IRStmt_CAS(
8408 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
8409 NULL, mkexpr(acc), NULL, mkexpr(src) )
8410 ));
8411 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
sewardj009230b2013-01-26 11:47:55 +00008412 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
florian99dd03e2013-01-29 03:56:06 +00008413 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
sewardje9d8a262009-07-01 08:06:34 +00008414 putIRegRAX(size, mkexpr(acc2));
sewardj40d1d212009-07-12 13:01:17 +00008415 DIP("cmpxchg%c %s,%s\n", nameISize(size),
8416 nameIRegG(size,pfx,rm), dis_buf);
sewardja6b93d12005-02-17 09:28:28 +00008417 }
sewardje9d8a262009-07-01 08:06:34 +00008418 else vassert(0);
sewardja6b93d12005-02-17 09:28:28 +00008419
sewardjd0aa0a52006-08-17 01:20:01 +00008420 *ok = True;
sewardja6b93d12005-02-17 09:28:28 +00008421 return delta0;
8422}
8423
sewardj3ca55a12005-01-27 16:06:23 +00008424
8425/* Handle conditional move instructions of the form
8426 cmovcc E(reg-or-mem), G(reg)
8427
8428 E(src) is reg-or-mem
8429 G(dst) is reg.
8430
8431 If E is reg, --> GET %E, tmps
8432 GET %G, tmpd
8433 CMOVcc tmps, tmpd
8434 PUT tmpd, %G
8435
8436 If E is mem --> (getAddr E) -> tmpa
8437 LD (tmpa), tmps
8438 GET %G, tmpd
8439 CMOVcc tmps, tmpd
8440 PUT tmpd, %G
8441*/
8442static
floriancacba8e2014-12-15 18:58:07 +00008443ULong dis_cmov_E_G ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00008444 Prefix pfx,
sewardj3ca55a12005-01-27 16:06:23 +00008445 Int sz,
8446 AMD64Condcode cond,
sewardj270def42005-07-03 01:03:01 +00008447 Long delta0 )
sewardj3ca55a12005-01-27 16:06:23 +00008448{
sewardj8c332e22005-01-28 01:36:56 +00008449 UChar rm = getUChar(delta0);
sewardj3ca55a12005-01-27 16:06:23 +00008450 HChar dis_buf[50];
8451 Int len;
8452
8453 IRType ty = szToITy(sz);
8454 IRTemp tmps = newTemp(ty);
8455 IRTemp tmpd = newTemp(ty);
8456
8457 if (epartIsReg(rm)) {
sewardj5b470602005-02-27 13:10:48 +00008458 assign( tmps, getIRegE(sz, pfx, rm) );
8459 assign( tmpd, getIRegG(sz, pfx, rm) );
sewardj3ca55a12005-01-27 16:06:23 +00008460
sewardj5b470602005-02-27 13:10:48 +00008461 putIRegG( sz, pfx, rm,
florian99dd03e2013-01-29 03:56:06 +00008462 IRExpr_ITE( mk_amd64g_calculate_condition(cond),
8463 mkexpr(tmps),
8464 mkexpr(tmpd) )
sewardj3ca55a12005-01-27 16:06:23 +00008465 );
sewardje941eea2005-01-30 19:52:28 +00008466 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
sewardj5b470602005-02-27 13:10:48 +00008467 nameIRegE(sz,pfx,rm),
8468 nameIRegG(sz,pfx,rm));
sewardj3ca55a12005-01-27 16:06:23 +00008469 return 1+delta0;
8470 }
8471
8472 /* E refers to memory */
8473 {
sewardj2e28ac42008-12-04 00:05:12 +00008474 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardj3ca55a12005-01-27 16:06:23 +00008475 assign( tmps, loadLE(ty, mkexpr(addr)) );
sewardj5b470602005-02-27 13:10:48 +00008476 assign( tmpd, getIRegG(sz, pfx, rm) );
sewardj3ca55a12005-01-27 16:06:23 +00008477
sewardj5b470602005-02-27 13:10:48 +00008478 putIRegG( sz, pfx, rm,
florian99dd03e2013-01-29 03:56:06 +00008479 IRExpr_ITE( mk_amd64g_calculate_condition(cond),
8480 mkexpr(tmps),
8481 mkexpr(tmpd) )
sewardj3ca55a12005-01-27 16:06:23 +00008482 );
8483
sewardj7eaa7cf2005-01-31 18:55:22 +00008484 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
8485 dis_buf,
sewardj5b470602005-02-27 13:10:48 +00008486 nameIRegG(sz,pfx,rm));
sewardj3ca55a12005-01-27 16:06:23 +00008487 return len+delta0;
8488 }
8489}
8490
8491
sewardjb4fd2e72005-03-23 13:34:11 +00008492static
8493ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok,
floriancacba8e2014-12-15 18:58:07 +00008494 const VexAbiInfo* vbi,
sewardj270def42005-07-03 01:03:01 +00008495 Prefix pfx, Int sz, Long delta0 )
sewardjb4fd2e72005-03-23 13:34:11 +00008496{
8497 Int len;
8498 UChar rm = getUChar(delta0);
8499 HChar dis_buf[50];
8500
8501 IRType ty = szToITy(sz);
8502 IRTemp tmpd = newTemp(ty);
8503 IRTemp tmpt0 = newTemp(ty);
8504 IRTemp tmpt1 = newTemp(ty);
sewardje9d8a262009-07-01 08:06:34 +00008505
8506 /* There are 3 cases to consider:
8507
sewardjc2433a82010-05-10 20:51:22 +00008508 reg-reg: ignore any lock prefix,
8509 generate 'naive' (non-atomic) sequence
sewardje9d8a262009-07-01 08:06:34 +00008510
8511 reg-mem, not locked: ignore any lock prefix, generate 'naive'
8512 (non-atomic) sequence
8513
8514 reg-mem, locked: use IRCAS
8515 */
sewardjb4fd2e72005-03-23 13:34:11 +00008516
8517 if (epartIsReg(rm)) {
sewardje9d8a262009-07-01 08:06:34 +00008518 /* case 1 */
sewardjc2433a82010-05-10 20:51:22 +00008519 assign( tmpd, getIRegE(sz, pfx, rm) );
8520 assign( tmpt0, getIRegG(sz, pfx, rm) );
8521 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8522 mkexpr(tmpd), mkexpr(tmpt0)) );
8523 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8524 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8525 putIRegE(sz, pfx, rm, mkexpr(tmpt1));
8526 DIP("xadd%c %s, %s\n",
sewardjcc3d2192013-03-27 11:37:33 +00008527 nameISize(sz), nameIRegG(sz,pfx,rm), nameIRegE(sz,pfx,rm));
sewardjc2433a82010-05-10 20:51:22 +00008528 *decode_ok = True;
8529 return 1+delta0;
sewardje9d8a262009-07-01 08:06:34 +00008530 }
sewardj6491f862013-10-15 17:29:19 +00008531 else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00008532 /* case 2 */
sewardj2e28ac42008-12-04 00:05:12 +00008533 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardjb4fd2e72005-03-23 13:34:11 +00008534 assign( tmpd, loadLE(ty, mkexpr(addr)) );
8535 assign( tmpt0, getIRegG(sz, pfx, rm) );
sewardje9d8a262009-07-01 08:06:34 +00008536 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8537 mkexpr(tmpd), mkexpr(tmpt0)) );
sewardjb4fd2e72005-03-23 13:34:11 +00008538 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8539 storeLE( mkexpr(addr), mkexpr(tmpt1) );
8540 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8541 DIP("xadd%c %s, %s\n",
8542 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
sewardje9d8a262009-07-01 08:06:34 +00008543 *decode_ok = True;
sewardjb4fd2e72005-03-23 13:34:11 +00008544 return len+delta0;
8545 }
sewardj6491f862013-10-15 17:29:19 +00008546 else if (!epartIsReg(rm) && haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00008547 /* case 3 */
8548 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8549 assign( tmpd, loadLE(ty, mkexpr(addr)) );
8550 assign( tmpt0, getIRegG(sz, pfx, rm) );
8551 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8552 mkexpr(tmpd), mkexpr(tmpt0)) );
8553 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
8554 mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr );
8555 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8556 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8557 DIP("xadd%c %s, %s\n",
8558 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
8559 *decode_ok = True;
8560 return len+delta0;
8561 }
8562 /*UNREACHED*/
8563 vassert(0);
sewardjb4fd2e72005-03-23 13:34:11 +00008564}
8565
sewardjd20c8852005-01-20 20:04:07 +00008566//.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
8567//..
8568//.. static
sewardj270def42005-07-03 01:03:01 +00008569//.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 )
sewardjd20c8852005-01-20 20:04:07 +00008570//.. {
8571//.. Int len;
8572//.. IRTemp addr;
sewardj8c332e22005-01-28 01:36:56 +00008573//.. UChar rm = getUChar(delta0);
sewardjd20c8852005-01-20 20:04:07 +00008574//.. HChar dis_buf[50];
8575//..
8576//.. if (epartIsReg(rm)) {
8577//.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
8578//.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
8579//.. return 1+delta0;
8580//.. } else {
8581//.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8582//.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
8583//.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
8584//.. return len+delta0;
8585//.. }
8586//.. }
8587//..
8588//.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
8589//.. dst is ireg and sz==4, zero out top half of it. */
8590//..
8591//.. static
8592//.. UInt dis_mov_Sw_Ew ( UChar sorb,
8593//.. Int sz,
8594//.. UInt delta0 )
8595//.. {
8596//.. Int len;
8597//.. IRTemp addr;
sewardj8c332e22005-01-28 01:36:56 +00008598//.. UChar rm = getUChar(delta0);
sewardjd20c8852005-01-20 20:04:07 +00008599//.. HChar dis_buf[50];
8600//..
8601//.. vassert(sz == 2 || sz == 4);
8602//..
8603//.. if (epartIsReg(rm)) {
8604//.. if (sz == 4)
8605//.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
8606//.. else
8607//.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
8608//..
8609//.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
8610//.. return 1+delta0;
8611//.. } else {
8612//.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8613//.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
8614//.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
8615//.. return len+delta0;
8616//.. }
8617//.. }
8618//..
8619//..
8620//.. static
8621//.. void dis_push_segreg ( UInt sreg, Int sz )
8622//.. {
8623//.. IRTemp t1 = newTemp(Ity_I16);
8624//.. IRTemp ta = newTemp(Ity_I32);
8625//.. vassert(sz == 2 || sz == 4);
8626//..
8627//.. assign( t1, getSReg(sreg) );
8628//.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
8629//.. putIReg(4, R_ESP, mkexpr(ta));
8630//.. storeLE( mkexpr(ta), mkexpr(t1) );
8631//..
8632//.. DIP("pushw %s\n", nameSReg(sreg));
8633//.. }
8634//..
8635//.. static
8636//.. void dis_pop_segreg ( UInt sreg, Int sz )
8637//.. {
8638//.. IRTemp t1 = newTemp(Ity_I16);
8639//.. IRTemp ta = newTemp(Ity_I32);
8640//.. vassert(sz == 2 || sz == 4);
8641//..
8642//.. assign( ta, getIReg(4, R_ESP) );
8643//.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
8644//..
8645//.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
8646//.. putSReg( sreg, mkexpr(t1) );
8647//.. DIP("pop %s\n", nameSReg(sreg));
8648//.. }
sewardj2f959cc2005-01-26 01:19:35 +00008649
8650static
floriancacba8e2014-12-15 18:58:07 +00008651void dis_ret ( /*MOD*/DisResult* dres, const VexAbiInfo* vbi, ULong d64 )
sewardj2f959cc2005-01-26 01:19:35 +00008652{
8653 IRTemp t1 = newTemp(Ity_I64);
8654 IRTemp t2 = newTemp(Ity_I64);
sewardj5a9ffab2005-05-12 17:55:01 +00008655 IRTemp t3 = newTemp(Ity_I64);
sewardj2f959cc2005-01-26 01:19:35 +00008656 assign(t1, getIReg64(R_RSP));
8657 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
sewardj5a9ffab2005-05-12 17:55:01 +00008658 assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64)));
8659 putIReg64(R_RSP, mkexpr(t3));
sewardj478646f2008-05-01 20:13:04 +00008660 make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret");
sewardjc6f970f2012-04-02 21:54:49 +00008661 jmp_treg(dres, Ijk_Ret, t2);
8662 vassert(dres->whatNext == Dis_StopHere);
sewardj2f959cc2005-01-26 01:19:35 +00008663}
8664
sewardj5b470602005-02-27 13:10:48 +00008665
sewardj1001dc42005-02-21 08:25:55 +00008666/*------------------------------------------------------------*/
8667/*--- SSE/SSE2/SSE3 helpers ---*/
8668/*------------------------------------------------------------*/
8669
sewardj9571dc02014-01-26 18:34:23 +00008670/* Indicates whether the op requires a rounding-mode argument. Note
8671 that this covers only vector floating point arithmetic ops, and
8672 omits the scalar ones that need rounding modes. Note also that
8673 inconsistencies here will get picked up later by the IR sanity
8674 checker, so this isn't correctness-critical. */
8675static Bool requiresRMode ( IROp op )
8676{
8677 switch (op) {
8678 /* 128 bit ops */
8679 case Iop_Add32Fx4: case Iop_Sub32Fx4:
8680 case Iop_Mul32Fx4: case Iop_Div32Fx4:
8681 case Iop_Add64Fx2: case Iop_Sub64Fx2:
8682 case Iop_Mul64Fx2: case Iop_Div64Fx2:
8683 /* 256 bit ops */
8684 case Iop_Add32Fx8: case Iop_Sub32Fx8:
8685 case Iop_Mul32Fx8: case Iop_Div32Fx8:
8686 case Iop_Add64Fx4: case Iop_Sub64Fx4:
8687 case Iop_Mul64Fx4: case Iop_Div64Fx4:
8688 return True;
8689 default:
8690 break;
8691 }
8692 return False;
8693}
8694
8695
sewardj1001dc42005-02-21 08:25:55 +00008696/* Worker function; do not call directly.
8697 Handles full width G = G `op` E and G = (not G) `op` E.
8698*/
8699
sewardj8d965312005-02-25 02:48:47 +00008700static ULong dis_SSE_E_to_G_all_wrk (
floriancacba8e2014-12-15 18:58:07 +00008701 const VexAbiInfo* vbi,
sewardj270def42005-07-03 01:03:01 +00008702 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00008703 const HChar* opname, IROp op,
sewardj8d965312005-02-25 02:48:47 +00008704 Bool invertG
8705 )
sewardj9da16972005-02-21 13:58:26 +00008706{
8707 HChar dis_buf[50];
8708 Int alen;
8709 IRTemp addr;
8710 UChar rm = getUChar(delta);
sewardj9571dc02014-01-26 18:34:23 +00008711 Bool needsRMode = requiresRMode(op);
sewardj9da16972005-02-21 13:58:26 +00008712 IRExpr* gpart
8713 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm)))
8714 : getXMMReg(gregOfRexRM(pfx,rm));
8715 if (epartIsReg(rm)) {
sewardj9571dc02014-01-26 18:34:23 +00008716 putXMMReg(
8717 gregOfRexRM(pfx,rm),
8718 needsRMode
8719 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
8720 gpart,
8721 getXMMReg(eregOfRexRM(pfx,rm)))
8722 : binop(op, gpart,
8723 getXMMReg(eregOfRexRM(pfx,rm)))
8724 );
sewardj9da16972005-02-21 13:58:26 +00008725 DIP("%s %s,%s\n", opname,
8726 nameXMMReg(eregOfRexRM(pfx,rm)),
8727 nameXMMReg(gregOfRexRM(pfx,rm)) );
8728 return delta+1;
8729 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008730 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj9571dc02014-01-26 18:34:23 +00008731 putXMMReg(
8732 gregOfRexRM(pfx,rm),
8733 needsRMode
8734 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
8735 gpart,
8736 loadLE(Ity_V128, mkexpr(addr)))
8737 : binop(op, gpart,
8738 loadLE(Ity_V128, mkexpr(addr)))
8739 );
sewardj9da16972005-02-21 13:58:26 +00008740 DIP("%s %s,%s\n", opname,
8741 dis_buf,
8742 nameXMMReg(gregOfRexRM(pfx,rm)) );
8743 return delta+alen;
8744 }
8745}
8746
8747
8748/* All lanes SSE binary operation, G = G `op` E. */
8749
8750static
floriancacba8e2014-12-15 18:58:07 +00008751ULong dis_SSE_E_to_G_all ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00008752 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00008753 const HChar* opname, IROp op )
sewardj9da16972005-02-21 13:58:26 +00008754{
sewardj2e28ac42008-12-04 00:05:12 +00008755 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False );
sewardj9da16972005-02-21 13:58:26 +00008756}
8757
sewardj8d965312005-02-25 02:48:47 +00008758/* All lanes SSE binary operation, G = (not G) `op` E. */
8759
8760static
floriancacba8e2014-12-15 18:58:07 +00008761ULong dis_SSE_E_to_G_all_invG ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00008762 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00008763 const HChar* opname, IROp op )
sewardj8d965312005-02-25 02:48:47 +00008764{
sewardj2e28ac42008-12-04 00:05:12 +00008765 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True );
sewardj8d965312005-02-25 02:48:47 +00008766}
8767
8768
8769/* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
8770
floriancacba8e2014-12-15 18:58:07 +00008771static ULong dis_SSE_E_to_G_lo32 ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00008772 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00008773 const HChar* opname, IROp op )
sewardj8d965312005-02-25 02:48:47 +00008774{
8775 HChar dis_buf[50];
8776 Int alen;
8777 IRTemp addr;
8778 UChar rm = getUChar(delta);
sewardj9c9ee3d2005-02-26 01:17:42 +00008779 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
sewardj8d965312005-02-25 02:48:47 +00008780 if (epartIsReg(rm)) {
sewardj9c9ee3d2005-02-26 01:17:42 +00008781 putXMMReg( gregOfRexRM(pfx,rm),
sewardj8d965312005-02-25 02:48:47 +00008782 binop(op, gpart,
8783 getXMMReg(eregOfRexRM(pfx,rm))) );
8784 DIP("%s %s,%s\n", opname,
8785 nameXMMReg(eregOfRexRM(pfx,rm)),
8786 nameXMMReg(gregOfRexRM(pfx,rm)) );
8787 return delta+1;
8788 } else {
8789 /* We can only do a 32-bit memory read, so the upper 3/4 of the
8790 E operand needs to be made simply of zeroes. */
8791 IRTemp epart = newTemp(Ity_V128);
sewardj2e28ac42008-12-04 00:05:12 +00008792 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj8d965312005-02-25 02:48:47 +00008793 assign( epart, unop( Iop_32UtoV128,
8794 loadLE(Ity_I32, mkexpr(addr))) );
8795 putXMMReg( gregOfRexRM(pfx,rm),
8796 binop(op, gpart, mkexpr(epart)) );
8797 DIP("%s %s,%s\n", opname,
8798 dis_buf,
8799 nameXMMReg(gregOfRexRM(pfx,rm)) );
8800 return delta+alen;
8801 }
8802}
sewardj1001dc42005-02-21 08:25:55 +00008803
8804
8805/* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
8806
floriancacba8e2014-12-15 18:58:07 +00008807static ULong dis_SSE_E_to_G_lo64 ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00008808 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00008809 const HChar* opname, IROp op )
sewardj1001dc42005-02-21 08:25:55 +00008810{
8811 HChar dis_buf[50];
8812 Int alen;
8813 IRTemp addr;
8814 UChar rm = getUChar(delta);
8815 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
8816 if (epartIsReg(rm)) {
8817 putXMMReg( gregOfRexRM(pfx,rm),
8818 binop(op, gpart,
8819 getXMMReg(eregOfRexRM(pfx,rm))) );
8820 DIP("%s %s,%s\n", opname,
8821 nameXMMReg(eregOfRexRM(pfx,rm)),
8822 nameXMMReg(gregOfRexRM(pfx,rm)) );
8823 return delta+1;
8824 } else {
8825 /* We can only do a 64-bit memory read, so the upper half of the
8826 E operand needs to be made simply of zeroes. */
8827 IRTemp epart = newTemp(Ity_V128);
sewardj2e28ac42008-12-04 00:05:12 +00008828 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj1001dc42005-02-21 08:25:55 +00008829 assign( epart, unop( Iop_64UtoV128,
8830 loadLE(Ity_I64, mkexpr(addr))) );
8831 putXMMReg( gregOfRexRM(pfx,rm),
8832 binop(op, gpart, mkexpr(epart)) );
8833 DIP("%s %s,%s\n", opname,
8834 dis_buf,
8835 nameXMMReg(gregOfRexRM(pfx,rm)) );
8836 return delta+alen;
8837 }
8838}
8839
8840
sewardja7ba8c42005-05-10 20:08:34 +00008841/* All lanes unary SSE operation, G = op(E). */
8842
8843static ULong dis_SSE_E_to_G_unary_all (
floriancacba8e2014-12-15 18:58:07 +00008844 const VexAbiInfo* vbi,
sewardj270def42005-07-03 01:03:01 +00008845 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00008846 const HChar* opname, IROp op
sewardja7ba8c42005-05-10 20:08:34 +00008847 )
8848{
8849 HChar dis_buf[50];
8850 Int alen;
8851 IRTemp addr;
8852 UChar rm = getUChar(delta);
8853 if (epartIsReg(rm)) {
8854 putXMMReg( gregOfRexRM(pfx,rm),
8855 unop(op, getXMMReg(eregOfRexRM(pfx,rm))) );
8856 DIP("%s %s,%s\n", opname,
8857 nameXMMReg(eregOfRexRM(pfx,rm)),
8858 nameXMMReg(gregOfRexRM(pfx,rm)) );
8859 return delta+1;
8860 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008861 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardja7ba8c42005-05-10 20:08:34 +00008862 putXMMReg( gregOfRexRM(pfx,rm),
8863 unop(op, loadLE(Ity_V128, mkexpr(addr))) );
8864 DIP("%s %s,%s\n", opname,
8865 dis_buf,
8866 nameXMMReg(gregOfRexRM(pfx,rm)) );
8867 return delta+alen;
8868 }
8869}
8870
8871
8872/* Lowest 32-bit lane only unary SSE operation, G = op(E). */
8873
8874static ULong dis_SSE_E_to_G_unary_lo32 (
floriancacba8e2014-12-15 18:58:07 +00008875 const VexAbiInfo* vbi,
sewardj270def42005-07-03 01:03:01 +00008876 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00008877 const HChar* opname, IROp op
sewardja7ba8c42005-05-10 20:08:34 +00008878 )
8879{
8880 /* First we need to get the old G value and patch the low 32 bits
8881 of the E operand into it. Then apply op and write back to G. */
8882 HChar dis_buf[50];
8883 Int alen;
8884 IRTemp addr;
8885 UChar rm = getUChar(delta);
8886 IRTemp oldG0 = newTemp(Ity_V128);
8887 IRTemp oldG1 = newTemp(Ity_V128);
8888
8889 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
8890
8891 if (epartIsReg(rm)) {
8892 assign( oldG1,
8893 binop( Iop_SetV128lo32,
8894 mkexpr(oldG0),
8895 getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) );
8896 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8897 DIP("%s %s,%s\n", opname,
8898 nameXMMReg(eregOfRexRM(pfx,rm)),
8899 nameXMMReg(gregOfRexRM(pfx,rm)) );
8900 return delta+1;
8901 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008902 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardja7ba8c42005-05-10 20:08:34 +00008903 assign( oldG1,
8904 binop( Iop_SetV128lo32,
8905 mkexpr(oldG0),
8906 loadLE(Ity_I32, mkexpr(addr)) ));
8907 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8908 DIP("%s %s,%s\n", opname,
8909 dis_buf,
8910 nameXMMReg(gregOfRexRM(pfx,rm)) );
8911 return delta+alen;
8912 }
8913}
sewardj1001dc42005-02-21 08:25:55 +00008914
8915
8916/* Lowest 64-bit lane only unary SSE operation, G = op(E). */
8917
sewardj8d965312005-02-25 02:48:47 +00008918static ULong dis_SSE_E_to_G_unary_lo64 (
floriancacba8e2014-12-15 18:58:07 +00008919 const VexAbiInfo* vbi,
sewardj270def42005-07-03 01:03:01 +00008920 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00008921 const HChar* opname, IROp op
sewardj8d965312005-02-25 02:48:47 +00008922 )
sewardj1001dc42005-02-21 08:25:55 +00008923{
8924 /* First we need to get the old G value and patch the low 64 bits
8925 of the E operand into it. Then apply op and write back to G. */
8926 HChar dis_buf[50];
8927 Int alen;
8928 IRTemp addr;
8929 UChar rm = getUChar(delta);
8930 IRTemp oldG0 = newTemp(Ity_V128);
8931 IRTemp oldG1 = newTemp(Ity_V128);
8932
8933 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
8934
8935 if (epartIsReg(rm)) {
8936 assign( oldG1,
8937 binop( Iop_SetV128lo64,
8938 mkexpr(oldG0),
8939 getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) );
8940 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8941 DIP("%s %s,%s\n", opname,
8942 nameXMMReg(eregOfRexRM(pfx,rm)),
8943 nameXMMReg(gregOfRexRM(pfx,rm)) );
8944 return delta+1;
8945 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008946 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj1001dc42005-02-21 08:25:55 +00008947 assign( oldG1,
8948 binop( Iop_SetV128lo64,
8949 mkexpr(oldG0),
8950 loadLE(Ity_I64, mkexpr(addr)) ));
8951 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8952 DIP("%s %s,%s\n", opname,
8953 dis_buf,
8954 nameXMMReg(gregOfRexRM(pfx,rm)) );
8955 return delta+alen;
8956 }
8957}
8958
8959
sewardj09717342005-05-05 21:34:02 +00008960/* SSE integer binary operation:
8961 G = G `op` E (eLeft == False)
8962 G = E `op` G (eLeft == True)
8963*/
8964static ULong dis_SSEint_E_to_G(
floriancacba8e2014-12-15 18:58:07 +00008965 const VexAbiInfo* vbi,
sewardj270def42005-07-03 01:03:01 +00008966 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00008967 const HChar* opname, IROp op,
sewardj09717342005-05-05 21:34:02 +00008968 Bool eLeft
8969 )
8970{
8971 HChar dis_buf[50];
8972 Int alen;
8973 IRTemp addr;
8974 UChar rm = getUChar(delta);
8975 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
8976 IRExpr* epart = NULL;
8977 if (epartIsReg(rm)) {
8978 epart = getXMMReg(eregOfRexRM(pfx,rm));
8979 DIP("%s %s,%s\n", opname,
8980 nameXMMReg(eregOfRexRM(pfx,rm)),
8981 nameXMMReg(gregOfRexRM(pfx,rm)) );
8982 delta += 1;
8983 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008984 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj09717342005-05-05 21:34:02 +00008985 epart = loadLE(Ity_V128, mkexpr(addr));
8986 DIP("%s %s,%s\n", opname,
8987 dis_buf,
8988 nameXMMReg(gregOfRexRM(pfx,rm)) );
8989 delta += alen;
8990 }
8991 putXMMReg( gregOfRexRM(pfx,rm),
8992 eLeft ? binop(op, epart, gpart)
sewardjcc3d2192013-03-27 11:37:33 +00008993 : binop(op, gpart, epart) );
sewardj09717342005-05-05 21:34:02 +00008994 return delta;
8995}
sewardj8d965312005-02-25 02:48:47 +00008996
8997
sewardjc4530ae2012-05-21 10:18:49 +00008998/* Helper for doing SSE FP comparisons. False return ==> unhandled.
8999 This is all a bit of a kludge in that it ignores the subtleties of
9000 ordered-vs-unordered and signalling-vs-nonsignalling in the Intel
9001 spec. */
9002static Bool findSSECmpOp ( /*OUT*/Bool* preSwapP,
9003 /*OUT*/IROp* opP,
9004 /*OUT*/Bool* postNotP,
9005 UInt imm8, Bool all_lanes, Int sz )
sewardj8d965312005-02-25 02:48:47 +00009006{
sewardjc4530ae2012-05-21 10:18:49 +00009007 if (imm8 >= 32) return False;
9008
9009 /* First, compute a (preSwap, op, postNot) triple from
9010 the supplied imm8. */
9011 Bool pre = False;
9012 IROp op = Iop_INVALID;
9013 Bool not = False;
9014
9015# define XXX(_pre, _op, _not) { pre = _pre; op = _op; not = _not; }
sewardjd698a052012-06-25 07:40:54 +00009016 // If you add a case here, add a corresponding test for both VCMPSD_128
9017 // and VCMPSS_128 in avx-1.c.
sewardjc4530ae2012-05-21 10:18:49 +00009018 switch (imm8) {
sewardj2f8c0b92012-07-14 14:20:00 +00009019 // "O" = ordered, "U" = unordered
9020 // "Q" = non-signalling (quiet), "S" = signalling
9021 //
9022 // swap operands?
9023 // |
9024 // | cmp op invert after?
9025 // | | |
9026 // v v v
sewardjd698a052012-06-25 07:40:54 +00009027 case 0x0: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_OQ
9028 case 0x1: XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OS
9029 case 0x2: XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OS
9030 case 0x3: XXX(False, Iop_CmpUN32Fx4, False); break; // UNORD_Q
9031 case 0x4: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_UQ
9032 case 0x5: XXX(False, Iop_CmpLT32Fx4, True); break; // NLT_US
9033 case 0x6: XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_US
9034 case 0x7: XXX(False, Iop_CmpUN32Fx4, True); break; // ORD_Q
tom4d780eb2013-03-27 15:38:47 +00009035 case 0x8: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_UQ
sewardj2f8c0b92012-07-14 14:20:00 +00009036 case 0x9: XXX(True, Iop_CmpLE32Fx4, True); break; // NGE_US
sewardjc4530ae2012-05-21 10:18:49 +00009037 /* "Enhanced Comparison Predicate[s] for VEX-Encoded [insns] */
sewardjd698a052012-06-25 07:40:54 +00009038 case 0xA: XXX(True, Iop_CmpLT32Fx4, True); break; // NGT_US
9039 // 0xB FALSE_OQ
sewardj68b01f72012-06-27 10:27:13 +00009040 // 0xC: this isn't really right because it returns all-1s when
9041 // either operand is a NaN, and it should return all-0s.
sewardjd698a052012-06-25 07:40:54 +00009042 case 0xC: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_OQ
9043 case 0xD: XXX(True, Iop_CmpLE32Fx4, False); break; // GE_OS
9044 case 0xE: XXX(True, Iop_CmpLT32Fx4, False); break; // GT_OS
9045 // 0xF TRUE_UQ
9046 // 0x10 EQ_OS
9047 case 0x11: XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OQ
9048 case 0x12: XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OQ
9049 // 0x13 UNORD_S
9050 // 0x14 NEQ_US
9051 // 0x15 NLT_UQ
9052 case 0x16: XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_UQ
9053 // 0x17 ORD_S
9054 // 0x18 EQ_US
9055 // 0x19 NGE_UQ
9056 // 0x1A NGT_UQ
9057 // 0x1B FALSE_OS
9058 // 0x1C NEQ_OS
9059 // 0x1D GE_OQ
9060 case 0x1E: XXX(True, Iop_CmpLT32Fx4, False); break; // GT_OQ
9061 // 0x1F TRUE_US
sewardjc93904b2012-05-27 13:50:42 +00009062 /* Don't forget to add test cases to VCMPSS_128_<imm8> in
9063 avx-1.c if new cases turn up. */
sewardjc4530ae2012-05-21 10:18:49 +00009064 default: break;
9065 }
9066# undef XXX
9067 if (op == Iop_INVALID) return False;
9068
9069 /* Now convert the op into one with the same arithmetic but that is
9070 correct for the width and laneage requirements. */
9071
9072 /**/ if (sz == 4 && all_lanes) {
9073 switch (op) {
9074 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32Fx4; break;
9075 case Iop_CmpLT32Fx4: op = Iop_CmpLT32Fx4; break;
9076 case Iop_CmpLE32Fx4: op = Iop_CmpLE32Fx4; break;
9077 case Iop_CmpUN32Fx4: op = Iop_CmpUN32Fx4; break;
9078 default: vassert(0);
9079 }
9080 }
9081 else if (sz == 4 && !all_lanes) {
9082 switch (op) {
9083 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32F0x4; break;
9084 case Iop_CmpLT32Fx4: op = Iop_CmpLT32F0x4; break;
9085 case Iop_CmpLE32Fx4: op = Iop_CmpLE32F0x4; break;
9086 case Iop_CmpUN32Fx4: op = Iop_CmpUN32F0x4; break;
9087 default: vassert(0);
9088 }
9089 }
9090 else if (sz == 8 && all_lanes) {
9091 switch (op) {
9092 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64Fx2; break;
9093 case Iop_CmpLT32Fx4: op = Iop_CmpLT64Fx2; break;
9094 case Iop_CmpLE32Fx4: op = Iop_CmpLE64Fx2; break;
9095 case Iop_CmpUN32Fx4: op = Iop_CmpUN64Fx2; break;
9096 default: vassert(0);
9097 }
9098 }
9099 else if (sz == 8 && !all_lanes) {
9100 switch (op) {
9101 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64F0x2; break;
9102 case Iop_CmpLT32Fx4: op = Iop_CmpLT64F0x2; break;
9103 case Iop_CmpLE32Fx4: op = Iop_CmpLE64F0x2; break;
9104 case Iop_CmpUN32Fx4: op = Iop_CmpUN64F0x2; break;
9105 default: vassert(0);
9106 }
9107 }
9108 else {
9109 vpanic("findSSECmpOp(amd64,guest)");
sewardj8d965312005-02-25 02:48:47 +00009110 }
9111
sewardjc4530ae2012-05-21 10:18:49 +00009112 *preSwapP = pre; *opP = op; *postNotP = not;
9113 return True;
sewardj8d965312005-02-25 02:48:47 +00009114}
9115
sewardj8d965312005-02-25 02:48:47 +00009116
sewardjc4530ae2012-05-21 10:18:49 +00009117/* Handles SSE 32F/64F comparisons. It can fail, in which case it
9118 returns the original delta to indicate failure. */
9119
floriancacba8e2014-12-15 18:58:07 +00009120static Long dis_SSE_cmp_E_to_G ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00009121 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00009122 const HChar* opname, Bool all_lanes, Int sz )
sewardj8d965312005-02-25 02:48:47 +00009123{
sewardjc4530ae2012-05-21 10:18:49 +00009124 Long delta0 = delta;
sewardj8d965312005-02-25 02:48:47 +00009125 HChar dis_buf[50];
sewardjc4530ae2012-05-21 10:18:49 +00009126 Int alen;
9127 UInt imm8;
sewardj8d965312005-02-25 02:48:47 +00009128 IRTemp addr;
sewardjc4530ae2012-05-21 10:18:49 +00009129 Bool preSwap = False;
sewardj8d965312005-02-25 02:48:47 +00009130 IROp op = Iop_INVALID;
sewardjc4530ae2012-05-21 10:18:49 +00009131 Bool postNot = False;
sewardj8d965312005-02-25 02:48:47 +00009132 IRTemp plain = newTemp(Ity_V128);
9133 UChar rm = getUChar(delta);
9134 UShort mask = 0;
9135 vassert(sz == 4 || sz == 8);
9136 if (epartIsReg(rm)) {
9137 imm8 = getUChar(delta+1);
sewardjc4530ae2012-05-21 10:18:49 +00009138 if (imm8 >= 8) return delta0; /* FAIL */
9139 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
9140 if (!ok) return delta0; /* FAIL */
9141 vassert(!preSwap); /* never needed for imm8 < 8 */
sewardj8d965312005-02-25 02:48:47 +00009142 assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)),
9143 getXMMReg(eregOfRexRM(pfx,rm))) );
9144 delta += 2;
9145 DIP("%s $%d,%s,%s\n", opname,
9146 (Int)imm8,
9147 nameXMMReg(eregOfRexRM(pfx,rm)),
9148 nameXMMReg(gregOfRexRM(pfx,rm)) );
9149 } else {
sewardj2e28ac42008-12-04 00:05:12 +00009150 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj8d965312005-02-25 02:48:47 +00009151 imm8 = getUChar(delta+alen);
sewardjc4530ae2012-05-21 10:18:49 +00009152 if (imm8 >= 8) return delta0; /* FAIL */
9153 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
9154 if (!ok) return delta0; /* FAIL */
9155 vassert(!preSwap); /* never needed for imm8 < 8 */
sewardjab9055b2006-01-01 13:17:38 +00009156 assign( plain,
9157 binop(
9158 op,
9159 getXMMReg(gregOfRexRM(pfx,rm)),
sewardjc4530ae2012-05-21 10:18:49 +00009160 all_lanes
9161 ? loadLE(Ity_V128, mkexpr(addr))
9162 : sz == 8
9163 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
9164 : /*sz==4*/
9165 unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
sewardjcc3d2192013-03-27 11:37:33 +00009166 )
sewardjab9055b2006-01-01 13:17:38 +00009167 );
sewardj8d965312005-02-25 02:48:47 +00009168 delta += alen+1;
9169 DIP("%s $%d,%s,%s\n", opname,
9170 (Int)imm8,
9171 dis_buf,
9172 nameXMMReg(gregOfRexRM(pfx,rm)) );
9173 }
9174
sewardjc4530ae2012-05-21 10:18:49 +00009175 if (postNot && all_lanes) {
sewardj9c9ee3d2005-02-26 01:17:42 +00009176 putXMMReg( gregOfRexRM(pfx,rm),
sewardj8d965312005-02-25 02:48:47 +00009177 unop(Iop_NotV128, mkexpr(plain)) );
9178 }
9179 else
sewardjc4530ae2012-05-21 10:18:49 +00009180 if (postNot && !all_lanes) {
sewardj1027dc22005-02-26 01:55:02 +00009181 mask = toUShort(sz==4 ? 0x000F : 0x00FF);
sewardj8d965312005-02-25 02:48:47 +00009182 putXMMReg( gregOfRexRM(pfx,rm),
9183 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
9184 }
9185 else {
9186 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) );
9187 }
9188
9189 return delta;
9190}
9191
9192
sewardjadffcef2005-05-11 00:03:06 +00009193/* Vector by scalar shift of G by the amount specified at the bottom
9194 of E. */
9195
floriancacba8e2014-12-15 18:58:07 +00009196static ULong dis_SSE_shiftG_byE ( const VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00009197 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00009198 const HChar* opname, IROp op )
sewardjadffcef2005-05-11 00:03:06 +00009199{
9200 HChar dis_buf[50];
9201 Int alen, size;
9202 IRTemp addr;
9203 Bool shl, shr, sar;
9204 UChar rm = getUChar(delta);
9205 IRTemp g0 = newTemp(Ity_V128);
9206 IRTemp g1 = newTemp(Ity_V128);
sewardj4c0a7ac2012-06-21 09:08:19 +00009207 IRTemp amt = newTemp(Ity_I64);
sewardjadffcef2005-05-11 00:03:06 +00009208 IRTemp amt8 = newTemp(Ity_I8);
9209 if (epartIsReg(rm)) {
sewardj4c0a7ac2012-06-21 09:08:19 +00009210 assign( amt, getXMMRegLane64(eregOfRexRM(pfx,rm), 0) );
sewardjadffcef2005-05-11 00:03:06 +00009211 DIP("%s %s,%s\n", opname,
9212 nameXMMReg(eregOfRexRM(pfx,rm)),
9213 nameXMMReg(gregOfRexRM(pfx,rm)) );
9214 delta++;
9215 } else {
sewardj2e28ac42008-12-04 00:05:12 +00009216 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj4c0a7ac2012-06-21 09:08:19 +00009217 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
sewardjadffcef2005-05-11 00:03:06 +00009218 DIP("%s %s,%s\n", opname,
9219 dis_buf,
9220 nameXMMReg(gregOfRexRM(pfx,rm)) );
9221 delta += alen;
9222 }
9223 assign( g0, getXMMReg(gregOfRexRM(pfx,rm)) );
sewardj4c0a7ac2012-06-21 09:08:19 +00009224 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
sewardjadffcef2005-05-11 00:03:06 +00009225
9226 shl = shr = sar = False;
9227 size = 0;
9228 switch (op) {
9229 case Iop_ShlN16x8: shl = True; size = 32; break;
9230 case Iop_ShlN32x4: shl = True; size = 32; break;
9231 case Iop_ShlN64x2: shl = True; size = 64; break;
9232 case Iop_SarN16x8: sar = True; size = 16; break;
9233 case Iop_SarN32x4: sar = True; size = 32; break;
9234 case Iop_ShrN16x8: shr = True; size = 16; break;
9235 case Iop_ShrN32x4: shr = True; size = 32; break;
9236 case Iop_ShrN64x2: shr = True; size = 64; break;
9237 default: vassert(0);
9238 }
9239
9240 if (shl || shr) {
9241 assign(
9242 g1,
florian99dd03e2013-01-29 03:56:06 +00009243 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00009244 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
florian99dd03e2013-01-29 03:56:06 +00009245 binop(op, mkexpr(g0), mkexpr(amt8)),
9246 mkV128(0x0000)
sewardjadffcef2005-05-11 00:03:06 +00009247 )
9248 );
9249 } else
9250 if (sar) {
9251 assign(
9252 g1,
florian99dd03e2013-01-29 03:56:06 +00009253 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00009254 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
florian99dd03e2013-01-29 03:56:06 +00009255 binop(op, mkexpr(g0), mkexpr(amt8)),
9256 binop(op, mkexpr(g0), mkU8(size-1))
sewardjadffcef2005-05-11 00:03:06 +00009257 )
9258 );
9259 } else {
9260 vassert(0);
9261 }
9262
9263 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) );
9264 return delta;
9265}
sewardj09717342005-05-05 21:34:02 +00009266
9267
9268/* Vector by scalar shift of E by an immediate byte. */
9269
9270static
9271ULong dis_SSE_shiftE_imm ( Prefix pfx,
florian55085f82012-11-21 00:36:55 +00009272 Long delta, const HChar* opname, IROp op )
sewardj09717342005-05-05 21:34:02 +00009273{
9274 Bool shl, shr, sar;
9275 UChar rm = getUChar(delta);
9276 IRTemp e0 = newTemp(Ity_V128);
9277 IRTemp e1 = newTemp(Ity_V128);
9278 UChar amt, size;
9279 vassert(epartIsReg(rm));
9280 vassert(gregLO3ofRM(rm) == 2
9281 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
sewardjca673ab2005-05-11 10:03:08 +00009282 amt = getUChar(delta+1);
sewardj09717342005-05-05 21:34:02 +00009283 delta += 2;
9284 DIP("%s $%d,%s\n", opname,
9285 (Int)amt,
9286 nameXMMReg(eregOfRexRM(pfx,rm)) );
9287 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
9288
9289 shl = shr = sar = False;
9290 size = 0;
9291 switch (op) {
9292 case Iop_ShlN16x8: shl = True; size = 16; break;
9293 case Iop_ShlN32x4: shl = True; size = 32; break;
9294 case Iop_ShlN64x2: shl = True; size = 64; break;
9295 case Iop_SarN16x8: sar = True; size = 16; break;
9296 case Iop_SarN32x4: sar = True; size = 32; break;
9297 case Iop_ShrN16x8: shr = True; size = 16; break;
9298 case Iop_ShrN32x4: shr = True; size = 32; break;
9299 case Iop_ShrN64x2: shr = True; size = 64; break;
9300 default: vassert(0);
9301 }
9302
9303 if (shl || shr) {
9304 assign( e1, amt >= size
9305 ? mkV128(0x0000)
9306 : binop(op, mkexpr(e0), mkU8(amt))
9307 );
9308 } else
9309 if (sar) {
9310 assign( e1, amt >= size
9311 ? binop(op, mkexpr(e0), mkU8(size-1))
9312 : binop(op, mkexpr(e0), mkU8(amt))
9313 );
9314 } else {
9315 vassert(0);
9316 }
9317
9318 putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) );
9319 return delta;
9320}
sewardj1a01e652005-02-23 11:39:21 +00009321
9322
9323/* Get the current SSE rounding mode. */
9324
9325static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
9326{
9327 return
9328 unop( Iop_64to32,
9329 binop( Iop_And64,
9330 IRExpr_Get( OFFB_SSEROUND, Ity_I64 ),
9331 mkU64(3) ));
9332}
9333
sewardjbcbb9de2005-03-27 02:22:32 +00009334static void put_sse_roundingmode ( IRExpr* sseround )
9335{
sewardjdd40fdf2006-12-24 02:20:24 +00009336 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
sewardjbcbb9de2005-03-27 02:22:32 +00009337 stmt( IRStmt_Put( OFFB_SSEROUND,
9338 unop(Iop_32Uto64,sseround) ) );
9339}
9340
sewardj4b1cc832012-06-13 11:10:20 +00009341/* Break a V128-bit value up into four 32-bit ints. */
sewardja7ba8c42005-05-10 20:08:34 +00009342
sewardj4b1cc832012-06-13 11:10:20 +00009343static void breakupV128to32s ( IRTemp t128,
9344 /*OUTs*/
9345 IRTemp* t3, IRTemp* t2,
9346 IRTemp* t1, IRTemp* t0 )
sewardja7ba8c42005-05-10 20:08:34 +00009347{
9348 IRTemp hi64 = newTemp(Ity_I64);
9349 IRTemp lo64 = newTemp(Ity_I64);
9350 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
9351 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
9352
9353 vassert(t0 && *t0 == IRTemp_INVALID);
9354 vassert(t1 && *t1 == IRTemp_INVALID);
9355 vassert(t2 && *t2 == IRTemp_INVALID);
9356 vassert(t3 && *t3 == IRTemp_INVALID);
9357
9358 *t0 = newTemp(Ity_I32);
9359 *t1 = newTemp(Ity_I32);
9360 *t2 = newTemp(Ity_I32);
9361 *t3 = newTemp(Ity_I32);
9362 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
9363 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
9364 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
9365 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
9366}
9367
sewardj4b1cc832012-06-13 11:10:20 +00009368/* Construct a V128-bit value from four 32-bit ints. */
sewardja7ba8c42005-05-10 20:08:34 +00009369
sewardj4b1cc832012-06-13 11:10:20 +00009370static IRExpr* mkV128from32s ( IRTemp t3, IRTemp t2,
9371 IRTemp t1, IRTemp t0 )
sewardja7ba8c42005-05-10 20:08:34 +00009372{
9373 return
9374 binop( Iop_64HLtoV128,
9375 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
9376 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
9377 );
9378}
9379
9380/* Break a 64-bit value up into four 16-bit ints. */
9381
9382static void breakup64to16s ( IRTemp t64,
9383 /*OUTs*/
9384 IRTemp* t3, IRTemp* t2,
9385 IRTemp* t1, IRTemp* t0 )
9386{
9387 IRTemp hi32 = newTemp(Ity_I32);
9388 IRTemp lo32 = newTemp(Ity_I32);
9389 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
9390 assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
9391
9392 vassert(t0 && *t0 == IRTemp_INVALID);
9393 vassert(t1 && *t1 == IRTemp_INVALID);
9394 vassert(t2 && *t2 == IRTemp_INVALID);
9395 vassert(t3 && *t3 == IRTemp_INVALID);
9396
9397 *t0 = newTemp(Ity_I16);
9398 *t1 = newTemp(Ity_I16);
9399 *t2 = newTemp(Ity_I16);
9400 *t3 = newTemp(Ity_I16);
9401 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) );
9402 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
9403 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) );
9404 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
9405}
9406
9407/* Construct a 64-bit value from four 16-bit ints. */
9408
9409static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
9410 IRTemp t1, IRTemp t0 )
9411{
9412 return
9413 binop( Iop_32HLto64,
9414 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
9415 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
9416 );
9417}
sewardjdf0e0022005-01-25 15:48:43 +00009418
sewardj4b1cc832012-06-13 11:10:20 +00009419/* Break a V256-bit value up into four 64-bit ints. */
9420
9421static void breakupV256to64s ( IRTemp t256,
9422 /*OUTs*/
9423 IRTemp* t3, IRTemp* t2,
9424 IRTemp* t1, IRTemp* t0 )
9425{
9426 vassert(t0 && *t0 == IRTemp_INVALID);
9427 vassert(t1 && *t1 == IRTemp_INVALID);
9428 vassert(t2 && *t2 == IRTemp_INVALID);
9429 vassert(t3 && *t3 == IRTemp_INVALID);
9430 *t0 = newTemp(Ity_I64);
9431 *t1 = newTemp(Ity_I64);
9432 *t2 = newTemp(Ity_I64);
9433 *t3 = newTemp(Ity_I64);
9434 assign( *t0, unop(Iop_V256to64_0, mkexpr(t256)) );
9435 assign( *t1, unop(Iop_V256to64_1, mkexpr(t256)) );
9436 assign( *t2, unop(Iop_V256to64_2, mkexpr(t256)) );
9437 assign( *t3, unop(Iop_V256to64_3, mkexpr(t256)) );
9438}
9439
sewardjfe0c5e72012-06-15 15:48:07 +00009440/* Break a V256-bit value up into two V128s. */
9441
9442static void breakupV256toV128s ( IRTemp t256,
9443 /*OUTs*/
9444 IRTemp* t1, IRTemp* t0 )
9445{
9446 vassert(t0 && *t0 == IRTemp_INVALID);
9447 vassert(t1 && *t1 == IRTemp_INVALID);
9448 *t0 = newTemp(Ity_V128);
9449 *t1 = newTemp(Ity_V128);
9450 assign(*t1, unop(Iop_V256toV128_1, mkexpr(t256)));
9451 assign(*t0, unop(Iop_V256toV128_0, mkexpr(t256)));
9452}
sewardjdf0e0022005-01-25 15:48:43 +00009453
sewardj4f228902012-06-21 09:17:58 +00009454/* Break a V256-bit value up into eight 32-bit ints. */
9455
9456static void breakupV256to32s ( IRTemp t256,
9457 /*OUTs*/
9458 IRTemp* t7, IRTemp* t6,
9459 IRTemp* t5, IRTemp* t4,
9460 IRTemp* t3, IRTemp* t2,
9461 IRTemp* t1, IRTemp* t0 )
9462{
9463 IRTemp t128_1 = IRTemp_INVALID;
9464 IRTemp t128_0 = IRTemp_INVALID;
9465 breakupV256toV128s( t256, &t128_1, &t128_0 );
9466 breakupV128to32s( t128_1, t7, t6, t5, t4 );
9467 breakupV128to32s( t128_0, t3, t2, t1, t0 );
9468}
9469
sewardjd8bca7e2012-06-20 11:46:19 +00009470/* Break a V128-bit value up into two 64-bit ints. */
9471
9472static void breakupV128to64s ( IRTemp t128,
9473 /*OUTs*/
9474 IRTemp* t1, IRTemp* t0 )
9475{
9476 vassert(t0 && *t0 == IRTemp_INVALID);
9477 vassert(t1 && *t1 == IRTemp_INVALID);
9478 *t0 = newTemp(Ity_I64);
9479 *t1 = newTemp(Ity_I64);
9480 assign( *t0, unop(Iop_V128to64, mkexpr(t128)) );
9481 assign( *t1, unop(Iop_V128HIto64, mkexpr(t128)) );
9482}
9483
sewardj89378162012-06-24 12:12:20 +00009484/* Construct a V256-bit value from eight 32-bit ints. */
9485
9486static IRExpr* mkV256from32s ( IRTemp t7, IRTemp t6,
9487 IRTemp t5, IRTemp t4,
9488 IRTemp t3, IRTemp t2,
9489 IRTemp t1, IRTemp t0 )
9490{
9491 return
9492 binop( Iop_V128HLtoV256,
9493 binop( Iop_64HLtoV128,
9494 binop(Iop_32HLto64, mkexpr(t7), mkexpr(t6)),
9495 binop(Iop_32HLto64, mkexpr(t5), mkexpr(t4)) ),
9496 binop( Iop_64HLtoV128,
9497 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
9498 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) )
9499 );
9500}
9501
9502/* Construct a V256-bit value from four 64-bit ints. */
9503
9504static IRExpr* mkV256from64s ( IRTemp t3, IRTemp t2,
9505 IRTemp t1, IRTemp t0 )
9506{
9507 return
9508 binop( Iop_V128HLtoV256,
9509 binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)),
9510 binop(Iop_64HLtoV128, mkexpr(t1), mkexpr(t0))
9511 );
9512}
9513
sewardjd166e282008-02-06 11:42:45 +00009514/* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
9515 values (aa,bb), computes, for each of the 4 16-bit lanes:
9516
9517 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
9518*/
9519static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
9520{
9521 IRTemp aa = newTemp(Ity_I64);
9522 IRTemp bb = newTemp(Ity_I64);
9523 IRTemp aahi32s = newTemp(Ity_I64);
9524 IRTemp aalo32s = newTemp(Ity_I64);
9525 IRTemp bbhi32s = newTemp(Ity_I64);
9526 IRTemp bblo32s = newTemp(Ity_I64);
9527 IRTemp rHi = newTemp(Ity_I64);
9528 IRTemp rLo = newTemp(Ity_I64);
9529 IRTemp one32x2 = newTemp(Ity_I64);
9530 assign(aa, aax);
9531 assign(bb, bbx);
9532 assign( aahi32s,
9533 binop(Iop_SarN32x2,
9534 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
9535 mkU8(16) ));
9536 assign( aalo32s,
9537 binop(Iop_SarN32x2,
9538 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
9539 mkU8(16) ));
9540 assign( bbhi32s,
9541 binop(Iop_SarN32x2,
9542 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
9543 mkU8(16) ));
9544 assign( bblo32s,
9545 binop(Iop_SarN32x2,
9546 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
9547 mkU8(16) ));
9548 assign(one32x2, mkU64( (1ULL << 32) + 1 ));
9549 assign(
9550 rHi,
9551 binop(
9552 Iop_ShrN32x2,
9553 binop(
9554 Iop_Add32x2,
9555 binop(
9556 Iop_ShrN32x2,
9557 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
9558 mkU8(14)
9559 ),
9560 mkexpr(one32x2)
9561 ),
9562 mkU8(1)
9563 )
9564 );
9565 assign(
9566 rLo,
9567 binop(
9568 Iop_ShrN32x2,
9569 binop(
9570 Iop_Add32x2,
9571 binop(
9572 Iop_ShrN32x2,
9573 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
9574 mkU8(14)
9575 ),
9576 mkexpr(one32x2)
9577 ),
9578 mkU8(1)
9579 )
9580 );
9581 return
9582 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
9583}
9584
9585/* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
9586 values (aa,bb), computes, for each lane:
9587
9588 if aa_lane < 0 then - bb_lane
9589 else if aa_lane > 0 then bb_lane
9590 else 0
9591*/
9592static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
9593{
9594 IRTemp aa = newTemp(Ity_I64);
9595 IRTemp bb = newTemp(Ity_I64);
9596 IRTemp zero = newTemp(Ity_I64);
9597 IRTemp bbNeg = newTemp(Ity_I64);
9598 IRTemp negMask = newTemp(Ity_I64);
9599 IRTemp posMask = newTemp(Ity_I64);
9600 IROp opSub = Iop_INVALID;
9601 IROp opCmpGTS = Iop_INVALID;
9602
9603 switch (laneszB) {
9604 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break;
9605 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
9606 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
9607 default: vassert(0);
9608 }
9609
9610 assign( aa, aax );
9611 assign( bb, bbx );
9612 assign( zero, mkU64(0) );
9613 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) );
9614 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
9615 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) );
9616
9617 return
9618 binop(Iop_Or64,
9619 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)),
9620 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
9621
9622}
9623
sewardj97f72452012-05-23 05:56:53 +00009624
sewardjd166e282008-02-06 11:42:45 +00009625/* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
9626 value aa, computes, for each lane
9627
9628 if aa < 0 then -aa else aa
9629
9630 Note that the result is interpreted as unsigned, so that the
9631 absolute value of the most negative signed input can be
9632 represented.
9633*/
sewardj97f72452012-05-23 05:56:53 +00009634static IRTemp math_PABS_MMX ( IRTemp aa, Int laneszB )
sewardjd166e282008-02-06 11:42:45 +00009635{
sewardj97f72452012-05-23 05:56:53 +00009636 IRTemp res = newTemp(Ity_I64);
sewardjd166e282008-02-06 11:42:45 +00009637 IRTemp zero = newTemp(Ity_I64);
9638 IRTemp aaNeg = newTemp(Ity_I64);
9639 IRTemp negMask = newTemp(Ity_I64);
9640 IRTemp posMask = newTemp(Ity_I64);
9641 IROp opSub = Iop_INVALID;
9642 IROp opSarN = Iop_INVALID;
9643
9644 switch (laneszB) {
9645 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break;
9646 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
9647 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
9648 default: vassert(0);
9649 }
9650
sewardjd166e282008-02-06 11:42:45 +00009651 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
9652 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
9653 assign( zero, mkU64(0) );
9654 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) );
sewardj97f72452012-05-23 05:56:53 +00009655 assign( res,
9656 binop(Iop_Or64,
9657 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)),
9658 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ));
9659 return res;
sewardjd166e282008-02-06 11:42:45 +00009660}
9661
sewardj97f72452012-05-23 05:56:53 +00009662/* XMM version of math_PABS_MMX. */
9663static IRTemp math_PABS_XMM ( IRTemp aa, Int laneszB )
9664{
9665 IRTemp res = newTemp(Ity_V128);
9666 IRTemp aaHi = newTemp(Ity_I64);
9667 IRTemp aaLo = newTemp(Ity_I64);
9668 assign(aaHi, unop(Iop_V128HIto64, mkexpr(aa)));
9669 assign(aaLo, unop(Iop_V128to64, mkexpr(aa)));
9670 assign(res, binop(Iop_64HLtoV128,
9671 mkexpr(math_PABS_MMX(aaHi, laneszB)),
9672 mkexpr(math_PABS_MMX(aaLo, laneszB))));
9673 return res;
9674}
9675
9676/* Specialisations of math_PABS_XMM, since there's no easy way to do
9677 partial applications in C :-( */
9678static IRTemp math_PABS_XMM_pap4 ( IRTemp aa ) {
9679 return math_PABS_XMM(aa, 4);
9680}
9681
sewardj8516a1f2012-06-24 14:26:30 +00009682static IRTemp math_PABS_XMM_pap2 ( IRTemp aa ) {
9683 return math_PABS_XMM(aa, 2);
9684}
9685
9686static IRTemp math_PABS_XMM_pap1 ( IRTemp aa ) {
9687 return math_PABS_XMM(aa, 1);
9688}
sewardj97f72452012-05-23 05:56:53 +00009689
sewardjcc3d2192013-03-27 11:37:33 +00009690/* YMM version of math_PABS_XMM. */
9691static IRTemp math_PABS_YMM ( IRTemp aa, Int laneszB )
9692{
9693 IRTemp res = newTemp(Ity_V256);
9694 IRTemp aaHi = IRTemp_INVALID;
9695 IRTemp aaLo = IRTemp_INVALID;
9696 breakupV256toV128s(aa, &aaHi, &aaLo);
9697 assign(res, binop(Iop_V128HLtoV256,
9698 mkexpr(math_PABS_XMM(aaHi, laneszB)),
9699 mkexpr(math_PABS_XMM(aaLo, laneszB))));
9700 return res;
9701}
9702
9703static IRTemp math_PABS_YMM_pap4 ( IRTemp aa ) {
9704 return math_PABS_YMM(aa, 4);
9705}
9706
9707static IRTemp math_PABS_YMM_pap2 ( IRTemp aa ) {
9708 return math_PABS_YMM(aa, 2);
9709}
9710
9711static IRTemp math_PABS_YMM_pap1 ( IRTemp aa ) {
9712 return math_PABS_YMM(aa, 1);
9713}
9714
sewardjd166e282008-02-06 11:42:45 +00009715static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
9716 IRTemp lo64, Long byteShift )
9717{
9718 vassert(byteShift >= 1 && byteShift <= 7);
9719 return
9720 binop(Iop_Or64,
9721 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
9722 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
9723 );
9724}
9725
sewardj151cd3e2012-06-18 13:56:55 +00009726static IRTemp math_PALIGNR_XMM ( IRTemp sV, IRTemp dV, UInt imm8 )
9727{
9728 IRTemp res = newTemp(Ity_V128);
9729 IRTemp sHi = newTemp(Ity_I64);
9730 IRTemp sLo = newTemp(Ity_I64);
9731 IRTemp dHi = newTemp(Ity_I64);
9732 IRTemp dLo = newTemp(Ity_I64);
9733 IRTemp rHi = newTemp(Ity_I64);
9734 IRTemp rLo = newTemp(Ity_I64);
9735
9736 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
9737 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
9738 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
9739 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
9740
9741 if (imm8 == 0) {
9742 assign( rHi, mkexpr(sHi) );
9743 assign( rLo, mkexpr(sLo) );
9744 }
9745 else if (imm8 >= 1 && imm8 <= 7) {
9746 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, imm8) );
9747 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, imm8) );
9748 }
9749 else if (imm8 == 8) {
9750 assign( rHi, mkexpr(dLo) );
9751 assign( rLo, mkexpr(sHi) );
9752 }
9753 else if (imm8 >= 9 && imm8 <= 15) {
9754 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-8) );
9755 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, imm8-8) );
9756 }
9757 else if (imm8 == 16) {
9758 assign( rHi, mkexpr(dHi) );
9759 assign( rLo, mkexpr(dLo) );
9760 }
9761 else if (imm8 >= 17 && imm8 <= 23) {
9762 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-16))) );
9763 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-16) );
9764 }
9765 else if (imm8 == 24) {
9766 assign( rHi, mkU64(0) );
9767 assign( rLo, mkexpr(dHi) );
9768 }
9769 else if (imm8 >= 25 && imm8 <= 31) {
9770 assign( rHi, mkU64(0) );
9771 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-24))) );
9772 }
9773 else if (imm8 >= 32 && imm8 <= 255) {
9774 assign( rHi, mkU64(0) );
9775 assign( rLo, mkU64(0) );
9776 }
9777 else
9778 vassert(0);
9779
9780 assign( res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
9781 return res;
9782}
9783
9784
sewardj150c9cd2008-02-09 01:16:02 +00009785/* Generate a SIGSEGV followed by a restart of the current instruction
9786 if effective_addr is not 16-aligned. This is required behaviour
9787 for some SSE3 instructions and all 128-bit SSSE3 instructions.
9788 This assumes that guest_RIP_curr_instr is set correctly! */
sewardjc4530ae2012-05-21 10:18:49 +00009789static
9790void gen_SEGV_if_not_XX_aligned ( IRTemp effective_addr, ULong mask )
sewardj150c9cd2008-02-09 01:16:02 +00009791{
9792 stmt(
9793 IRStmt_Exit(
9794 binop(Iop_CmpNE64,
sewardjc4530ae2012-05-21 10:18:49 +00009795 binop(Iop_And64,mkexpr(effective_addr),mkU64(mask)),
sewardj150c9cd2008-02-09 01:16:02 +00009796 mkU64(0)),
9797 Ijk_SigSEGV,
sewardjc6f970f2012-04-02 21:54:49 +00009798 IRConst_U64(guest_RIP_curr_instr),
9799 OFFB_RIP
sewardj150c9cd2008-02-09 01:16:02 +00009800 )
9801 );
9802}
9803
sewardjc4530ae2012-05-21 10:18:49 +00009804static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) {
9805 gen_SEGV_if_not_XX_aligned(effective_addr, 16-1);
9806}
9807
9808static void gen_SEGV_if_not_32_aligned ( IRTemp effective_addr ) {
9809 gen_SEGV_if_not_XX_aligned(effective_addr, 32-1);
9810}
sewardjd166e282008-02-06 11:42:45 +00009811
sewardjc4356f02007-11-09 21:15:04 +00009812/* Helper for deciding whether a given insn (starting at the opcode
9813 byte) may validly be used with a LOCK prefix. The following insns
9814 may be used with LOCK when their destination operand is in memory.
sewardje9d8a262009-07-01 08:06:34 +00009815 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
sewardjc4356f02007-11-09 21:15:04 +00009816
sewardje9d8a262009-07-01 08:06:34 +00009817 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
9818 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
9819 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
9820 SBB 81 /3, 81 /3, 82 /x, 83 /3, 18, 19
9821 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
9822 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
9823 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
sewardjc4356f02007-11-09 21:15:04 +00009824
9825 DEC FE /1, FF /1
9826 INC FE /0, FF /0
9827
9828 NEG F6 /3, F7 /3
9829 NOT F6 /2, F7 /2
9830
sewardje9d8a262009-07-01 08:06:34 +00009831 XCHG 86, 87
sewardjc4356f02007-11-09 21:15:04 +00009832
9833 BTC 0F BB, 0F BA /7
9834 BTR 0F B3, 0F BA /6
9835 BTS 0F AB, 0F BA /5
9836
9837 CMPXCHG 0F B0, 0F B1
9838 CMPXCHG8B 0F C7 /1
9839
9840 XADD 0F C0, 0F C1
sewardje9d8a262009-07-01 08:06:34 +00009841
9842 ------------------------------
9843
9844 80 /0 = addb $imm8, rm8
9845 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
9846 82 /0 = addb $imm8, rm8
9847 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
9848
9849 00 = addb r8, rm8
9850 01 = addl r32, rm32 and addw r16, rm16
9851
9852 Same for ADD OR ADC SBB AND SUB XOR
9853
9854 FE /1 = dec rm8
9855 FF /1 = dec rm32 and dec rm16
9856
9857 FE /0 = inc rm8
9858 FF /0 = inc rm32 and inc rm16
9859
9860 F6 /3 = neg rm8
9861 F7 /3 = neg rm32 and neg rm16
9862
9863 F6 /2 = not rm8
9864 F7 /2 = not rm32 and not rm16
9865
9866 0F BB = btcw r16, rm16 and btcl r32, rm32
9867 OF BA /7 = btcw $imm8, rm16 and btcw $imm8, rm32
9868
9869 Same for BTS, BTR
sewardjc4356f02007-11-09 21:15:04 +00009870*/
florian8462d112014-09-24 15:18:09 +00009871static Bool can_be_used_with_LOCK_prefix ( const UChar* opc )
sewardjc4356f02007-11-09 21:15:04 +00009872{
9873 switch (opc[0]) {
sewardje9d8a262009-07-01 08:06:34 +00009874 case 0x00: case 0x01: case 0x08: case 0x09:
9875 case 0x10: case 0x11: case 0x18: case 0x19:
9876 case 0x20: case 0x21: case 0x28: case 0x29:
9877 case 0x30: case 0x31:
9878 if (!epartIsReg(opc[1]))
9879 return True;
9880 break;
sewardjc4356f02007-11-09 21:15:04 +00009881
sewardje9d8a262009-07-01 08:06:34 +00009882 case 0x80: case 0x81: case 0x82: case 0x83:
9883 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
9884 && !epartIsReg(opc[1]))
sewardjc4356f02007-11-09 21:15:04 +00009885 return True;
9886 break;
9887
9888 case 0xFE: case 0xFF:
sewardje9d8a262009-07-01 08:06:34 +00009889 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
9890 && !epartIsReg(opc[1]))
sewardjc4356f02007-11-09 21:15:04 +00009891 return True;
9892 break;
9893
9894 case 0xF6: case 0xF7:
sewardje9d8a262009-07-01 08:06:34 +00009895 if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
9896 && !epartIsReg(opc[1]))
sewardjc4356f02007-11-09 21:15:04 +00009897 return True;
9898 break;
9899
9900 case 0x86: case 0x87:
sewardje9d8a262009-07-01 08:06:34 +00009901 if (!epartIsReg(opc[1]))
9902 return True;
9903 break;
sewardjc4356f02007-11-09 21:15:04 +00009904
9905 case 0x0F: {
9906 switch (opc[1]) {
9907 case 0xBB: case 0xB3: case 0xAB:
sewardje9d8a262009-07-01 08:06:34 +00009908 if (!epartIsReg(opc[2]))
9909 return True;
9910 break;
sewardjc4356f02007-11-09 21:15:04 +00009911 case 0xBA:
sewardje9d8a262009-07-01 08:06:34 +00009912 if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
9913 && !epartIsReg(opc[2]))
sewardjc4356f02007-11-09 21:15:04 +00009914 return True;
9915 break;
9916 case 0xB0: case 0xB1:
sewardje9d8a262009-07-01 08:06:34 +00009917 if (!epartIsReg(opc[2]))
9918 return True;
9919 break;
sewardjc4356f02007-11-09 21:15:04 +00009920 case 0xC7:
sewardje9d8a262009-07-01 08:06:34 +00009921 if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
sewardjc4356f02007-11-09 21:15:04 +00009922 return True;
9923 break;
9924 case 0xC0: case 0xC1:
sewardje9d8a262009-07-01 08:06:34 +00009925 if (!epartIsReg(opc[2]))
9926 return True;
9927 break;
sewardjc4356f02007-11-09 21:15:04 +00009928 default:
9929 break;
9930 } /* switch (opc[1]) */
9931 break;
9932 }
9933
9934 default:
9935 break;
9936 } /* switch (opc[0]) */
9937
9938 return False;
9939}
9940
9941
sewardjdf0e0022005-01-25 15:48:43 +00009942/*------------------------------------------------------------*/
sewardj80611e32012-01-20 13:07:24 +00009943/*--- ---*/
9944/*--- Top-level SSE/SSE2: dis_ESC_0F__SSE2 ---*/
9945/*--- ---*/
9946/*------------------------------------------------------------*/
9947
floriancacba8e2014-12-15 18:58:07 +00009948static Long dis_COMISD ( const VexAbiInfo* vbi, Prefix pfx,
sewardjc4530ae2012-05-21 10:18:49 +00009949 Long delta, Bool isAvx, UChar opc )
9950{
9951 vassert(opc == 0x2F/*COMISD*/ || opc == 0x2E/*UCOMISD*/);
9952 Int alen = 0;
9953 HChar dis_buf[50];
9954 IRTemp argL = newTemp(Ity_F64);
9955 IRTemp argR = newTemp(Ity_F64);
9956 UChar modrm = getUChar(delta);
9957 IRTemp addr = IRTemp_INVALID;
9958 if (epartIsReg(modrm)) {
9959 assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm),
9960 0/*lowest lane*/ ) );
9961 delta += 1;
9962 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
9963 opc==0x2E ? "u" : "",
9964 nameXMMReg(eregOfRexRM(pfx,modrm)),
9965 nameXMMReg(gregOfRexRM(pfx,modrm)) );
9966 } else {
9967 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9968 assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
9969 delta += alen;
9970 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
9971 opc==0x2E ? "u" : "",
9972 dis_buf,
9973 nameXMMReg(gregOfRexRM(pfx,modrm)) );
9974 }
9975 assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm),
9976 0/*lowest lane*/ ) );
9977
9978 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
9979 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
9980 stmt( IRStmt_Put(
9981 OFFB_CC_DEP1,
9982 binop( Iop_And64,
9983 unop( Iop_32Uto64,
9984 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ),
9985 mkU64(0x45)
9986 )));
9987 return delta;
9988}
9989
9990
floriancacba8e2014-12-15 18:58:07 +00009991static Long dis_COMISS ( const VexAbiInfo* vbi, Prefix pfx,
sewardjc4530ae2012-05-21 10:18:49 +00009992 Long delta, Bool isAvx, UChar opc )
9993{
9994 vassert(opc == 0x2F/*COMISS*/ || opc == 0x2E/*UCOMISS*/);
9995 Int alen = 0;
9996 HChar dis_buf[50];
9997 IRTemp argL = newTemp(Ity_F32);
9998 IRTemp argR = newTemp(Ity_F32);
9999 UChar modrm = getUChar(delta);
10000 IRTemp addr = IRTemp_INVALID;
10001 if (epartIsReg(modrm)) {
10002 assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm),
10003 0/*lowest lane*/ ) );
10004 delta += 1;
10005 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
10006 opc==0x2E ? "u" : "",
10007 nameXMMReg(eregOfRexRM(pfx,modrm)),
10008 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10009 } else {
10010 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10011 assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
10012 delta += alen;
10013 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
10014 opc==0x2E ? "u" : "",
10015 dis_buf,
10016 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10017 }
10018 assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm),
10019 0/*lowest lane*/ ) );
10020
10021 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
10022 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
10023 stmt( IRStmt_Put(
10024 OFFB_CC_DEP1,
10025 binop( Iop_And64,
10026 unop( Iop_32Uto64,
10027 binop(Iop_CmpF64,
10028 unop(Iop_F32toF64,mkexpr(argL)),
10029 unop(Iop_F32toF64,mkexpr(argR)))),
10030 mkU64(0x45)
10031 )));
10032 return delta;
10033}
10034
10035
floriancacba8e2014-12-15 18:58:07 +000010036static Long dis_PSHUFD_32x4 ( const VexAbiInfo* vbi, Prefix pfx,
sewardjc4530ae2012-05-21 10:18:49 +000010037 Long delta, Bool writesYmm )
10038{
10039 Int order;
10040 Int alen = 0;
10041 HChar dis_buf[50];
10042 IRTemp sV = newTemp(Ity_V128);
10043 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000010044 const HChar* strV = writesYmm ? "v" : "";
sewardjc4530ae2012-05-21 10:18:49 +000010045 IRTemp addr = IRTemp_INVALID;
10046 if (epartIsReg(modrm)) {
10047 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10048 order = (Int)getUChar(delta+1);
10049 delta += 1+1;
10050 DIP("%spshufd $%d,%s,%s\n", strV, order,
10051 nameXMMReg(eregOfRexRM(pfx,modrm)),
10052 nameXMMReg(gregOfRexRM(pfx,modrm)));
10053 } else {
10054 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
10055 1/*byte after the amode*/ );
10056 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10057 order = (Int)getUChar(delta+alen);
10058 delta += alen+1;
10059 DIP("%spshufd $%d,%s,%s\n", strV, order,
10060 dis_buf,
10061 nameXMMReg(gregOfRexRM(pfx,modrm)));
10062 }
10063
10064 IRTemp s3, s2, s1, s0;
10065 s3 = s2 = s1 = s0 = IRTemp_INVALID;
sewardj4b1cc832012-06-13 11:10:20 +000010066 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
sewardjc4530ae2012-05-21 10:18:49 +000010067
10068# define SEL(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
10069 IRTemp dV = newTemp(Ity_V128);
10070 assign(dV,
sewardj4b1cc832012-06-13 11:10:20 +000010071 mkV128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
10072 SEL((order>>2)&3), SEL((order>>0)&3) )
sewardjc4530ae2012-05-21 10:18:49 +000010073 );
10074# undef SEL
10075
10076 (writesYmm ? putYMMRegLoAndZU : putXMMReg)
10077 (gregOfRexRM(pfx,modrm), mkexpr(dV));
10078 return delta;
10079}
10080
10081
floriancacba8e2014-12-15 18:58:07 +000010082static Long dis_PSHUFD_32x8 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
sewardjcc3d2192013-03-27 11:37:33 +000010083{
10084 Int order;
10085 Int alen = 0;
10086 HChar dis_buf[50];
10087 IRTemp sV = newTemp(Ity_V256);
10088 UChar modrm = getUChar(delta);
10089 IRTemp addr = IRTemp_INVALID;
10090 UInt rG = gregOfRexRM(pfx,modrm);
10091 if (epartIsReg(modrm)) {
10092 UInt rE = eregOfRexRM(pfx,modrm);
10093 assign( sV, getYMMReg(rE) );
10094 order = (Int)getUChar(delta+1);
10095 delta += 1+1;
10096 DIP("vpshufd $%d,%s,%s\n", order, nameYMMReg(rE), nameYMMReg(rG));
10097 } else {
10098 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
10099 1/*byte after the amode*/ );
10100 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
10101 order = (Int)getUChar(delta+alen);
10102 delta += alen+1;
10103 DIP("vpshufd $%d,%s,%s\n", order, dis_buf, nameYMMReg(rG));
10104 }
10105
10106 IRTemp s[8];
10107 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
10108 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
10109 &s[3], &s[2], &s[1], &s[0] );
10110
10111 putYMMReg( rG, mkV256from32s( s[4 + ((order>>6)&3)],
10112 s[4 + ((order>>4)&3)],
10113 s[4 + ((order>>2)&3)],
10114 s[4 + ((order>>0)&3)],
10115 s[0 + ((order>>6)&3)],
10116 s[0 + ((order>>4)&3)],
10117 s[0 + ((order>>2)&3)],
10118 s[0 + ((order>>0)&3)] ) );
10119 return delta;
10120}
10121
10122
sewardjc4530ae2012-05-21 10:18:49 +000010123static IRTemp math_PSRLDQ ( IRTemp sV, Int imm )
10124{
10125 IRTemp dV = newTemp(Ity_V128);
10126 IRTemp hi64 = newTemp(Ity_I64);
10127 IRTemp lo64 = newTemp(Ity_I64);
10128 IRTemp hi64r = newTemp(Ity_I64);
10129 IRTemp lo64r = newTemp(Ity_I64);
10130
10131 vassert(imm >= 0 && imm <= 255);
10132 if (imm >= 16) {
10133 assign(dV, mkV128(0x0000));
10134 return dV;
10135 }
10136
10137 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
10138 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
10139
10140 if (imm == 0) {
10141 assign( lo64r, mkexpr(lo64) );
10142 assign( hi64r, mkexpr(hi64) );
10143 }
10144 else
10145 if (imm == 8) {
10146 assign( hi64r, mkU64(0) );
10147 assign( lo64r, mkexpr(hi64) );
10148 }
10149 else
10150 if (imm > 8) {
10151 assign( hi64r, mkU64(0) );
sewardj251b59e2012-05-25 13:51:07 +000010152 assign( lo64r, binop( Iop_Shr64, mkexpr(hi64), mkU8( 8*(imm-8) ) ));
sewardjc4530ae2012-05-21 10:18:49 +000010153 } else {
sewardj251b59e2012-05-25 13:51:07 +000010154 assign( hi64r, binop( Iop_Shr64, mkexpr(hi64), mkU8(8 * imm) ));
sewardjc4530ae2012-05-21 10:18:49 +000010155 assign( lo64r,
10156 binop( Iop_Or64,
10157 binop(Iop_Shr64, mkexpr(lo64),
10158 mkU8(8 * imm)),
10159 binop(Iop_Shl64, mkexpr(hi64),
10160 mkU8(8 * (8 - imm)) )
10161 )
10162 );
10163 }
10164
10165 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
10166 return dV;
10167}
10168
10169
sewardj251b59e2012-05-25 13:51:07 +000010170static IRTemp math_PSLLDQ ( IRTemp sV, Int imm )
10171{
10172 IRTemp dV = newTemp(Ity_V128);
10173 IRTemp hi64 = newTemp(Ity_I64);
10174 IRTemp lo64 = newTemp(Ity_I64);
10175 IRTemp hi64r = newTemp(Ity_I64);
10176 IRTemp lo64r = newTemp(Ity_I64);
10177
10178 vassert(imm >= 0 && imm <= 255);
10179 if (imm >= 16) {
10180 assign(dV, mkV128(0x0000));
10181 return dV;
10182 }
10183
10184 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
10185 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
10186
10187 if (imm == 0) {
10188 assign( lo64r, mkexpr(lo64) );
10189 assign( hi64r, mkexpr(hi64) );
10190 }
10191 else
10192 if (imm == 8) {
10193 assign( lo64r, mkU64(0) );
10194 assign( hi64r, mkexpr(lo64) );
10195 }
10196 else
10197 if (imm > 8) {
10198 assign( lo64r, mkU64(0) );
10199 assign( hi64r, binop( Iop_Shl64, mkexpr(lo64), mkU8( 8*(imm-8) ) ));
10200 } else {
10201 assign( lo64r, binop( Iop_Shl64, mkexpr(lo64), mkU8(8 * imm) ));
10202 assign( hi64r,
10203 binop( Iop_Or64,
10204 binop(Iop_Shl64, mkexpr(hi64),
10205 mkU8(8 * imm)),
10206 binop(Iop_Shr64, mkexpr(lo64),
10207 mkU8(8 * (8 - imm)) )
10208 )
10209 );
10210 }
10211
10212 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
10213 return dV;
10214}
10215
10216
floriancacba8e2014-12-15 18:58:07 +000010217static Long dis_CVTxSD2SI ( const VexAbiInfo* vbi, Prefix pfx,
sewardj80804d12012-05-22 10:48:13 +000010218 Long delta, Bool isAvx, UChar opc, Int sz )
sewardjc4530ae2012-05-21 10:18:49 +000010219{
10220 vassert(opc == 0x2D/*CVTSD2SI*/ || opc == 0x2C/*CVTTSD2SI*/);
sewardjc4530ae2012-05-21 10:18:49 +000010221 HChar dis_buf[50];
sewardj80804d12012-05-22 10:48:13 +000010222 Int alen = 0;
10223 UChar modrm = getUChar(delta);
10224 IRTemp addr = IRTemp_INVALID;
sewardjc4530ae2012-05-21 10:18:49 +000010225 IRTemp rmode = newTemp(Ity_I32);
10226 IRTemp f64lo = newTemp(Ity_F64);
10227 Bool r2zero = toBool(opc == 0x2C);
10228
sewardjc4530ae2012-05-21 10:18:49 +000010229 if (epartIsReg(modrm)) {
10230 delta += 1;
10231 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
10232 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10233 nameXMMReg(eregOfRexRM(pfx,modrm)),
10234 nameIReg(sz, gregOfRexRM(pfx,modrm),
10235 False));
10236 } else {
10237 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10238 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10239 delta += alen;
10240 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10241 dis_buf,
10242 nameIReg(sz, gregOfRexRM(pfx,modrm),
10243 False));
10244 }
10245
10246 if (r2zero) {
10247 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10248 } else {
10249 assign( rmode, get_sse_roundingmode() );
10250 }
10251
10252 if (sz == 4) {
10253 putIReg32( gregOfRexRM(pfx,modrm),
10254 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
10255 } else {
sewardj80804d12012-05-22 10:48:13 +000010256 vassert(sz == 8);
sewardjc4530ae2012-05-21 10:18:49 +000010257 putIReg64( gregOfRexRM(pfx,modrm),
10258 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) );
10259 }
10260
10261 return delta;
10262}
10263
10264
floriancacba8e2014-12-15 18:58:07 +000010265static Long dis_CVTxSS2SI ( const VexAbiInfo* vbi, Prefix pfx,
sewardj80804d12012-05-22 10:48:13 +000010266 Long delta, Bool isAvx, UChar opc, Int sz )
10267{
10268 vassert(opc == 0x2D/*CVTSS2SI*/ || opc == 0x2C/*CVTTSS2SI*/);
10269 HChar dis_buf[50];
10270 Int alen = 0;
10271 UChar modrm = getUChar(delta);
10272 IRTemp addr = IRTemp_INVALID;
10273 IRTemp rmode = newTemp(Ity_I32);
10274 IRTemp f32lo = newTemp(Ity_F32);
10275 Bool r2zero = toBool(opc == 0x2C);
10276
10277 if (epartIsReg(modrm)) {
10278 delta += 1;
10279 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
10280 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10281 nameXMMReg(eregOfRexRM(pfx,modrm)),
10282 nameIReg(sz, gregOfRexRM(pfx,modrm),
10283 False));
10284 } else {
10285 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10286 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
10287 delta += alen;
10288 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10289 dis_buf,
10290 nameIReg(sz, gregOfRexRM(pfx,modrm),
10291 False));
10292 }
10293
10294 if (r2zero) {
10295 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10296 } else {
10297 assign( rmode, get_sse_roundingmode() );
10298 }
10299
10300 if (sz == 4) {
10301 putIReg32( gregOfRexRM(pfx,modrm),
10302 binop( Iop_F64toI32S,
10303 mkexpr(rmode),
10304 unop(Iop_F32toF64, mkexpr(f32lo))) );
10305 } else {
10306 vassert(sz == 8);
10307 putIReg64( gregOfRexRM(pfx,modrm),
10308 binop( Iop_F64toI64S,
10309 mkexpr(rmode),
10310 unop(Iop_F32toF64, mkexpr(f32lo))) );
10311 }
10312
10313 return delta;
10314}
10315
10316
floriancacba8e2014-12-15 18:58:07 +000010317static Long dis_CVTPS2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj66becf32012-06-18 23:15:16 +000010318 Long delta, Bool isAvx )
sewardjc4530ae2012-05-21 10:18:49 +000010319{
10320 IRTemp addr = IRTemp_INVALID;
10321 Int alen = 0;
10322 HChar dis_buf[50];
10323 IRTemp f32lo = newTemp(Ity_F32);
10324 IRTemp f32hi = newTemp(Ity_F32);
10325 UChar modrm = getUChar(delta);
10326 UInt rG = gregOfRexRM(pfx,modrm);
10327 if (epartIsReg(modrm)) {
10328 UInt rE = eregOfRexRM(pfx,modrm);
10329 assign( f32lo, getXMMRegLane32F(rE, 0) );
10330 assign( f32hi, getXMMRegLane32F(rE, 1) );
10331 delta += 1;
10332 DIP("%scvtps2pd %s,%s\n",
10333 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
10334 } else {
10335 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10336 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
10337 assign( f32hi, loadLE(Ity_F32,
10338 binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
10339 delta += alen;
10340 DIP("%scvtps2pd %s,%s\n",
10341 isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
10342 }
10343
10344 putXMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32hi)) );
10345 putXMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32lo)) );
10346 if (isAvx)
10347 putYMMRegLane128( rG, 1, mkV128(0));
10348 return delta;
10349}
10350
10351
floriancacba8e2014-12-15 18:58:07 +000010352static Long dis_CVTPS2PD_256 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj66becf32012-06-18 23:15:16 +000010353 Long delta )
10354{
10355 IRTemp addr = IRTemp_INVALID;
10356 Int alen = 0;
10357 HChar dis_buf[50];
10358 IRTemp f32_0 = newTemp(Ity_F32);
10359 IRTemp f32_1 = newTemp(Ity_F32);
10360 IRTemp f32_2 = newTemp(Ity_F32);
10361 IRTemp f32_3 = newTemp(Ity_F32);
10362 UChar modrm = getUChar(delta);
10363 UInt rG = gregOfRexRM(pfx,modrm);
10364 if (epartIsReg(modrm)) {
10365 UInt rE = eregOfRexRM(pfx,modrm);
10366 assign( f32_0, getXMMRegLane32F(rE, 0) );
10367 assign( f32_1, getXMMRegLane32F(rE, 1) );
10368 assign( f32_2, getXMMRegLane32F(rE, 2) );
10369 assign( f32_3, getXMMRegLane32F(rE, 3) );
10370 delta += 1;
10371 DIP("vcvtps2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
10372 } else {
10373 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10374 assign( f32_0, loadLE(Ity_F32, mkexpr(addr)) );
10375 assign( f32_1, loadLE(Ity_F32,
10376 binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
10377 assign( f32_2, loadLE(Ity_F32,
10378 binop(Iop_Add64,mkexpr(addr),mkU64(8))) );
10379 assign( f32_3, loadLE(Ity_F32,
10380 binop(Iop_Add64,mkexpr(addr),mkU64(12))) );
10381 delta += alen;
10382 DIP("vcvtps2pd %s,%s\n", dis_buf, nameYMMReg(rG));
10383 }
10384
10385 putYMMRegLane64F( rG, 3, unop(Iop_F32toF64, mkexpr(f32_3)) );
10386 putYMMRegLane64F( rG, 2, unop(Iop_F32toF64, mkexpr(f32_2)) );
10387 putYMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32_1)) );
10388 putYMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32_0)) );
10389 return delta;
10390}
10391
10392
floriancacba8e2014-12-15 18:58:07 +000010393static Long dis_CVTPD2PS_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj6fcd43e2012-06-14 08:51:35 +000010394 Long delta, Bool isAvx )
sewardjc4530ae2012-05-21 10:18:49 +000010395{
10396 IRTemp addr = IRTemp_INVALID;
10397 Int alen = 0;
10398 HChar dis_buf[50];
10399 UChar modrm = getUChar(delta);
10400 UInt rG = gregOfRexRM(pfx,modrm);
10401 IRTemp argV = newTemp(Ity_V128);
10402 IRTemp rmode = newTemp(Ity_I32);
10403 if (epartIsReg(modrm)) {
10404 UInt rE = eregOfRexRM(pfx,modrm);
10405 assign( argV, getXMMReg(rE) );
10406 delta += 1;
10407 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
10408 nameXMMReg(rE), nameXMMReg(rG));
10409 } else {
10410 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10411 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10412 delta += alen;
10413 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
10414 dis_buf, nameXMMReg(rG) );
10415 }
10416
10417 assign( rmode, get_sse_roundingmode() );
10418 IRTemp t0 = newTemp(Ity_F64);
10419 IRTemp t1 = newTemp(Ity_F64);
10420 assign( t0, unop(Iop_ReinterpI64asF64,
10421 unop(Iop_V128to64, mkexpr(argV))) );
10422 assign( t1, unop(Iop_ReinterpI64asF64,
10423 unop(Iop_V128HIto64, mkexpr(argV))) );
10424
10425# define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), mkexpr(_t) )
10426 putXMMRegLane32( rG, 3, mkU32(0) );
10427 putXMMRegLane32( rG, 2, mkU32(0) );
10428 putXMMRegLane32F( rG, 1, CVT(t1) );
10429 putXMMRegLane32F( rG, 0, CVT(t0) );
10430# undef CVT
10431 if (isAvx)
10432 putYMMRegLane128( rG, 1, mkV128(0) );
10433
10434 return delta;
10435}
10436
10437
floriancacba8e2014-12-15 18:58:07 +000010438static Long dis_CVTxPS2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj66becf32012-06-18 23:15:16 +000010439 Long delta, Bool isAvx, Bool r2zero )
sewardj251b59e2012-05-25 13:51:07 +000010440{
10441 IRTemp addr = IRTemp_INVALID;
10442 Int alen = 0;
10443 HChar dis_buf[50];
10444 UChar modrm = getUChar(delta);
10445 IRTemp argV = newTemp(Ity_V128);
10446 IRTemp rmode = newTemp(Ity_I32);
10447 UInt rG = gregOfRexRM(pfx,modrm);
10448 IRTemp t0, t1, t2, t3;
10449
10450 if (epartIsReg(modrm)) {
10451 UInt rE = eregOfRexRM(pfx,modrm);
10452 assign( argV, getXMMReg(rE) );
10453 delta += 1;
sewardj66becf32012-06-18 23:15:16 +000010454 DIP("%scvt%sps2dq %s,%s\n",
10455 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
sewardj251b59e2012-05-25 13:51:07 +000010456 } else {
10457 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10458 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10459 delta += alen;
sewardj66becf32012-06-18 23:15:16 +000010460 DIP("%scvt%sps2dq %s,%s\n",
10461 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
sewardj251b59e2012-05-25 13:51:07 +000010462 }
10463
10464 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
10465 : get_sse_roundingmode() );
10466 t0 = t1 = t2 = t3 = IRTemp_INVALID;
sewardj4b1cc832012-06-13 11:10:20 +000010467 breakupV128to32s( argV, &t3, &t2, &t1, &t0 );
sewardj251b59e2012-05-25 13:51:07 +000010468 /* This is less than ideal. If it turns out to be a performance
10469 bottleneck it can be improved. */
10470# define CVT(_t) \
10471 binop( Iop_F64toI32S, \
10472 mkexpr(rmode), \
10473 unop( Iop_F32toF64, \
10474 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
10475
10476 putXMMRegLane32( rG, 3, CVT(t3) );
10477 putXMMRegLane32( rG, 2, CVT(t2) );
10478 putXMMRegLane32( rG, 1, CVT(t1) );
10479 putXMMRegLane32( rG, 0, CVT(t0) );
10480# undef CVT
10481 if (isAvx)
10482 putYMMRegLane128( rG, 1, mkV128(0) );
10483
10484 return delta;
10485}
10486
10487
floriancacba8e2014-12-15 18:58:07 +000010488static Long dis_CVTxPS2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj66becf32012-06-18 23:15:16 +000010489 Long delta, Bool r2zero )
10490{
10491 IRTemp addr = IRTemp_INVALID;
10492 Int alen = 0;
10493 HChar dis_buf[50];
10494 UChar modrm = getUChar(delta);
10495 IRTemp argV = newTemp(Ity_V256);
sewardj66becf32012-06-18 23:15:16 +000010496 IRTemp rmode = newTemp(Ity_I32);
10497 UInt rG = gregOfRexRM(pfx,modrm);
10498 IRTemp t0, t1, t2, t3, t4, t5, t6, t7;
10499
10500 if (epartIsReg(modrm)) {
10501 UInt rE = eregOfRexRM(pfx,modrm);
10502 assign( argV, getYMMReg(rE) );
10503 delta += 1;
10504 DIP("vcvt%sps2dq %s,%s\n",
10505 r2zero ? "t" : "", nameYMMReg(rE), nameYMMReg(rG));
10506 } else {
10507 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10508 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10509 delta += alen;
10510 DIP("vcvt%sps2dq %s,%s\n",
10511 r2zero ? "t" : "", dis_buf, nameYMMReg(rG) );
10512 }
10513
10514 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
10515 : get_sse_roundingmode() );
10516 t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = IRTemp_INVALID;
sewardj4f228902012-06-21 09:17:58 +000010517 breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 );
sewardj66becf32012-06-18 23:15:16 +000010518 /* This is less than ideal. If it turns out to be a performance
10519 bottleneck it can be improved. */
10520# define CVT(_t) \
10521 binop( Iop_F64toI32S, \
10522 mkexpr(rmode), \
10523 unop( Iop_F32toF64, \
10524 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
10525
10526 putYMMRegLane32( rG, 7, CVT(t7) );
10527 putYMMRegLane32( rG, 6, CVT(t6) );
10528 putYMMRegLane32( rG, 5, CVT(t5) );
10529 putYMMRegLane32( rG, 4, CVT(t4) );
10530 putYMMRegLane32( rG, 3, CVT(t3) );
10531 putYMMRegLane32( rG, 2, CVT(t2) );
10532 putYMMRegLane32( rG, 1, CVT(t1) );
10533 putYMMRegLane32( rG, 0, CVT(t0) );
10534# undef CVT
10535
10536 return delta;
10537}
10538
10539
floriancacba8e2014-12-15 18:58:07 +000010540static Long dis_CVTxPD2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj66becf32012-06-18 23:15:16 +000010541 Long delta, Bool isAvx, Bool r2zero )
10542{
10543 IRTemp addr = IRTemp_INVALID;
10544 Int alen = 0;
10545 HChar dis_buf[50];
10546 UChar modrm = getUChar(delta);
10547 IRTemp argV = newTemp(Ity_V128);
10548 IRTemp rmode = newTemp(Ity_I32);
10549 UInt rG = gregOfRexRM(pfx,modrm);
10550 IRTemp t0, t1;
10551
10552 if (epartIsReg(modrm)) {
10553 UInt rE = eregOfRexRM(pfx,modrm);
10554 assign( argV, getXMMReg(rE) );
10555 delta += 1;
10556 DIP("%scvt%spd2dq %s,%s\n",
10557 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
10558 } else {
10559 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10560 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10561 delta += alen;
10562 DIP("%scvt%spd2dqx %s,%s\n",
10563 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10564 }
10565
10566 if (r2zero) {
10567 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10568 } else {
10569 assign( rmode, get_sse_roundingmode() );
10570 }
10571
10572 t0 = newTemp(Ity_F64);
10573 t1 = newTemp(Ity_F64);
10574 assign( t0, unop(Iop_ReinterpI64asF64,
10575 unop(Iop_V128to64, mkexpr(argV))) );
10576 assign( t1, unop(Iop_ReinterpI64asF64,
10577 unop(Iop_V128HIto64, mkexpr(argV))) );
10578
10579# define CVT(_t) binop( Iop_F64toI32S, \
10580 mkexpr(rmode), \
10581 mkexpr(_t) )
10582
10583 putXMMRegLane32( rG, 3, mkU32(0) );
10584 putXMMRegLane32( rG, 2, mkU32(0) );
10585 putXMMRegLane32( rG, 1, CVT(t1) );
10586 putXMMRegLane32( rG, 0, CVT(t0) );
10587# undef CVT
10588 if (isAvx)
10589 putYMMRegLane128( rG, 1, mkV128(0) );
10590
10591 return delta;
10592}
10593
10594
floriancacba8e2014-12-15 18:58:07 +000010595static Long dis_CVTxPD2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj66becf32012-06-18 23:15:16 +000010596 Long delta, Bool r2zero )
10597{
10598 IRTemp addr = IRTemp_INVALID;
10599 Int alen = 0;
10600 HChar dis_buf[50];
10601 UChar modrm = getUChar(delta);
10602 IRTemp argV = newTemp(Ity_V256);
10603 IRTemp rmode = newTemp(Ity_I32);
10604 UInt rG = gregOfRexRM(pfx,modrm);
10605 IRTemp t0, t1, t2, t3;
10606
10607 if (epartIsReg(modrm)) {
10608 UInt rE = eregOfRexRM(pfx,modrm);
10609 assign( argV, getYMMReg(rE) );
10610 delta += 1;
10611 DIP("vcvt%spd2dq %s,%s\n",
10612 r2zero ? "t" : "", nameYMMReg(rE), nameXMMReg(rG));
10613 } else {
10614 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10615 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10616 delta += alen;
10617 DIP("vcvt%spd2dqy %s,%s\n",
10618 r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10619 }
10620
10621 if (r2zero) {
10622 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10623 } else {
10624 assign( rmode, get_sse_roundingmode() );
10625 }
10626
10627 t0 = IRTemp_INVALID;
10628 t1 = IRTemp_INVALID;
10629 t2 = IRTemp_INVALID;
10630 t3 = IRTemp_INVALID;
10631 breakupV256to64s( argV, &t3, &t2, &t1, &t0 );
10632
10633# define CVT(_t) binop( Iop_F64toI32S, \
10634 mkexpr(rmode), \
10635 unop( Iop_ReinterpI64asF64, \
10636 mkexpr(_t) ) )
10637
10638 putXMMRegLane32( rG, 3, CVT(t3) );
10639 putXMMRegLane32( rG, 2, CVT(t2) );
10640 putXMMRegLane32( rG, 1, CVT(t1) );
10641 putXMMRegLane32( rG, 0, CVT(t0) );
10642# undef CVT
10643 putYMMRegLane128( rG, 1, mkV128(0) );
10644
10645 return delta;
10646}
10647
10648
floriancacba8e2014-12-15 18:58:07 +000010649static Long dis_CVTDQ2PS_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj66becf32012-06-18 23:15:16 +000010650 Long delta, Bool isAvx )
10651{
10652 IRTemp addr = IRTemp_INVALID;
10653 Int alen = 0;
10654 HChar dis_buf[50];
10655 UChar modrm = getUChar(delta);
10656 IRTemp argV = newTemp(Ity_V128);
10657 IRTemp rmode = newTemp(Ity_I32);
10658 UInt rG = gregOfRexRM(pfx,modrm);
10659 IRTemp t0, t1, t2, t3;
10660
10661 if (epartIsReg(modrm)) {
10662 UInt rE = eregOfRexRM(pfx,modrm);
10663 assign( argV, getXMMReg(rE) );
10664 delta += 1;
10665 DIP("%scvtdq2ps %s,%s\n",
10666 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
10667 } else {
10668 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10669 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10670 delta += alen;
10671 DIP("%scvtdq2ps %s,%s\n",
10672 isAvx ? "v" : "", dis_buf, nameXMMReg(rG) );
10673 }
10674
10675 assign( rmode, get_sse_roundingmode() );
10676 t0 = IRTemp_INVALID;
10677 t1 = IRTemp_INVALID;
10678 t2 = IRTemp_INVALID;
10679 t3 = IRTemp_INVALID;
10680 breakupV128to32s( argV, &t3, &t2, &t1, &t0 );
10681
10682# define CVT(_t) binop( Iop_F64toF32, \
10683 mkexpr(rmode), \
10684 unop(Iop_I32StoF64,mkexpr(_t)))
10685
10686 putXMMRegLane32F( rG, 3, CVT(t3) );
10687 putXMMRegLane32F( rG, 2, CVT(t2) );
10688 putXMMRegLane32F( rG, 1, CVT(t1) );
10689 putXMMRegLane32F( rG, 0, CVT(t0) );
10690# undef CVT
10691 if (isAvx)
10692 putYMMRegLane128( rG, 1, mkV128(0) );
10693
10694 return delta;
10695}
10696
floriancacba8e2014-12-15 18:58:07 +000010697static Long dis_CVTDQ2PS_256 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj66becf32012-06-18 23:15:16 +000010698 Long delta )
10699{
10700 IRTemp addr = IRTemp_INVALID;
10701 Int alen = 0;
10702 HChar dis_buf[50];
10703 UChar modrm = getUChar(delta);
10704 IRTemp argV = newTemp(Ity_V256);
sewardj66becf32012-06-18 23:15:16 +000010705 IRTemp rmode = newTemp(Ity_I32);
10706 UInt rG = gregOfRexRM(pfx,modrm);
10707 IRTemp t0, t1, t2, t3, t4, t5, t6, t7;
10708
10709 if (epartIsReg(modrm)) {
10710 UInt rE = eregOfRexRM(pfx,modrm);
10711 assign( argV, getYMMReg(rE) );
10712 delta += 1;
10713 DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
10714 } else {
10715 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10716 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10717 delta += alen;
10718 DIP("vcvtdq2ps %s,%s\n", dis_buf, nameYMMReg(rG) );
10719 }
10720
10721 assign( rmode, get_sse_roundingmode() );
10722 t0 = IRTemp_INVALID;
10723 t1 = IRTemp_INVALID;
10724 t2 = IRTemp_INVALID;
10725 t3 = IRTemp_INVALID;
10726 t4 = IRTemp_INVALID;
10727 t5 = IRTemp_INVALID;
10728 t6 = IRTemp_INVALID;
10729 t7 = IRTemp_INVALID;
sewardj4f228902012-06-21 09:17:58 +000010730 breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 );
sewardj66becf32012-06-18 23:15:16 +000010731
10732# define CVT(_t) binop( Iop_F64toF32, \
10733 mkexpr(rmode), \
10734 unop(Iop_I32StoF64,mkexpr(_t)))
10735
10736 putYMMRegLane32F( rG, 7, CVT(t7) );
10737 putYMMRegLane32F( rG, 6, CVT(t6) );
10738 putYMMRegLane32F( rG, 5, CVT(t5) );
10739 putYMMRegLane32F( rG, 4, CVT(t4) );
10740 putYMMRegLane32F( rG, 3, CVT(t3) );
10741 putYMMRegLane32F( rG, 2, CVT(t2) );
10742 putYMMRegLane32F( rG, 1, CVT(t1) );
10743 putYMMRegLane32F( rG, 0, CVT(t0) );
10744# undef CVT
10745
10746 return delta;
10747}
10748
10749
floriancacba8e2014-12-15 18:58:07 +000010750static Long dis_PMOVMSKB_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj8ef22422012-05-24 16:29:18 +000010751 Long delta, Bool isAvx )
10752{
sewardj8ef22422012-05-24 16:29:18 +000010753 UChar modrm = getUChar(delta);
10754 vassert(epartIsReg(modrm)); /* ensured by caller */
10755 UInt rE = eregOfRexRM(pfx,modrm);
10756 UInt rG = gregOfRexRM(pfx,modrm);
sewardj78a20592012-12-13 18:29:56 +000010757 IRTemp t0 = newTemp(Ity_V128);
10758 IRTemp t1 = newTemp(Ity_I32);
10759 assign(t0, getXMMReg(rE));
10760 assign(t1, unop(Iop_16Uto32, unop(Iop_GetMSBs8x16, mkexpr(t0))));
10761 putIReg32(rG, mkexpr(t1));
sewardj8ef22422012-05-24 16:29:18 +000010762 DIP("%spmovmskb %s,%s\n", isAvx ? "v" : "", nameXMMReg(rE),
10763 nameIReg32(rG));
10764 delta += 1;
10765 return delta;
10766}
10767
10768
floriancacba8e2014-12-15 18:58:07 +000010769static Long dis_PMOVMSKB_256 ( const VexAbiInfo* vbi, Prefix pfx,
sewardjcc3d2192013-03-27 11:37:33 +000010770 Long delta )
10771{
10772 UChar modrm = getUChar(delta);
10773 vassert(epartIsReg(modrm)); /* ensured by caller */
10774 UInt rE = eregOfRexRM(pfx,modrm);
10775 UInt rG = gregOfRexRM(pfx,modrm);
10776 IRTemp t0 = newTemp(Ity_V128);
10777 IRTemp t1 = newTemp(Ity_V128);
10778 IRTemp t2 = newTemp(Ity_I16);
10779 IRTemp t3 = newTemp(Ity_I16);
10780 assign(t0, getYMMRegLane128(rE, 0));
10781 assign(t1, getYMMRegLane128(rE, 1));
10782 assign(t2, unop(Iop_GetMSBs8x16, mkexpr(t0)));
10783 assign(t3, unop(Iop_GetMSBs8x16, mkexpr(t1)));
10784 putIReg32(rG, binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)));
10785 DIP("vpmovmskb %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
10786 delta += 1;
10787 return delta;
10788}
10789
10790
sewardj4b1cc832012-06-13 11:10:20 +000010791/* FIXME: why not just use InterleaveLO / InterleaveHI? I think the
10792 relevant ops are "xIsH ? InterleaveHI32x4 : InterleaveLO32x4". */
sewardj56c30312012-06-12 08:45:39 +000010793/* Does the maths for 128 bit versions of UNPCKLPS and UNPCKHPS */
10794static IRTemp math_UNPCKxPS_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
sewardjc4530ae2012-05-21 10:18:49 +000010795{
10796 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
sewardjc4530ae2012-05-21 10:18:49 +000010797 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
sewardj4b1cc832012-06-13 11:10:20 +000010798 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
10799 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
sewardjc4530ae2012-05-21 10:18:49 +000010800 IRTemp res = newTemp(Ity_V128);
sewardj4b1cc832012-06-13 11:10:20 +000010801 assign(res, xIsH ? mkV128from32s( s3, d3, s2, d2 )
10802 : mkV128from32s( s1, d1, s0, d0 ));
sewardj56c30312012-06-12 08:45:39 +000010803 return res;
10804}
10805
10806
10807/* FIXME: why not just use InterleaveLO / InterleaveHI ?? */
10808/* Does the maths for 128 bit versions of UNPCKLPD and UNPCKHPD */
10809static IRTemp math_UNPCKxPD_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
10810{
10811 IRTemp s1 = newTemp(Ity_I64);
10812 IRTemp s0 = newTemp(Ity_I64);
10813 IRTemp d1 = newTemp(Ity_I64);
10814 IRTemp d0 = newTemp(Ity_I64);
10815 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
10816 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
10817 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
10818 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
10819 IRTemp res = newTemp(Ity_V128);
10820 assign(res, xIsH ? binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1))
10821 : binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)));
sewardjc4530ae2012-05-21 10:18:49 +000010822 return res;
10823}
10824
10825
sewardj4b1cc832012-06-13 11:10:20 +000010826/* Does the maths for 256 bit versions of UNPCKLPD and UNPCKHPD.
10827 Doesn't seem like this fits in either of the Iop_Interleave{LO,HI}
10828 or the Iop_Cat{Odd,Even}Lanes idioms, hence just do it the stupid
10829 way. */
10830static IRTemp math_UNPCKxPD_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
10831{
10832 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
10833 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
10834 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
10835 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
10836 IRTemp res = newTemp(Ity_V256);
10837 assign(res, xIsH
10838 ? IRExpr_Qop(Iop_64x4toV256, mkexpr(s3), mkexpr(d3),
10839 mkexpr(s1), mkexpr(d1))
10840 : IRExpr_Qop(Iop_64x4toV256, mkexpr(s2), mkexpr(d2),
10841 mkexpr(s0), mkexpr(d0)));
10842 return res;
10843}
10844
10845
10846/* FIXME: this is really bad. Surely can do something better here?
10847 One observation is that the steering in the upper and lower 128 bit
10848 halves is the same as with math_UNPCKxPS_128, so we simply split
10849 into two halves, and use that. Consequently any improvement in
10850 math_UNPCKxPS_128 (probably, to use interleave-style primops)
10851 benefits this too. */
10852static IRTemp math_UNPCKxPS_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
10853{
sewardjfe0c5e72012-06-15 15:48:07 +000010854 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
10855 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
10856 breakupV256toV128s( sV, &sVhi, &sVlo );
10857 breakupV256toV128s( dV, &dVhi, &dVlo );
sewardj4b1cc832012-06-13 11:10:20 +000010858 IRTemp rVhi = math_UNPCKxPS_128(sVhi, dVhi, xIsH);
10859 IRTemp rVlo = math_UNPCKxPS_128(sVlo, dVlo, xIsH);
10860 IRTemp rV = newTemp(Ity_V256);
10861 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
10862 return rV;
10863}
10864
10865
10866static IRTemp math_SHUFPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
sewardj251b59e2012-05-25 13:51:07 +000010867{
10868 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
10869 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
10870 vassert(imm8 < 256);
10871
sewardj4b1cc832012-06-13 11:10:20 +000010872 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
10873 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
sewardj251b59e2012-05-25 13:51:07 +000010874
10875# define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
10876# define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
10877 IRTemp res = newTemp(Ity_V128);
10878 assign(res,
sewardj4b1cc832012-06-13 11:10:20 +000010879 mkV128from32s( SELS((imm8>>6)&3), SELS((imm8>>4)&3),
10880 SELD((imm8>>2)&3), SELD((imm8>>0)&3) ) );
sewardj251b59e2012-05-25 13:51:07 +000010881# undef SELD
10882# undef SELS
10883 return res;
10884}
10885
10886
sewardj4b1cc832012-06-13 11:10:20 +000010887/* 256-bit SHUFPS appears to steer each of the 128-bit halves
10888 identically. Hence do the clueless thing and use math_SHUFPS_128
10889 twice. */
10890static IRTemp math_SHUFPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
10891{
sewardjfe0c5e72012-06-15 15:48:07 +000010892 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
10893 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
10894 breakupV256toV128s( sV, &sVhi, &sVlo );
10895 breakupV256toV128s( dV, &dVhi, &dVlo );
sewardj4b1cc832012-06-13 11:10:20 +000010896 IRTemp rVhi = math_SHUFPS_128(sVhi, dVhi, imm8);
10897 IRTemp rVlo = math_SHUFPS_128(sVlo, dVlo, imm8);
10898 IRTemp rV = newTemp(Ity_V256);
10899 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
10900 return rV;
10901}
10902
10903
sewardj21459cb2012-06-18 14:05:52 +000010904static IRTemp math_SHUFPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
10905{
10906 IRTemp s1 = newTemp(Ity_I64);
10907 IRTemp s0 = newTemp(Ity_I64);
10908 IRTemp d1 = newTemp(Ity_I64);
10909 IRTemp d0 = newTemp(Ity_I64);
10910
10911 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
10912 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
10913 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
10914 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
10915
10916# define SELD(n) mkexpr((n)==0 ? d0 : d1)
10917# define SELS(n) mkexpr((n)==0 ? s0 : s1)
10918
10919 IRTemp res = newTemp(Ity_V128);
10920 assign(res, binop( Iop_64HLtoV128,
10921 SELS((imm8>>1)&1), SELD((imm8>>0)&1) ) );
10922
10923# undef SELD
10924# undef SELS
10925 return res;
10926}
10927
10928
10929static IRTemp math_SHUFPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
10930{
10931 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
10932 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
10933 breakupV256toV128s( sV, &sVhi, &sVlo );
10934 breakupV256toV128s( dV, &dVhi, &dVlo );
10935 IRTemp rVhi = math_SHUFPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
10936 IRTemp rVlo = math_SHUFPD_128(sVlo, dVlo, imm8 & 3);
10937 IRTemp rV = newTemp(Ity_V256);
10938 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
10939 return rV;
10940}
10941
10942
10943static IRTemp math_BLENDPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
10944{
10945 UShort imm8_mask_16;
10946 IRTemp imm8_mask = newTemp(Ity_V128);
10947
10948 switch( imm8 & 3 ) {
10949 case 0: imm8_mask_16 = 0x0000; break;
10950 case 1: imm8_mask_16 = 0x00FF; break;
10951 case 2: imm8_mask_16 = 0xFF00; break;
10952 case 3: imm8_mask_16 = 0xFFFF; break;
10953 default: vassert(0); break;
10954 }
10955 assign( imm8_mask, mkV128( imm8_mask_16 ) );
10956
10957 IRTemp res = newTemp(Ity_V128);
10958 assign ( res, binop( Iop_OrV128,
10959 binop( Iop_AndV128, mkexpr(sV),
10960 mkexpr(imm8_mask) ),
10961 binop( Iop_AndV128, mkexpr(dV),
10962 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
10963 return res;
10964}
10965
10966
10967static IRTemp math_BLENDPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
10968{
10969 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
10970 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
10971 breakupV256toV128s( sV, &sVhi, &sVlo );
10972 breakupV256toV128s( dV, &dVhi, &dVlo );
10973 IRTemp rVhi = math_BLENDPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
10974 IRTemp rVlo = math_BLENDPD_128(sVlo, dVlo, imm8 & 3);
10975 IRTemp rV = newTemp(Ity_V256);
10976 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
10977 return rV;
10978}
10979
10980
10981static IRTemp math_BLENDPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
10982{
10983 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
10984 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
10985 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
10986 0xFFFF };
10987 IRTemp imm8_mask = newTemp(Ity_V128);
10988 assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) );
10989
10990 IRTemp res = newTemp(Ity_V128);
10991 assign ( res, binop( Iop_OrV128,
10992 binop( Iop_AndV128, mkexpr(sV),
10993 mkexpr(imm8_mask) ),
10994 binop( Iop_AndV128, mkexpr(dV),
10995 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
10996 return res;
10997}
10998
10999
11000static IRTemp math_BLENDPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11001{
11002 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11003 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11004 breakupV256toV128s( sV, &sVhi, &sVlo );
11005 breakupV256toV128s( dV, &dVhi, &dVlo );
11006 IRTemp rVhi = math_BLENDPS_128(sVhi, dVhi, (imm8 >> 4) & 15);
11007 IRTemp rVlo = math_BLENDPS_128(sVlo, dVlo, imm8 & 15);
11008 IRTemp rV = newTemp(Ity_V256);
11009 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11010 return rV;
11011}
11012
11013
11014static IRTemp math_PBLENDW_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11015{
11016 /* Make w be a 16-bit version of imm8, formed by duplicating each
11017 bit in imm8. */
11018 Int i;
11019 UShort imm16 = 0;
11020 for (i = 0; i < 8; i++) {
11021 if (imm8 & (1 << i))
11022 imm16 |= (3 << (2*i));
11023 }
11024 IRTemp imm16_mask = newTemp(Ity_V128);
11025 assign( imm16_mask, mkV128( imm16 ));
11026
11027 IRTemp res = newTemp(Ity_V128);
11028 assign ( res, binop( Iop_OrV128,
11029 binop( Iop_AndV128, mkexpr(sV),
11030 mkexpr(imm16_mask) ),
11031 binop( Iop_AndV128, mkexpr(dV),
11032 unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) );
11033 return res;
11034}
11035
11036
sewardje8a7eb72012-06-12 14:59:17 +000011037static IRTemp math_PMULUDQ_128 ( IRTemp sV, IRTemp dV )
11038{
11039 /* This is a really poor translation -- could be improved if
11040 performance critical */
11041 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11042 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
sewardj4b1cc832012-06-13 11:10:20 +000011043 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11044 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
sewardje8a7eb72012-06-12 14:59:17 +000011045 IRTemp res = newTemp(Ity_V128);
11046 assign(res, binop(Iop_64HLtoV128,
11047 binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)),
11048 binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ));
11049 return res;
11050}
11051
11052
sewardjcc3d2192013-03-27 11:37:33 +000011053static IRTemp math_PMULUDQ_256 ( IRTemp sV, IRTemp dV )
11054{
11055 /* This is a really poor translation -- could be improved if
11056 performance critical */
11057 IRTemp sHi, sLo, dHi, dLo;
11058 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11059 breakupV256toV128s( dV, &dHi, &dLo);
11060 breakupV256toV128s( sV, &sHi, &sLo);
11061 IRTemp res = newTemp(Ity_V256);
11062 assign(res, binop(Iop_V128HLtoV256,
11063 mkexpr(math_PMULUDQ_128(sHi, dHi)),
11064 mkexpr(math_PMULUDQ_128(sLo, dLo))));
11065 return res;
11066}
11067
11068
sewardj89378162012-06-24 12:12:20 +000011069static IRTemp math_PMULDQ_128 ( IRTemp dV, IRTemp sV )
11070{
11071 /* This is a really poor translation -- could be improved if
11072 performance critical */
11073 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11074 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11075 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11076 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11077 IRTemp res = newTemp(Ity_V128);
11078 assign(res, binop(Iop_64HLtoV128,
11079 binop( Iop_MullS32, mkexpr(d2), mkexpr(s2)),
11080 binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) ));
11081 return res;
11082}
11083
11084
sewardjcc3d2192013-03-27 11:37:33 +000011085static IRTemp math_PMULDQ_256 ( IRTemp sV, IRTemp dV )
11086{
11087 /* This is a really poor translation -- could be improved if
11088 performance critical */
11089 IRTemp sHi, sLo, dHi, dLo;
11090 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11091 breakupV256toV128s( dV, &dHi, &dLo);
11092 breakupV256toV128s( sV, &sHi, &sLo);
11093 IRTemp res = newTemp(Ity_V256);
11094 assign(res, binop(Iop_V128HLtoV256,
11095 mkexpr(math_PMULDQ_128(sHi, dHi)),
11096 mkexpr(math_PMULDQ_128(sLo, dLo))));
11097 return res;
11098}
11099
11100
sewardj89378162012-06-24 12:12:20 +000011101static IRTemp math_PMADDWD_128 ( IRTemp dV, IRTemp sV )
11102{
11103 IRTemp sVhi, sVlo, dVhi, dVlo;
11104 IRTemp resHi = newTemp(Ity_I64);
11105 IRTemp resLo = newTemp(Ity_I64);
11106 sVhi = sVlo = dVhi = dVlo = IRTemp_INVALID;
11107 breakupV128to64s( sV, &sVhi, &sVlo );
11108 breakupV128to64s( dV, &dVhi, &dVlo );
11109 assign( resHi, mkIRExprCCall(Ity_I64, 0/*regparms*/,
11110 "amd64g_calculate_mmx_pmaddwd",
11111 &amd64g_calculate_mmx_pmaddwd,
11112 mkIRExprVec_2( mkexpr(sVhi), mkexpr(dVhi))));
11113 assign( resLo, mkIRExprCCall(Ity_I64, 0/*regparms*/,
11114 "amd64g_calculate_mmx_pmaddwd",
11115 &amd64g_calculate_mmx_pmaddwd,
11116 mkIRExprVec_2( mkexpr(sVlo), mkexpr(dVlo))));
11117 IRTemp res = newTemp(Ity_V128);
11118 assign( res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo))) ;
11119 return res;
11120}
11121
11122
sewardjcc3d2192013-03-27 11:37:33 +000011123static IRTemp math_PMADDWD_256 ( IRTemp dV, IRTemp sV )
11124{
11125 IRTemp sHi, sLo, dHi, dLo;
11126 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11127 breakupV256toV128s( dV, &dHi, &dLo);
11128 breakupV256toV128s( sV, &sHi, &sLo);
11129 IRTemp res = newTemp(Ity_V256);
11130 assign(res, binop(Iop_V128HLtoV256,
11131 mkexpr(math_PMADDWD_128(dHi, sHi)),
11132 mkexpr(math_PMADDWD_128(dLo, sLo))));
11133 return res;
11134}
11135
11136
sewardj89378162012-06-24 12:12:20 +000011137static IRTemp math_ADDSUBPD_128 ( IRTemp dV, IRTemp sV )
11138{
11139 IRTemp addV = newTemp(Ity_V128);
11140 IRTemp subV = newTemp(Ity_V128);
11141 IRTemp a1 = newTemp(Ity_I64);
11142 IRTemp s0 = newTemp(Ity_I64);
sewardj9571dc02014-01-26 18:34:23 +000011143 IRTemp rm = newTemp(Ity_I32);
sewardj89378162012-06-24 12:12:20 +000011144
sewardj9571dc02014-01-26 18:34:23 +000011145 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11146 assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11147 assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
sewardj89378162012-06-24 12:12:20 +000011148
11149 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
11150 assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
11151
11152 IRTemp res = newTemp(Ity_V128);
11153 assign( res, binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
11154 return res;
11155}
11156
11157
11158static IRTemp math_ADDSUBPD_256 ( IRTemp dV, IRTemp sV )
11159{
11160 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11161 IRTemp addV = newTemp(Ity_V256);
11162 IRTemp subV = newTemp(Ity_V256);
sewardj9571dc02014-01-26 18:34:23 +000011163 IRTemp rm = newTemp(Ity_I32);
sewardj89378162012-06-24 12:12:20 +000011164 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11165
sewardj9571dc02014-01-26 18:34:23 +000011166 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11167 assign( addV, triop(Iop_Add64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11168 assign( subV, triop(Iop_Sub64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
sewardj89378162012-06-24 12:12:20 +000011169
11170 breakupV256to64s( addV, &a3, &a2, &a1, &a0 );
11171 breakupV256to64s( subV, &s3, &s2, &s1, &s0 );
11172
11173 IRTemp res = newTemp(Ity_V256);
11174 assign( res, mkV256from64s( a3, s2, a1, s0 ) );
11175 return res;
11176}
11177
11178
11179static IRTemp math_ADDSUBPS_128 ( IRTemp dV, IRTemp sV )
11180{
11181 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11182 IRTemp addV = newTemp(Ity_V128);
11183 IRTemp subV = newTemp(Ity_V128);
sewardj9571dc02014-01-26 18:34:23 +000011184 IRTemp rm = newTemp(Ity_I32);
sewardj89378162012-06-24 12:12:20 +000011185 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11186
sewardj9571dc02014-01-26 18:34:23 +000011187 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11188 assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11189 assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
sewardj89378162012-06-24 12:12:20 +000011190
11191 breakupV128to32s( addV, &a3, &a2, &a1, &a0 );
11192 breakupV128to32s( subV, &s3, &s2, &s1, &s0 );
11193
11194 IRTemp res = newTemp(Ity_V128);
11195 assign( res, mkV128from32s( a3, s2, a1, s0 ) );
11196 return res;
11197}
11198
11199
11200static IRTemp math_ADDSUBPS_256 ( IRTemp dV, IRTemp sV )
11201{
11202 IRTemp a7, a6, a5, a4, a3, a2, a1, a0;
11203 IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
11204 IRTemp addV = newTemp(Ity_V256);
11205 IRTemp subV = newTemp(Ity_V256);
sewardj9571dc02014-01-26 18:34:23 +000011206 IRTemp rm = newTemp(Ity_I32);
sewardj89378162012-06-24 12:12:20 +000011207 a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID;
11208 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11209
sewardj9571dc02014-01-26 18:34:23 +000011210 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11211 assign( addV, triop(Iop_Add32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11212 assign( subV, triop(Iop_Sub32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
sewardj89378162012-06-24 12:12:20 +000011213
11214 breakupV256to32s( addV, &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0 );
11215 breakupV256to32s( subV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
11216
11217 IRTemp res = newTemp(Ity_V256);
11218 assign( res, mkV256from32s( a7, s6, a5, s4, a3, s2, a1, s0 ) );
11219 return res;
11220}
11221
11222
sewardj251b59e2012-05-25 13:51:07 +000011223/* Handle 128 bit PSHUFLW and PSHUFHW. */
floriancacba8e2014-12-15 18:58:07 +000011224static Long dis_PSHUFxW_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj251b59e2012-05-25 13:51:07 +000011225 Long delta, Bool isAvx, Bool xIsH )
11226{
11227 IRTemp addr = IRTemp_INVALID;
11228 Int alen = 0;
11229 HChar dis_buf[50];
11230 UChar modrm = getUChar(delta);
11231 UInt rG = gregOfRexRM(pfx,modrm);
11232 UInt imm8;
11233 IRTemp sVmut, dVmut, sVcon, sV, dV, s3, s2, s1, s0;
11234 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11235 sV = newTemp(Ity_V128);
11236 dV = newTemp(Ity_V128);
11237 sVmut = newTemp(Ity_I64);
11238 dVmut = newTemp(Ity_I64);
11239 sVcon = newTemp(Ity_I64);
11240 if (epartIsReg(modrm)) {
11241 UInt rE = eregOfRexRM(pfx,modrm);
11242 assign( sV, getXMMReg(rE) );
11243 imm8 = (UInt)getUChar(delta+1);
11244 delta += 1+1;
11245 DIP("%spshuf%cw $%u,%s,%s\n",
11246 isAvx ? "v" : "", xIsH ? 'h' : 'l',
11247 imm8, nameXMMReg(rE), nameXMMReg(rG));
11248 } else {
11249 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
11250 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11251 imm8 = (UInt)getUChar(delta+alen);
11252 delta += alen+1;
11253 DIP("%spshuf%cw $%u,%s,%s\n",
11254 isAvx ? "v" : "", xIsH ? 'h' : 'l',
11255 imm8, dis_buf, nameXMMReg(rG));
11256 }
11257
11258 /* Get the to-be-changed (mut) and unchanging (con) bits of the
11259 source. */
11260 assign( sVmut, unop(xIsH ? Iop_V128HIto64 : Iop_V128to64, mkexpr(sV)) );
11261 assign( sVcon, unop(xIsH ? Iop_V128to64 : Iop_V128HIto64, mkexpr(sV)) );
11262
11263 breakup64to16s( sVmut, &s3, &s2, &s1, &s0 );
11264# define SEL(n) \
11265 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11266 assign(dVmut, mk64from16s( SEL((imm8>>6)&3), SEL((imm8>>4)&3),
11267 SEL((imm8>>2)&3), SEL((imm8>>0)&3) ));
11268# undef SEL
11269
11270 assign(dV, xIsH ? binop(Iop_64HLtoV128, mkexpr(dVmut), mkexpr(sVcon))
11271 : binop(Iop_64HLtoV128, mkexpr(sVcon), mkexpr(dVmut)) );
11272
11273 (isAvx ? putYMMRegLoAndZU : putXMMReg)(rG, mkexpr(dV));
11274 return delta;
11275}
11276
11277
sewardjcc3d2192013-03-27 11:37:33 +000011278/* Handle 256 bit PSHUFLW and PSHUFHW. */
floriancacba8e2014-12-15 18:58:07 +000011279static Long dis_PSHUFxW_256 ( const VexAbiInfo* vbi, Prefix pfx,
sewardjcc3d2192013-03-27 11:37:33 +000011280 Long delta, Bool xIsH )
11281{
11282 IRTemp addr = IRTemp_INVALID;
11283 Int alen = 0;
11284 HChar dis_buf[50];
11285 UChar modrm = getUChar(delta);
11286 UInt rG = gregOfRexRM(pfx,modrm);
11287 UInt imm8;
11288 IRTemp sV, s[8], sV64[4], dVhi, dVlo;
11289 sV64[3] = sV64[2] = sV64[1] = sV64[0] = IRTemp_INVALID;
11290 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
11291 sV = newTemp(Ity_V256);
11292 dVhi = newTemp(Ity_I64);
11293 dVlo = newTemp(Ity_I64);
11294 if (epartIsReg(modrm)) {
11295 UInt rE = eregOfRexRM(pfx,modrm);
11296 assign( sV, getYMMReg(rE) );
11297 imm8 = (UInt)getUChar(delta+1);
11298 delta += 1+1;
11299 DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
11300 imm8, nameYMMReg(rE), nameYMMReg(rG));
11301 } else {
11302 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
11303 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
11304 imm8 = (UInt)getUChar(delta+alen);
11305 delta += alen+1;
11306 DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
11307 imm8, dis_buf, nameYMMReg(rG));
11308 }
11309
11310 breakupV256to64s( sV, &sV64[3], &sV64[2], &sV64[1], &sV64[0] );
11311 breakup64to16s( sV64[xIsH ? 3 : 2], &s[7], &s[6], &s[5], &s[4] );
11312 breakup64to16s( sV64[xIsH ? 1 : 0], &s[3], &s[2], &s[1], &s[0] );
11313
11314 assign( dVhi, mk64from16s( s[4 + ((imm8>>6)&3)], s[4 + ((imm8>>4)&3)],
11315 s[4 + ((imm8>>2)&3)], s[4 + ((imm8>>0)&3)] ) );
11316 assign( dVlo, mk64from16s( s[0 + ((imm8>>6)&3)], s[0 + ((imm8>>4)&3)],
11317 s[0 + ((imm8>>2)&3)], s[0 + ((imm8>>0)&3)] ) );
11318 putYMMReg( rG, mkV256from64s( xIsH ? dVhi : sV64[3],
11319 xIsH ? sV64[2] : dVhi,
11320 xIsH ? dVlo : sV64[1],
11321 xIsH ? sV64[0] : dVlo ) );
11322 return delta;
11323}
11324
11325
floriancacba8e2014-12-15 18:58:07 +000011326static Long dis_PEXTRW_128_EregOnly_toG ( const VexAbiInfo* vbi, Prefix pfx,
sewardje8a7eb72012-06-12 14:59:17 +000011327 Long delta, Bool isAvx )
11328{
11329 Long deltaIN = delta;
11330 UChar modrm = getUChar(delta);
11331 UInt rG = gregOfRexRM(pfx,modrm);
11332 IRTemp sV = newTemp(Ity_V128);
11333 IRTemp d16 = newTemp(Ity_I16);
11334 UInt imm8;
11335 IRTemp s0, s1, s2, s3;
11336 if (epartIsReg(modrm)) {
11337 UInt rE = eregOfRexRM(pfx,modrm);
11338 assign(sV, getXMMReg(rE));
11339 imm8 = getUChar(delta+1) & 7;
11340 delta += 1+1;
11341 DIP("%spextrw $%d,%s,%s\n", isAvx ? "v" : "",
11342 (Int)imm8, nameXMMReg(rE), nameIReg32(rG));
11343 } else {
11344 /* The memory case is disallowed, apparently. */
11345 return deltaIN; /* FAIL */
11346 }
11347 s3 = s2 = s1 = s0 = IRTemp_INVALID;
sewardj4b1cc832012-06-13 11:10:20 +000011348 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
sewardje8a7eb72012-06-12 14:59:17 +000011349 switch (imm8) {
11350 case 0: assign(d16, unop(Iop_32to16, mkexpr(s0))); break;
11351 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(s0))); break;
11352 case 2: assign(d16, unop(Iop_32to16, mkexpr(s1))); break;
11353 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(s1))); break;
11354 case 4: assign(d16, unop(Iop_32to16, mkexpr(s2))); break;
11355 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(s2))); break;
11356 case 6: assign(d16, unop(Iop_32to16, mkexpr(s3))); break;
11357 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(s3))); break;
11358 default: vassert(0);
11359 }
11360 putIReg32(rG, unop(Iop_16Uto32, mkexpr(d16)));
11361 return delta;
11362}
11363
11364
floriancacba8e2014-12-15 18:58:07 +000011365static Long dis_CVTDQ2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj4b1cc832012-06-13 11:10:20 +000011366 Long delta, Bool isAvx )
11367{
11368 IRTemp addr = IRTemp_INVALID;
11369 Int alen = 0;
11370 HChar dis_buf[50];
11371 UChar modrm = getUChar(delta);
11372 IRTemp arg64 = newTemp(Ity_I64);
11373 UInt rG = gregOfRexRM(pfx,modrm);
florian55085f82012-11-21 00:36:55 +000011374 const HChar* mbV = isAvx ? "v" : "";
sewardj4b1cc832012-06-13 11:10:20 +000011375 if (epartIsReg(modrm)) {
11376 UInt rE = eregOfRexRM(pfx,modrm);
11377 assign( arg64, getXMMRegLane64(rE, 0) );
11378 delta += 1;
11379 DIP("%scvtdq2pd %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
11380 } else {
11381 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11382 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
11383 delta += alen;
11384 DIP("%scvtdq2pd %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
11385 }
11386 putXMMRegLane64F(
11387 rG, 0,
11388 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
11389 );
11390 putXMMRegLane64F(
11391 rG, 1,
11392 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
11393 );
11394 if (isAvx)
11395 putYMMRegLane128(rG, 1, mkV128(0));
11396 return delta;
11397}
11398
11399
floriancacba8e2014-12-15 18:58:07 +000011400static Long dis_STMXCSR ( const VexAbiInfo* vbi, Prefix pfx,
sewardjfe0c5e72012-06-15 15:48:07 +000011401 Long delta, Bool isAvx )
11402{
11403 IRTemp addr = IRTemp_INVALID;
11404 Int alen = 0;
11405 HChar dis_buf[50];
11406 UChar modrm = getUChar(delta);
11407 vassert(!epartIsReg(modrm)); /* ensured by caller */
11408 vassert(gregOfRexRM(pfx,modrm) == 3); /* ditto */
11409
11410 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11411 delta += alen;
11412
11413 /* Fake up a native SSE mxcsr word. The only thing it depends on
11414 is SSEROUND[1:0], so call a clean helper to cook it up.
11415 */
11416 /* ULong amd64h_create_mxcsr ( ULong sseround ) */
11417 DIP("%sstmxcsr %s\n", isAvx ? "v" : "", dis_buf);
11418 storeLE(
11419 mkexpr(addr),
11420 unop(Iop_64to32,
11421 mkIRExprCCall(
11422 Ity_I64, 0/*regp*/,
11423 "amd64g_create_mxcsr", &amd64g_create_mxcsr,
11424 mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) )
11425 )
11426 )
11427 );
11428 return delta;
11429}
11430
11431
floriancacba8e2014-12-15 18:58:07 +000011432static Long dis_LDMXCSR ( const VexAbiInfo* vbi, Prefix pfx,
sewardjfe0c5e72012-06-15 15:48:07 +000011433 Long delta, Bool isAvx )
11434{
11435 IRTemp addr = IRTemp_INVALID;
11436 Int alen = 0;
11437 HChar dis_buf[50];
11438 UChar modrm = getUChar(delta);
11439 vassert(!epartIsReg(modrm)); /* ensured by caller */
11440 vassert(gregOfRexRM(pfx,modrm) == 2); /* ditto */
11441
11442 IRTemp t64 = newTemp(Ity_I64);
11443 IRTemp ew = newTemp(Ity_I32);
11444
11445 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11446 delta += alen;
11447 DIP("%sldmxcsr %s\n", isAvx ? "v" : "", dis_buf);
11448
11449 /* The only thing we observe in %mxcsr is the rounding mode.
11450 Therefore, pass the 32-bit value (SSE native-format control
11451 word) to a clean helper, getting back a 64-bit value, the
11452 lower half of which is the SSEROUND value to store, and the
11453 upper half of which is the emulation-warning token which may
11454 be generated.
11455 */
11456 /* ULong amd64h_check_ldmxcsr ( ULong ); */
11457 assign( t64, mkIRExprCCall(
11458 Ity_I64, 0/*regparms*/,
11459 "amd64g_check_ldmxcsr",
11460 &amd64g_check_ldmxcsr,
11461 mkIRExprVec_1(
11462 unop(Iop_32Uto64,
11463 loadLE(Ity_I32, mkexpr(addr))
11464 )
11465 )
11466 )
11467 );
11468
11469 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
11470 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
11471 put_emwarn( mkexpr(ew) );
11472 /* Finally, if an emulation warning was reported, side-exit to
11473 the next insn, reporting the warning, so that Valgrind's
11474 dispatcher sees the warning. */
11475 stmt(
11476 IRStmt_Exit(
11477 binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)),
11478 Ijk_EmWarn,
11479 IRConst_U64(guest_RIP_bbstart+delta),
11480 OFFB_RIP
11481 )
11482 );
11483 return delta;
11484}
11485
11486
sewardj4ed05e02012-06-18 15:01:30 +000011487static IRTemp math_PINSRW_128 ( IRTemp v128, IRTemp u16, UInt imm8 )
11488{
11489 vassert(imm8 >= 0 && imm8 <= 7);
11490
11491 // Create a V128 value which has the selected word in the
11492 // specified lane, and zeroes everywhere else.
11493 IRTemp tmp128 = newTemp(Ity_V128);
11494 IRTemp halfshift = newTemp(Ity_I64);
11495 assign(halfshift, binop(Iop_Shl64,
11496 unop(Iop_16Uto64, mkexpr(u16)),
11497 mkU8(16 * (imm8 & 3))));
11498 if (imm8 < 4) {
11499 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
11500 } else {
11501 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
11502 }
11503
11504 UShort mask = ~(3 << (imm8 * 2));
11505 IRTemp res = newTemp(Ity_V128);
11506 assign( res, binop(Iop_OrV128,
11507 mkexpr(tmp128),
11508 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
11509 return res;
11510}
11511
11512
sewardj82096922012-06-24 14:57:59 +000011513static IRTemp math_PSADBW_128 ( IRTemp dV, IRTemp sV )
11514{
11515 IRTemp s1, s0, d1, d0;
11516 s1 = s0 = d1 = d0 = IRTemp_INVALID;
11517
11518 breakupV128to64s( sV, &s1, &s0 );
11519 breakupV128to64s( dV, &d1, &d0 );
11520
11521 IRTemp res = newTemp(Ity_V128);
11522 assign( res,
11523 binop(Iop_64HLtoV128,
11524 mkIRExprCCall(Ity_I64, 0/*regparms*/,
11525 "amd64g_calculate_mmx_psadbw",
11526 &amd64g_calculate_mmx_psadbw,
11527 mkIRExprVec_2( mkexpr(s1), mkexpr(d1))),
11528 mkIRExprCCall(Ity_I64, 0/*regparms*/,
11529 "amd64g_calculate_mmx_psadbw",
11530 &amd64g_calculate_mmx_psadbw,
11531 mkIRExprVec_2( mkexpr(s0), mkexpr(d0)))) );
11532 return res;
11533}
11534
11535
sewardjcc3d2192013-03-27 11:37:33 +000011536static IRTemp math_PSADBW_256 ( IRTemp dV, IRTemp sV )
11537{
11538 IRTemp sHi, sLo, dHi, dLo;
11539 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11540 breakupV256toV128s( dV, &dHi, &dLo);
11541 breakupV256toV128s( sV, &sHi, &sLo);
11542 IRTemp res = newTemp(Ity_V256);
11543 assign(res, binop(Iop_V128HLtoV256,
11544 mkexpr(math_PSADBW_128(dHi, sHi)),
11545 mkexpr(math_PSADBW_128(dLo, sLo))));
11546 return res;
11547}
11548
11549
floriancacba8e2014-12-15 18:58:07 +000011550static Long dis_MASKMOVDQU ( const VexAbiInfo* vbi, Prefix pfx,
sewardj8eb7ae82012-06-24 14:00:27 +000011551 Long delta, Bool isAvx )
11552{
11553 IRTemp regD = newTemp(Ity_V128);
11554 IRTemp mask = newTemp(Ity_V128);
11555 IRTemp olddata = newTemp(Ity_V128);
11556 IRTemp newdata = newTemp(Ity_V128);
11557 IRTemp addr = newTemp(Ity_I64);
11558 UChar modrm = getUChar(delta);
11559 UInt rG = gregOfRexRM(pfx,modrm);
11560 UInt rE = eregOfRexRM(pfx,modrm);
11561
11562 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
11563 assign( regD, getXMMReg( rG ));
11564
11565 /* Unfortunately can't do the obvious thing with SarN8x16
11566 here since that can't be re-emitted as SSE2 code - no such
11567 insn. */
11568 assign( mask,
11569 binop(Iop_64HLtoV128,
11570 binop(Iop_SarN8x8,
11571 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ),
11572 mkU8(7) ),
11573 binop(Iop_SarN8x8,
11574 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ),
11575 mkU8(7) ) ));
11576 assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
11577 assign( newdata, binop(Iop_OrV128,
11578 binop(Iop_AndV128,
11579 mkexpr(regD),
11580 mkexpr(mask) ),
11581 binop(Iop_AndV128,
11582 mkexpr(olddata),
11583 unop(Iop_NotV128, mkexpr(mask)))) );
11584 storeLE( mkexpr(addr), mkexpr(newdata) );
11585
11586 delta += 1;
11587 DIP("%smaskmovdqu %s,%s\n", isAvx ? "v" : "",
11588 nameXMMReg(rE), nameXMMReg(rG) );
11589 return delta;
11590}
11591
11592
floriancacba8e2014-12-15 18:58:07 +000011593static Long dis_MOVMSKPS_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj8eb7ae82012-06-24 14:00:27 +000011594 Long delta, Bool isAvx )
11595{
11596 UChar modrm = getUChar(delta);
11597 UInt rG = gregOfRexRM(pfx,modrm);
11598 UInt rE = eregOfRexRM(pfx,modrm);
11599 IRTemp t0 = newTemp(Ity_I32);
11600 IRTemp t1 = newTemp(Ity_I32);
11601 IRTemp t2 = newTemp(Ity_I32);
11602 IRTemp t3 = newTemp(Ity_I32);
11603 delta += 1;
11604 assign( t0, binop( Iop_And32,
11605 binop(Iop_Shr32, getXMMRegLane32(rE,0), mkU8(31)),
11606 mkU32(1) ));
11607 assign( t1, binop( Iop_And32,
11608 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(30)),
11609 mkU32(2) ));
11610 assign( t2, binop( Iop_And32,
11611 binop(Iop_Shr32, getXMMRegLane32(rE,2), mkU8(29)),
11612 mkU32(4) ));
11613 assign( t3, binop( Iop_And32,
11614 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(28)),
11615 mkU32(8) ));
11616 putIReg32( rG, binop(Iop_Or32,
11617 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
11618 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
11619 DIP("%smovmskps %s,%s\n", isAvx ? "v" : "",
11620 nameXMMReg(rE), nameIReg32(rG));
11621 return delta;
11622}
11623
11624
floriancacba8e2014-12-15 18:58:07 +000011625static Long dis_MOVMSKPS_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
sewardj8eb7ae82012-06-24 14:00:27 +000011626{
11627 UChar modrm = getUChar(delta);
11628 UInt rG = gregOfRexRM(pfx,modrm);
11629 UInt rE = eregOfRexRM(pfx,modrm);
11630 IRTemp t0 = newTemp(Ity_I32);
11631 IRTemp t1 = newTemp(Ity_I32);
11632 IRTemp t2 = newTemp(Ity_I32);
11633 IRTemp t3 = newTemp(Ity_I32);
11634 IRTemp t4 = newTemp(Ity_I32);
11635 IRTemp t5 = newTemp(Ity_I32);
11636 IRTemp t6 = newTemp(Ity_I32);
11637 IRTemp t7 = newTemp(Ity_I32);
11638 delta += 1;
11639 assign( t0, binop( Iop_And32,
11640 binop(Iop_Shr32, getYMMRegLane32(rE,0), mkU8(31)),
11641 mkU32(1) ));
11642 assign( t1, binop( Iop_And32,
11643 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(30)),
11644 mkU32(2) ));
11645 assign( t2, binop( Iop_And32,
11646 binop(Iop_Shr32, getYMMRegLane32(rE,2), mkU8(29)),
11647 mkU32(4) ));
11648 assign( t3, binop( Iop_And32,
11649 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(28)),
11650 mkU32(8) ));
11651 assign( t4, binop( Iop_And32,
11652 binop(Iop_Shr32, getYMMRegLane32(rE,4), mkU8(27)),
11653 mkU32(16) ));
11654 assign( t5, binop( Iop_And32,
11655 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(26)),
11656 mkU32(32) ));
11657 assign( t6, binop( Iop_And32,
11658 binop(Iop_Shr32, getYMMRegLane32(rE,6), mkU8(25)),
11659 mkU32(64) ));
11660 assign( t7, binop( Iop_And32,
11661 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(24)),
11662 mkU32(128) ));
11663 putIReg32( rG, binop(Iop_Or32,
11664 binop(Iop_Or32,
11665 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
11666 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ),
11667 binop(Iop_Or32,
11668 binop(Iop_Or32, mkexpr(t4), mkexpr(t5)),
11669 binop(Iop_Or32, mkexpr(t6), mkexpr(t7)) ) ) );
11670 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
11671 return delta;
11672}
11673
11674
floriancacba8e2014-12-15 18:58:07 +000011675static Long dis_MOVMSKPD_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj8eb7ae82012-06-24 14:00:27 +000011676 Long delta, Bool isAvx )
11677{
11678 UChar modrm = getUChar(delta);
11679 UInt rG = gregOfRexRM(pfx,modrm);
11680 UInt rE = eregOfRexRM(pfx,modrm);
11681 IRTemp t0 = newTemp(Ity_I32);
11682 IRTemp t1 = newTemp(Ity_I32);
11683 delta += 1;
11684 assign( t0, binop( Iop_And32,
11685 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(31)),
11686 mkU32(1) ));
11687 assign( t1, binop( Iop_And32,
11688 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(30)),
11689 mkU32(2) ));
11690 putIReg32( rG, binop(Iop_Or32, mkexpr(t0), mkexpr(t1) ) );
11691 DIP("%smovmskpd %s,%s\n", isAvx ? "v" : "",
11692 nameXMMReg(rE), nameIReg32(rG));
11693 return delta;
11694}
11695
11696
floriancacba8e2014-12-15 18:58:07 +000011697static Long dis_MOVMSKPD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
sewardj8eb7ae82012-06-24 14:00:27 +000011698{
11699 UChar modrm = getUChar(delta);
11700 UInt rG = gregOfRexRM(pfx,modrm);
11701 UInt rE = eregOfRexRM(pfx,modrm);
11702 IRTemp t0 = newTemp(Ity_I32);
11703 IRTemp t1 = newTemp(Ity_I32);
11704 IRTemp t2 = newTemp(Ity_I32);
11705 IRTemp t3 = newTemp(Ity_I32);
11706 delta += 1;
11707 assign( t0, binop( Iop_And32,
11708 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(31)),
11709 mkU32(1) ));
11710 assign( t1, binop( Iop_And32,
11711 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(30)),
11712 mkU32(2) ));
11713 assign( t2, binop( Iop_And32,
11714 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(29)),
11715 mkU32(4) ));
11716 assign( t3, binop( Iop_And32,
11717 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(28)),
11718 mkU32(8) ));
11719 putIReg32( rG, binop(Iop_Or32,
11720 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
11721 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
11722 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
11723 return delta;
11724}
11725
11726
sewardj80611e32012-01-20 13:07:24 +000011727/* Note, this also handles SSE(1) insns. */
11728__attribute__((noinline))
11729static
11730Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
floriancacba8e2014-12-15 18:58:07 +000011731 const VexAbiInfo* vbi,
sewardj30fc0582012-02-16 13:45:13 +000011732 Prefix pfx, Int sz, Long deltaIN,
11733 DisResult* dres )
sewardj80611e32012-01-20 13:07:24 +000011734{
11735 IRTemp addr = IRTemp_INVALID;
11736 IRTemp t0 = IRTemp_INVALID;
11737 IRTemp t1 = IRTemp_INVALID;
11738 IRTemp t2 = IRTemp_INVALID;
11739 IRTemp t3 = IRTemp_INVALID;
11740 IRTemp t4 = IRTemp_INVALID;
11741 IRTemp t5 = IRTemp_INVALID;
11742 IRTemp t6 = IRTemp_INVALID;
11743 UChar modrm = 0;
11744 Int alen = 0;
11745 HChar dis_buf[50];
11746
11747 *decode_OK = False;
11748
11749 Long delta = deltaIN;
11750 UChar opc = getUChar(delta);
11751 delta++;
11752 switch (opc) {
11753
11754 case 0x10:
11755 if (have66noF2noF3(pfx)
11756 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11757 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
11758 modrm = getUChar(delta);
11759 if (epartIsReg(modrm)) {
11760 putXMMReg( gregOfRexRM(pfx,modrm),
11761 getXMMReg( eregOfRexRM(pfx,modrm) ));
11762 DIP("movupd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11763 nameXMMReg(gregOfRexRM(pfx,modrm)));
11764 delta += 1;
11765 } else {
11766 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11767 putXMMReg( gregOfRexRM(pfx,modrm),
11768 loadLE(Ity_V128, mkexpr(addr)) );
11769 DIP("movupd %s,%s\n", dis_buf,
11770 nameXMMReg(gregOfRexRM(pfx,modrm)));
11771 delta += alen;
11772 }
11773 goto decode_success;
11774 }
11775 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
11776 G (lo half xmm). If E is mem, upper half of G is zeroed out.
11777 If E is reg, upper half of G is unchanged. */
11778 if (haveF2no66noF3(pfx)
11779 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) ) {
11780 modrm = getUChar(delta);
11781 if (epartIsReg(modrm)) {
11782 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
11783 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
11784 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11785 nameXMMReg(gregOfRexRM(pfx,modrm)));
11786 delta += 1;
11787 } else {
11788 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11789 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
11790 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
11791 loadLE(Ity_I64, mkexpr(addr)) );
11792 DIP("movsd %s,%s\n", dis_buf,
11793 nameXMMReg(gregOfRexRM(pfx,modrm)));
11794 delta += alen;
11795 }
11796 goto decode_success;
11797 }
11798 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
11799 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
11800 if (haveF3no66noF2(pfx)
11801 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11802 modrm = getUChar(delta);
11803 if (epartIsReg(modrm)) {
11804 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
11805 getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 ));
11806 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11807 nameXMMReg(gregOfRexRM(pfx,modrm)));
11808 delta += 1;
11809 } else {
11810 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11811 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
11812 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
11813 loadLE(Ity_I32, mkexpr(addr)) );
11814 DIP("movss %s,%s\n", dis_buf,
11815 nameXMMReg(gregOfRexRM(pfx,modrm)));
11816 delta += alen;
11817 }
11818 goto decode_success;
11819 }
11820 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
11821 if (haveNo66noF2noF3(pfx)
11822 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11823 modrm = getUChar(delta);
11824 if (epartIsReg(modrm)) {
11825 putXMMReg( gregOfRexRM(pfx,modrm),
11826 getXMMReg( eregOfRexRM(pfx,modrm) ));
11827 DIP("movups %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11828 nameXMMReg(gregOfRexRM(pfx,modrm)));
11829 delta += 1;
11830 } else {
11831 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11832 putXMMReg( gregOfRexRM(pfx,modrm),
11833 loadLE(Ity_V128, mkexpr(addr)) );
11834 DIP("movups %s,%s\n", dis_buf,
11835 nameXMMReg(gregOfRexRM(pfx,modrm)));
11836 delta += alen;
11837 }
11838 goto decode_success;
11839 }
11840 break;
11841
11842 case 0x11:
11843 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
11844 or lo half xmm). */
11845 if (haveF2no66noF3(pfx)
11846 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11847 modrm = getUChar(delta);
11848 if (epartIsReg(modrm)) {
11849 putXMMRegLane64( eregOfRexRM(pfx,modrm), 0,
11850 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
11851 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11852 nameXMMReg(eregOfRexRM(pfx,modrm)));
11853 delta += 1;
11854 } else {
11855 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11856 storeLE( mkexpr(addr),
11857 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
11858 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11859 dis_buf);
11860 delta += alen;
11861 }
11862 goto decode_success;
11863 }
11864 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
11865 or lo 1/4 xmm). */
11866 if (haveF3no66noF2(pfx) && sz == 4) {
11867 modrm = getUChar(delta);
11868 if (epartIsReg(modrm)) {
11869 /* fall through, we don't yet have a test case */
11870 } else {
11871 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11872 storeLE( mkexpr(addr),
11873 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
11874 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11875 dis_buf);
11876 delta += alen;
11877 goto decode_success;
11878 }
11879 }
11880 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
11881 if (have66noF2noF3(pfx)
11882 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11883 modrm = getUChar(delta);
11884 if (epartIsReg(modrm)) {
11885 putXMMReg( eregOfRexRM(pfx,modrm),
sewardjcc3d2192013-03-27 11:37:33 +000011886 getXMMReg( gregOfRexRM(pfx,modrm) ) );
sewardj80611e32012-01-20 13:07:24 +000011887 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
sewardjcc3d2192013-03-27 11:37:33 +000011888 nameXMMReg(eregOfRexRM(pfx,modrm)));
sewardj80611e32012-01-20 13:07:24 +000011889 delta += 1;
11890 } else {
11891 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11892 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
11893 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11894 dis_buf );
11895 delta += alen;
11896 }
11897 goto decode_success;
11898 }
11899 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
11900 if (haveNo66noF2noF3(pfx)
11901 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11902 modrm = getUChar(delta);
11903 if (epartIsReg(modrm)) {
11904 /* fall through; awaiting test case */
11905 } else {
11906 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11907 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
11908 DIP("movups %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11909 dis_buf );
11910 delta += alen;
11911 goto decode_success;
11912 }
11913 }
11914 break;
11915
11916 case 0x12:
11917 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
11918 /* Identical to MOVLPS ? */
11919 if (have66noF2noF3(pfx)
11920 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11921 modrm = getUChar(delta);
11922 if (epartIsReg(modrm)) {
11923 /* fall through; apparently reg-reg is not possible */
11924 } else {
11925 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11926 delta += alen;
11927 putXMMRegLane64( gregOfRexRM(pfx,modrm),
11928 0/*lower lane*/,
11929 loadLE(Ity_I64, mkexpr(addr)) );
11930 DIP("movlpd %s, %s\n",
11931 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
11932 goto decode_success;
11933 }
11934 }
11935 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
11936 /* OF 12 = MOVHLPS -- from from hi half to lo half of XMM. */
11937 if (haveNo66noF2noF3(pfx)
11938 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11939 modrm = getUChar(delta);
11940 if (epartIsReg(modrm)) {
11941 delta += 1;
11942 putXMMRegLane64( gregOfRexRM(pfx,modrm),
11943 0/*lower lane*/,
11944 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ));
11945 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11946 nameXMMReg(gregOfRexRM(pfx,modrm)));
11947 } else {
11948 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11949 delta += alen;
11950 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/,
11951 loadLE(Ity_I64, mkexpr(addr)) );
11952 DIP("movlps %s, %s\n",
11953 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
11954 }
11955 goto decode_success;
11956 }
11957 break;
11958
11959 case 0x13:
11960 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
11961 if (haveNo66noF2noF3(pfx)
11962 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11963 modrm = getUChar(delta);
11964 if (!epartIsReg(modrm)) {
11965 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11966 delta += alen;
11967 storeLE( mkexpr(addr),
11968 getXMMRegLane64( gregOfRexRM(pfx,modrm),
11969 0/*lower lane*/ ) );
11970 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
11971 dis_buf);
11972 goto decode_success;
11973 }
11974 /* else fall through */
11975 }
11976 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
11977 /* Identical to MOVLPS ? */
11978 if (have66noF2noF3(pfx)
11979 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11980 modrm = getUChar(delta);
11981 if (!epartIsReg(modrm)) {
11982 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11983 delta += alen;
11984 storeLE( mkexpr(addr),
11985 getXMMRegLane64( gregOfRexRM(pfx,modrm),
11986 0/*lower lane*/ ) );
11987 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
11988 dis_buf);
11989 goto decode_success;
11990 }
11991 /* else fall through */
11992 }
11993 break;
11994
11995 case 0x14:
11996 case 0x15:
11997 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
11998 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
11999 /* These just appear to be special cases of SHUFPS */
12000 if (haveNo66noF2noF3(pfx) && sz == 4) {
sewardjc4530ae2012-05-21 10:18:49 +000012001 Bool hi = toBool(opc == 0x15);
12002 IRTemp sV = newTemp(Ity_V128);
12003 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000012004 modrm = getUChar(delta);
sewardjc4530ae2012-05-21 10:18:49 +000012005 UInt rG = gregOfRexRM(pfx,modrm);
12006 assign( dV, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000012007 if (epartIsReg(modrm)) {
sewardjc4530ae2012-05-21 10:18:49 +000012008 UInt rE = eregOfRexRM(pfx,modrm);
12009 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000012010 delta += 1;
12011 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
sewardjc4530ae2012-05-21 10:18:49 +000012012 nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000012013 } else {
12014 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12015 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12016 delta += alen;
12017 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
sewardjc4530ae2012-05-21 10:18:49 +000012018 dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000012019 }
sewardj56c30312012-06-12 08:45:39 +000012020 IRTemp res = math_UNPCKxPS_128( sV, dV, hi );
sewardjc4530ae2012-05-21 10:18:49 +000012021 putXMMReg( rG, mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000012022 goto decode_success;
12023 }
12024 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
12025 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
12026 /* These just appear to be special cases of SHUFPS */
12027 if (have66noF2noF3(pfx)
12028 && sz == 2 /* could be 8 if rex also present */) {
sewardj56c30312012-06-12 08:45:39 +000012029 Bool hi = toBool(opc == 0x15);
sewardj80611e32012-01-20 13:07:24 +000012030 IRTemp sV = newTemp(Ity_V128);
12031 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000012032 modrm = getUChar(delta);
sewardj56c30312012-06-12 08:45:39 +000012033 UInt rG = gregOfRexRM(pfx,modrm);
12034 assign( dV, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000012035 if (epartIsReg(modrm)) {
sewardj56c30312012-06-12 08:45:39 +000012036 UInt rE = eregOfRexRM(pfx,modrm);
12037 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000012038 delta += 1;
12039 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
sewardj56c30312012-06-12 08:45:39 +000012040 nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000012041 } else {
12042 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12043 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12044 delta += alen;
12045 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
sewardj56c30312012-06-12 08:45:39 +000012046 dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000012047 }
sewardj56c30312012-06-12 08:45:39 +000012048 IRTemp res = math_UNPCKxPD_128( sV, dV, hi );
12049 putXMMReg( rG, mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000012050 goto decode_success;
12051 }
12052 break;
12053
12054 case 0x16:
12055 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
12056 /* These seems identical to MOVHPS. This instruction encoding is
12057 completely crazy. */
12058 if (have66noF2noF3(pfx)
12059 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12060 modrm = getUChar(delta);
12061 if (epartIsReg(modrm)) {
12062 /* fall through; apparently reg-reg is not possible */
12063 } else {
12064 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12065 delta += alen;
12066 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12067 loadLE(Ity_I64, mkexpr(addr)) );
12068 DIP("movhpd %s,%s\n", dis_buf,
12069 nameXMMReg( gregOfRexRM(pfx,modrm) ));
12070 goto decode_success;
12071 }
12072 }
12073 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
12074 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
12075 if (haveNo66noF2noF3(pfx)
12076 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12077 modrm = getUChar(delta);
12078 if (epartIsReg(modrm)) {
12079 delta += 1;
12080 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12081 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) );
12082 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12083 nameXMMReg(gregOfRexRM(pfx,modrm)));
12084 } else {
12085 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12086 delta += alen;
12087 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12088 loadLE(Ity_I64, mkexpr(addr)) );
12089 DIP("movhps %s,%s\n", dis_buf,
12090 nameXMMReg( gregOfRexRM(pfx,modrm) ));
12091 }
12092 goto decode_success;
12093 }
12094 break;
12095
12096 case 0x17:
12097 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
12098 if (haveNo66noF2noF3(pfx)
12099 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12100 modrm = getUChar(delta);
12101 if (!epartIsReg(modrm)) {
12102 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12103 delta += alen;
12104 storeLE( mkexpr(addr),
12105 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12106 1/*upper lane*/ ) );
12107 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12108 dis_buf);
12109 goto decode_success;
12110 }
12111 /* else fall through */
12112 }
12113 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
12114 /* Again, this seems identical to MOVHPS. */
12115 if (have66noF2noF3(pfx)
12116 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12117 modrm = getUChar(delta);
12118 if (!epartIsReg(modrm)) {
12119 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12120 delta += alen;
12121 storeLE( mkexpr(addr),
12122 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12123 1/*upper lane*/ ) );
12124 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12125 dis_buf);
12126 goto decode_success;
12127 }
12128 /* else fall through */
12129 }
12130 break;
12131
12132 case 0x18:
12133 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
12134 /* 0F 18 /1 = PREFETCH0 -- with various different hints */
12135 /* 0F 18 /2 = PREFETCH1 */
12136 /* 0F 18 /3 = PREFETCH2 */
12137 if (haveNo66noF2noF3(pfx)
12138 && !epartIsReg(getUChar(delta))
12139 && gregLO3ofRM(getUChar(delta)) >= 0
12140 && gregLO3ofRM(getUChar(delta)) <= 3) {
florian55085f82012-11-21 00:36:55 +000012141 const HChar* hintstr = "??";
sewardj80611e32012-01-20 13:07:24 +000012142
12143 modrm = getUChar(delta);
12144 vassert(!epartIsReg(modrm));
12145
12146 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12147 delta += alen;
12148
12149 switch (gregLO3ofRM(modrm)) {
12150 case 0: hintstr = "nta"; break;
12151 case 1: hintstr = "t0"; break;
12152 case 2: hintstr = "t1"; break;
12153 case 3: hintstr = "t2"; break;
12154 default: vassert(0);
12155 }
12156
12157 DIP("prefetch%s %s\n", hintstr, dis_buf);
12158 goto decode_success;
12159 }
12160 break;
12161
12162 case 0x28:
12163 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
12164 if (have66noF2noF3(pfx)
12165 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12166 modrm = getUChar(delta);
12167 if (epartIsReg(modrm)) {
12168 putXMMReg( gregOfRexRM(pfx,modrm),
12169 getXMMReg( eregOfRexRM(pfx,modrm) ));
12170 DIP("movapd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12171 nameXMMReg(gregOfRexRM(pfx,modrm)));
12172 delta += 1;
12173 } else {
12174 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12175 gen_SEGV_if_not_16_aligned( addr );
12176 putXMMReg( gregOfRexRM(pfx,modrm),
12177 loadLE(Ity_V128, mkexpr(addr)) );
12178 DIP("movapd %s,%s\n", dis_buf,
12179 nameXMMReg(gregOfRexRM(pfx,modrm)));
12180 delta += alen;
12181 }
12182 goto decode_success;
12183 }
12184 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
12185 if (haveNo66noF2noF3(pfx)
12186 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12187 modrm = getUChar(delta);
12188 if (epartIsReg(modrm)) {
12189 putXMMReg( gregOfRexRM(pfx,modrm),
12190 getXMMReg( eregOfRexRM(pfx,modrm) ));
12191 DIP("movaps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12192 nameXMMReg(gregOfRexRM(pfx,modrm)));
12193 delta += 1;
12194 } else {
12195 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12196 gen_SEGV_if_not_16_aligned( addr );
12197 putXMMReg( gregOfRexRM(pfx,modrm),
12198 loadLE(Ity_V128, mkexpr(addr)) );
12199 DIP("movaps %s,%s\n", dis_buf,
12200 nameXMMReg(gregOfRexRM(pfx,modrm)));
12201 delta += alen;
12202 }
12203 goto decode_success;
12204 }
12205 break;
12206
12207 case 0x29:
12208 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
12209 if (haveNo66noF2noF3(pfx)
12210 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12211 modrm = getUChar(delta);
12212 if (epartIsReg(modrm)) {
sewardjae4793e2012-08-23 18:49:59 +000012213 putXMMReg( eregOfRexRM(pfx,modrm),
12214 getXMMReg( gregOfRexRM(pfx,modrm) ));
12215 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12216 nameXMMReg(eregOfRexRM(pfx,modrm)));
12217 delta += 1;
sewardj80611e32012-01-20 13:07:24 +000012218 } else {
12219 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12220 gen_SEGV_if_not_16_aligned( addr );
12221 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12222 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12223 dis_buf );
12224 delta += alen;
sewardj80611e32012-01-20 13:07:24 +000012225 }
sewardjae4793e2012-08-23 18:49:59 +000012226 goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000012227 }
12228 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
12229 if (have66noF2noF3(pfx)
12230 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12231 modrm = getUChar(delta);
12232 if (epartIsReg(modrm)) {
12233 putXMMReg( eregOfRexRM(pfx,modrm),
sewardjcc3d2192013-03-27 11:37:33 +000012234 getXMMReg( gregOfRexRM(pfx,modrm) ) );
sewardj80611e32012-01-20 13:07:24 +000012235 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
sewardjcc3d2192013-03-27 11:37:33 +000012236 nameXMMReg(eregOfRexRM(pfx,modrm)));
sewardj80611e32012-01-20 13:07:24 +000012237 delta += 1;
12238 } else {
12239 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12240 gen_SEGV_if_not_16_aligned( addr );
12241 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12242 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12243 dis_buf );
12244 delta += alen;
12245 }
12246 goto decode_success;
12247 }
12248 break;
12249
12250 case 0x2A:
12251 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
12252 half xmm */
12253 if (haveNo66noF2noF3(pfx) && sz == 4) {
12254 IRTemp arg64 = newTemp(Ity_I64);
12255 IRTemp rmode = newTemp(Ity_I32);
12256
12257 modrm = getUChar(delta);
12258 do_MMX_preamble();
12259 if (epartIsReg(modrm)) {
12260 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
12261 delta += 1;
12262 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
12263 nameXMMReg(gregOfRexRM(pfx,modrm)));
12264 } else {
12265 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12266 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
12267 delta += alen;
12268 DIP("cvtpi2ps %s,%s\n", dis_buf,
12269 nameXMMReg(gregOfRexRM(pfx,modrm)) );
12270 }
12271
12272 assign( rmode, get_sse_roundingmode() );
12273
12274 putXMMRegLane32F(
12275 gregOfRexRM(pfx,modrm), 0,
12276 binop(Iop_F64toF32,
12277 mkexpr(rmode),
12278 unop(Iop_I32StoF64,
12279 unop(Iop_64to32, mkexpr(arg64)) )) );
12280
12281 putXMMRegLane32F(
12282 gregOfRexRM(pfx,modrm), 1,
12283 binop(Iop_F64toF32,
12284 mkexpr(rmode),
12285 unop(Iop_I32StoF64,
12286 unop(Iop_64HIto32, mkexpr(arg64)) )) );
12287
12288 goto decode_success;
12289 }
12290 /* F3 0F 2A = CVTSI2SS
12291 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm
12292 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */
12293 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
12294 IRTemp rmode = newTemp(Ity_I32);
12295 assign( rmode, get_sse_roundingmode() );
12296 modrm = getUChar(delta);
12297 if (sz == 4) {
12298 IRTemp arg32 = newTemp(Ity_I32);
12299 if (epartIsReg(modrm)) {
12300 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
12301 delta += 1;
12302 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
12303 nameXMMReg(gregOfRexRM(pfx,modrm)));
12304 } else {
12305 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12306 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
12307 delta += alen;
12308 DIP("cvtsi2ss %s,%s\n", dis_buf,
12309 nameXMMReg(gregOfRexRM(pfx,modrm)) );
12310 }
12311 putXMMRegLane32F(
12312 gregOfRexRM(pfx,modrm), 0,
12313 binop(Iop_F64toF32,
12314 mkexpr(rmode),
12315 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
12316 } else {
12317 /* sz == 8 */
12318 IRTemp arg64 = newTemp(Ity_I64);
12319 if (epartIsReg(modrm)) {
12320 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
12321 delta += 1;
12322 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
12323 nameXMMReg(gregOfRexRM(pfx,modrm)));
12324 } else {
12325 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12326 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
12327 delta += alen;
12328 DIP("cvtsi2ssq %s,%s\n", dis_buf,
12329 nameXMMReg(gregOfRexRM(pfx,modrm)) );
12330 }
12331 putXMMRegLane32F(
12332 gregOfRexRM(pfx,modrm), 0,
12333 binop(Iop_F64toF32,
12334 mkexpr(rmode),
12335 binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) );
12336 }
12337 goto decode_success;
12338 }
12339 /* F2 0F 2A = CVTSI2SD
12340 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm
12341 when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm
12342 */
12343 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
12344 modrm = getUChar(delta);
12345 if (sz == 4) {
12346 IRTemp arg32 = newTemp(Ity_I32);
12347 if (epartIsReg(modrm)) {
12348 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
12349 delta += 1;
sewardjc4530ae2012-05-21 10:18:49 +000012350 DIP("cvtsi2sdl %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
12351 nameXMMReg(gregOfRexRM(pfx,modrm)));
sewardj80611e32012-01-20 13:07:24 +000012352 } else {
12353 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12354 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
12355 delta += alen;
sewardjc4530ae2012-05-21 10:18:49 +000012356 DIP("cvtsi2sdl %s,%s\n", dis_buf,
12357 nameXMMReg(gregOfRexRM(pfx,modrm)) );
sewardj80611e32012-01-20 13:07:24 +000012358 }
12359 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
12360 unop(Iop_I32StoF64, mkexpr(arg32))
12361 );
12362 } else {
12363 /* sz == 8 */
12364 IRTemp arg64 = newTemp(Ity_I64);
12365 if (epartIsReg(modrm)) {
12366 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
12367 delta += 1;
12368 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
12369 nameXMMReg(gregOfRexRM(pfx,modrm)));
12370 } else {
12371 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12372 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
12373 delta += alen;
12374 DIP("cvtsi2sdq %s,%s\n", dis_buf,
12375 nameXMMReg(gregOfRexRM(pfx,modrm)) );
12376 }
12377 putXMMRegLane64F(
12378 gregOfRexRM(pfx,modrm),
12379 0,
12380 binop( Iop_I64StoF64,
12381 get_sse_roundingmode(),
12382 mkexpr(arg64)
12383 )
12384 );
12385 }
12386 goto decode_success;
12387 }
12388 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
12389 xmm(G) */
12390 if (have66noF2noF3(pfx) && sz == 2) {
12391 IRTemp arg64 = newTemp(Ity_I64);
12392
12393 modrm = getUChar(delta);
12394 if (epartIsReg(modrm)) {
12395 /* Only switch to MMX mode if the source is a MMX register.
12396 This is inconsistent with all other instructions which
12397 convert between XMM and (M64 or MMX), which always switch
12398 to MMX mode even if 64-bit operand is M64 and not MMX. At
12399 least, that's what the Intel docs seem to me to say.
12400 Fixes #210264. */
12401 do_MMX_preamble();
12402 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
12403 delta += 1;
12404 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
12405 nameXMMReg(gregOfRexRM(pfx,modrm)));
12406 } else {
12407 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12408 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
12409 delta += alen;
12410 DIP("cvtpi2pd %s,%s\n", dis_buf,
12411 nameXMMReg(gregOfRexRM(pfx,modrm)) );
12412 }
12413
12414 putXMMRegLane64F(
12415 gregOfRexRM(pfx,modrm), 0,
12416 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
12417 );
12418
12419 putXMMRegLane64F(
12420 gregOfRexRM(pfx,modrm), 1,
12421 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
12422 );
12423
12424 goto decode_success;
12425 }
12426 break;
12427
12428 case 0x2B:
12429 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
12430 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
12431 if ( (haveNo66noF2noF3(pfx) && sz == 4)
12432 || (have66noF2noF3(pfx) && sz == 2) ) {
12433 modrm = getUChar(delta);
12434 if (!epartIsReg(modrm)) {
12435 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12436 gen_SEGV_if_not_16_aligned( addr );
12437 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12438 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
12439 dis_buf,
12440 nameXMMReg(gregOfRexRM(pfx,modrm)));
12441 delta += alen;
12442 goto decode_success;
12443 }
12444 /* else fall through */
12445 }
12446 break;
12447
12448 case 0x2C:
12449 case 0x2D:
12450 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
12451 I32 in mmx, according to prevailing SSE rounding mode */
12452 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
12453 I32 in mmx, rounding towards zero */
12454 if (haveNo66noF2noF3(pfx) && sz == 4) {
12455 IRTemp dst64 = newTemp(Ity_I64);
12456 IRTemp rmode = newTemp(Ity_I32);
12457 IRTemp f32lo = newTemp(Ity_F32);
12458 IRTemp f32hi = newTemp(Ity_F32);
12459 Bool r2zero = toBool(opc == 0x2C);
12460
12461 do_MMX_preamble();
12462 modrm = getUChar(delta);
12463
12464 if (epartIsReg(modrm)) {
12465 delta += 1;
12466 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
12467 assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1));
12468 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
12469 nameXMMReg(eregOfRexRM(pfx,modrm)),
12470 nameMMXReg(gregLO3ofRM(modrm)));
12471 } else {
12472 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12473 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
12474 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64,
12475 mkexpr(addr),
12476 mkU64(4) )));
12477 delta += alen;
12478 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
12479 dis_buf,
12480 nameMMXReg(gregLO3ofRM(modrm)));
12481 }
12482
12483 if (r2zero) {
12484 assign(rmode, mkU32((UInt)Irrm_ZERO) );
12485 } else {
12486 assign( rmode, get_sse_roundingmode() );
12487 }
12488
12489 assign(
12490 dst64,
12491 binop( Iop_32HLto64,
12492 binop( Iop_F64toI32S,
12493 mkexpr(rmode),
12494 unop( Iop_F32toF64, mkexpr(f32hi) ) ),
12495 binop( Iop_F64toI32S,
12496 mkexpr(rmode),
12497 unop( Iop_F32toF64, mkexpr(f32lo) ) )
12498 )
12499 );
12500
12501 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
12502 goto decode_success;
12503 }
12504 /* F3 0F 2D = CVTSS2SI
12505 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
12506 according to prevailing SSE rounding mode
12507 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
12508 according to prevailing SSE rounding mode
12509 */
12510 /* F3 0F 2C = CVTTSS2SI
12511 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
12512 truncating towards zero
12513 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
12514 truncating towards zero
12515 */
12516 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
sewardj80804d12012-05-22 10:48:13 +000012517 delta = dis_CVTxSS2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
sewardj80611e32012-01-20 13:07:24 +000012518 goto decode_success;
12519 }
12520 /* F2 0F 2D = CVTSD2SI
12521 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
12522 according to prevailing SSE rounding mode
12523 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
12524 according to prevailing SSE rounding mode
12525 */
12526 /* F2 0F 2C = CVTTSD2SI
12527 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
12528 truncating towards zero
12529 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
12530 truncating towards zero
12531 */
12532 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
sewardjc4530ae2012-05-21 10:18:49 +000012533 delta = dis_CVTxSD2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
sewardj80611e32012-01-20 13:07:24 +000012534 goto decode_success;
12535 }
12536 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
12537 I32 in mmx, according to prevailing SSE rounding mode */
12538 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
12539 I32 in mmx, rounding towards zero */
12540 if (have66noF2noF3(pfx) && sz == 2) {
12541 IRTemp dst64 = newTemp(Ity_I64);
12542 IRTemp rmode = newTemp(Ity_I32);
12543 IRTemp f64lo = newTemp(Ity_F64);
12544 IRTemp f64hi = newTemp(Ity_F64);
12545 Bool r2zero = toBool(opc == 0x2C);
12546
12547 do_MMX_preamble();
12548 modrm = getUChar(delta);
12549
12550 if (epartIsReg(modrm)) {
12551 delta += 1;
12552 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
12553 assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1));
12554 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
12555 nameXMMReg(eregOfRexRM(pfx,modrm)),
12556 nameMMXReg(gregLO3ofRM(modrm)));
12557 } else {
12558 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12559 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
12560 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64,
12561 mkexpr(addr),
12562 mkU64(8) )));
12563 delta += alen;
12564 DIP("cvt%spf2pi %s,%s\n", r2zero ? "t" : "",
12565 dis_buf,
12566 nameMMXReg(gregLO3ofRM(modrm)));
12567 }
12568
12569 if (r2zero) {
12570 assign(rmode, mkU32((UInt)Irrm_ZERO) );
12571 } else {
12572 assign( rmode, get_sse_roundingmode() );
12573 }
12574
12575 assign(
12576 dst64,
12577 binop( Iop_32HLto64,
12578 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
12579 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
12580 )
12581 );
12582
12583 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
12584 goto decode_success;
12585 }
12586 break;
12587
12588 case 0x2E:
12589 case 0x2F:
sewardj80611e32012-01-20 13:07:24 +000012590 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
sewardj4ed05e02012-06-18 15:01:30 +000012591 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
sewardj80611e32012-01-20 13:07:24 +000012592 if (have66noF2noF3(pfx) && sz == 2) {
sewardjc4530ae2012-05-21 10:18:49 +000012593 delta = dis_COMISD( vbi, pfx, delta, False/*!isAvx*/, opc );
sewardj80611e32012-01-20 13:07:24 +000012594 goto decode_success;
12595 }
sewardj80611e32012-01-20 13:07:24 +000012596 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
sewardj4ed05e02012-06-18 15:01:30 +000012597 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
sewardj80611e32012-01-20 13:07:24 +000012598 if (haveNo66noF2noF3(pfx) && sz == 4) {
sewardjc4530ae2012-05-21 10:18:49 +000012599 delta = dis_COMISS( vbi, pfx, delta, False/*!isAvx*/, opc );
sewardj80611e32012-01-20 13:07:24 +000012600 goto decode_success;
12601 }
12602 break;
12603
12604 case 0x50:
12605 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
12606 to 4 lowest bits of ireg(G) */
sewardj8eb7ae82012-06-24 14:00:27 +000012607 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
12608 && epartIsReg(getUChar(delta))) {
sewardj80611e32012-01-20 13:07:24 +000012609 /* sz == 8 is a kludge to handle insns with REX.W redundantly
12610 set to 1, which has been known to happen:
12611
12612 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d
12613
12614 20071106: Intel docs say that REX.W isn't redundant: when
12615 present, a 64-bit register is written; when not present, only
12616 the 32-bit half is written. However, testing on a Core2
12617 machine suggests the entire 64 bit register is written
12618 irrespective of the status of REX.W. That could be because
12619 of the default rule that says "if the lower half of a 32-bit
12620 register is written, the upper half is zeroed". By using
12621 putIReg32 here we inadvertently produce the same behaviour as
12622 the Core2, for the same reason -- putIReg32 implements said
12623 rule.
12624
12625 AMD docs give no indication that REX.W is even valid for this
12626 insn. */
sewardj8eb7ae82012-06-24 14:00:27 +000012627 delta = dis_MOVMSKPS_128( vbi, pfx, delta, False/*!isAvx*/ );
12628 goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000012629 }
12630 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
12631 2 lowest bits of ireg(G) */
12632 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
12633 /* sz == 8 is a kludge to handle insns with REX.W redundantly
12634 set to 1, which has been known to happen:
12635 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d
12636 20071106: see further comments on MOVMSKPS implementation above.
12637 */
sewardj8eb7ae82012-06-24 14:00:27 +000012638 delta = dis_MOVMSKPD_128( vbi, pfx, delta, False/*!isAvx*/ );
12639 goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000012640 }
12641 break;
12642
12643 case 0x51:
12644 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
12645 if (haveF3no66noF2(pfx) && sz == 4) {
12646 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
12647 "sqrtss", Iop_Sqrt32F0x4 );
12648 goto decode_success;
12649 }
12650 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
12651 if (haveNo66noF2noF3(pfx) && sz == 4) {
12652 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
12653 "sqrtps", Iop_Sqrt32Fx4 );
12654 goto decode_success;
12655 }
12656 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
12657 if (haveF2no66noF3(pfx) && sz == 4) {
12658 delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta,
12659 "sqrtsd", Iop_Sqrt64F0x2 );
12660 goto decode_success;
12661 }
12662 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
12663 if (have66noF2noF3(pfx) && sz == 2) {
12664 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
12665 "sqrtpd", Iop_Sqrt64Fx2 );
12666 goto decode_success;
12667 }
12668 break;
12669
12670 case 0x52:
12671 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
12672 if (haveF3no66noF2(pfx) && sz == 4) {
12673 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
sewardj1ddee212014-08-24 14:00:19 +000012674 "rsqrtss", Iop_RSqrtEst32F0x4 );
sewardj80611e32012-01-20 13:07:24 +000012675 goto decode_success;
12676 }
12677 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
12678 if (haveNo66noF2noF3(pfx) && sz == 4) {
12679 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
sewardj1ddee212014-08-24 14:00:19 +000012680 "rsqrtps", Iop_RSqrtEst32Fx4 );
sewardj80611e32012-01-20 13:07:24 +000012681 goto decode_success;
12682 }
12683 break;
12684
12685 case 0x53:
12686 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
12687 if (haveF3no66noF2(pfx) && sz == 4) {
12688 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
sewardj1ddee212014-08-24 14:00:19 +000012689 "rcpss", Iop_RecipEst32F0x4 );
sewardj80611e32012-01-20 13:07:24 +000012690 goto decode_success;
12691 }
12692 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
12693 if (haveNo66noF2noF3(pfx) && sz == 4) {
12694 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
sewardj1ddee212014-08-24 14:00:19 +000012695 "rcpps", Iop_RecipEst32Fx4 );
sewardj80611e32012-01-20 13:07:24 +000012696 goto decode_success;
12697 }
12698 break;
12699
12700 case 0x54:
12701 /* 0F 54 = ANDPS -- G = G and E */
12702 if (haveNo66noF2noF3(pfx) && sz == 4) {
12703 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andps", Iop_AndV128 );
12704 goto decode_success;
12705 }
12706 /* 66 0F 54 = ANDPD -- G = G and E */
12707 if (have66noF2noF3(pfx) && sz == 2) {
12708 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andpd", Iop_AndV128 );
12709 goto decode_success;
12710 }
12711 break;
12712
12713 case 0x55:
12714 /* 0F 55 = ANDNPS -- G = (not G) and E */
12715 if (haveNo66noF2noF3(pfx) && sz == 4) {
12716 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnps",
12717 Iop_AndV128 );
12718 goto decode_success;
12719 }
12720 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
12721 if (have66noF2noF3(pfx) && sz == 2) {
12722 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnpd",
12723 Iop_AndV128 );
12724 goto decode_success;
12725 }
12726 break;
12727
12728 case 0x56:
12729 /* 0F 56 = ORPS -- G = G or E */
12730 if (haveNo66noF2noF3(pfx) && sz == 4) {
12731 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orps", Iop_OrV128 );
12732 goto decode_success;
12733 }
12734 /* 66 0F 56 = ORPD -- G = G or E */
12735 if (have66noF2noF3(pfx) && sz == 2) {
12736 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orpd", Iop_OrV128 );
12737 goto decode_success;
12738 }
12739 break;
12740
12741 case 0x57:
12742 /* 66 0F 57 = XORPD -- G = G xor E */
12743 if (have66noF2noF3(pfx) && sz == 2) {
12744 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorpd", Iop_XorV128 );
12745 goto decode_success;
12746 }
sewardjc4530ae2012-05-21 10:18:49 +000012747 /* 0F 57 = XORPS -- G = G xor E */
sewardj80611e32012-01-20 13:07:24 +000012748 if (haveNo66noF2noF3(pfx) && sz == 4) {
12749 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorps", Iop_XorV128 );
12750 goto decode_success;
12751 }
12752 break;
12753
12754 case 0x58:
12755 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
12756 if (haveNo66noF2noF3(pfx) && sz == 4) {
12757 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addps", Iop_Add32Fx4 );
12758 goto decode_success;
12759 }
12760 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
12761 if (haveF3no66noF2(pfx) && sz == 4) {
12762 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "addss", Iop_Add32F0x4 );
12763 goto decode_success;
12764 }
12765 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
12766 if (haveF2no66noF3(pfx)
12767 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12768 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "addsd", Iop_Add64F0x2 );
12769 goto decode_success;
12770 }
12771 /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
12772 if (have66noF2noF3(pfx)
12773 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12774 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addpd", Iop_Add64Fx2 );
12775 goto decode_success;
12776 }
12777 break;
12778
12779 case 0x59:
12780 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
12781 if (haveF2no66noF3(pfx)
12782 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12783 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "mulsd", Iop_Mul64F0x2 );
12784 goto decode_success;
12785 }
12786 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
12787 if (haveF3no66noF2(pfx) && sz == 4) {
12788 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "mulss", Iop_Mul32F0x4 );
12789 goto decode_success;
12790 }
12791 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
12792 if (haveNo66noF2noF3(pfx) && sz == 4) {
12793 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulps", Iop_Mul32Fx4 );
12794 goto decode_success;
12795 }
12796 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
12797 if (have66noF2noF3(pfx)
12798 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12799 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulpd", Iop_Mul64Fx2 );
12800 goto decode_success;
12801 }
12802 break;
12803
12804 case 0x5A:
12805 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
12806 F64 in xmm(G). */
12807 if (haveNo66noF2noF3(pfx) && sz == 4) {
sewardj66becf32012-06-18 23:15:16 +000012808 delta = dis_CVTPS2PD_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000012809 goto decode_success;
12810 }
12811 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
12812 low half xmm(G) */
12813 if (haveF3no66noF2(pfx) && sz == 4) {
12814 IRTemp f32lo = newTemp(Ity_F32);
12815
12816 modrm = getUChar(delta);
12817 if (epartIsReg(modrm)) {
12818 delta += 1;
12819 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
12820 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12821 nameXMMReg(gregOfRexRM(pfx,modrm)));
12822 } else {
12823 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12824 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
12825 delta += alen;
12826 DIP("cvtss2sd %s,%s\n", dis_buf,
12827 nameXMMReg(gregOfRexRM(pfx,modrm)));
12828 }
12829
12830 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
12831 unop( Iop_F32toF64, mkexpr(f32lo) ) );
12832
12833 goto decode_success;
12834 }
12835 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
12836 low 1/4 xmm(G), according to prevailing SSE rounding mode */
12837 if (haveF2no66noF3(pfx) && sz == 4) {
12838 IRTemp rmode = newTemp(Ity_I32);
12839 IRTemp f64lo = newTemp(Ity_F64);
12840
12841 modrm = getUChar(delta);
12842 if (epartIsReg(modrm)) {
12843 delta += 1;
12844 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
12845 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12846 nameXMMReg(gregOfRexRM(pfx,modrm)));
12847 } else {
12848 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12849 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
12850 delta += alen;
12851 DIP("cvtsd2ss %s,%s\n", dis_buf,
12852 nameXMMReg(gregOfRexRM(pfx,modrm)));
12853 }
12854
12855 assign( rmode, get_sse_roundingmode() );
12856 putXMMRegLane32F(
12857 gregOfRexRM(pfx,modrm), 0,
12858 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
12859 );
12860
12861 goto decode_success;
12862 }
12863 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
12864 lo half xmm(G), rounding according to prevailing SSE rounding
12865 mode, and zero upper half */
12866 /* Note, this is practically identical to CVTPD2DQ. It would have
sewardjc4530ae2012-05-21 10:18:49 +000012867 been nice to merge them together. */
sewardj80611e32012-01-20 13:07:24 +000012868 if (have66noF2noF3(pfx) && sz == 2) {
sewardj6fcd43e2012-06-14 08:51:35 +000012869 delta = dis_CVTPD2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000012870 goto decode_success;
12871 }
12872 break;
12873
12874 case 0x5B:
12875 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
12876 xmm(G), rounding towards zero */
12877 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
12878 xmm(G), as per the prevailing rounding mode */
12879 if ( (have66noF2noF3(pfx) && sz == 2)
12880 || (haveF3no66noF2(pfx) && sz == 4) ) {
sewardj251b59e2012-05-25 13:51:07 +000012881 Bool r2zero = toBool(sz == 4); // FIXME -- unreliable (???)
sewardj66becf32012-06-18 23:15:16 +000012882 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, False/*!isAvx*/, r2zero );
sewardj80611e32012-01-20 13:07:24 +000012883 goto decode_success;
12884 }
12885 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
12886 xmm(G) */
12887 if (haveNo66noF2noF3(pfx) && sz == 4) {
sewardj66becf32012-06-18 23:15:16 +000012888 delta = dis_CVTDQ2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000012889 goto decode_success;
12890 }
12891 break;
12892
12893 case 0x5C:
12894 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
12895 if (haveF3no66noF2(pfx) && sz == 4) {
12896 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "subss", Iop_Sub32F0x4 );
12897 goto decode_success;
12898 }
12899 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
12900 if (haveF2no66noF3(pfx)
12901 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12902 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "subsd", Iop_Sub64F0x2 );
12903 goto decode_success;
12904 }
12905 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
12906 if (haveNo66noF2noF3(pfx) && sz == 4) {
12907 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subps", Iop_Sub32Fx4 );
12908 goto decode_success;
12909 }
12910 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
12911 if (have66noF2noF3(pfx) && sz == 2) {
12912 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subpd", Iop_Sub64Fx2 );
12913 goto decode_success;
12914 }
12915 break;
12916
12917 case 0x5D:
12918 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
12919 if (haveNo66noF2noF3(pfx) && sz == 4) {
12920 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minps", Iop_Min32Fx4 );
12921 goto decode_success;
12922 }
12923 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
12924 if (haveF3no66noF2(pfx) && sz == 4) {
12925 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "minss", Iop_Min32F0x4 );
12926 goto decode_success;
12927 }
12928 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
12929 if (haveF2no66noF3(pfx) && sz == 4) {
12930 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "minsd", Iop_Min64F0x2 );
12931 goto decode_success;
12932 }
12933 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
12934 if (have66noF2noF3(pfx) && sz == 2) {
12935 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minpd", Iop_Min64Fx2 );
12936 goto decode_success;
12937 }
12938 break;
12939
12940 case 0x5E:
12941 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
12942 if (haveF2no66noF3(pfx) && sz == 4) {
12943 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "divsd", Iop_Div64F0x2 );
12944 goto decode_success;
12945 }
12946 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
12947 if (haveNo66noF2noF3(pfx) && sz == 4) {
12948 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divps", Iop_Div32Fx4 );
12949 goto decode_success;
12950 }
12951 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
12952 if (haveF3no66noF2(pfx) && sz == 4) {
12953 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "divss", Iop_Div32F0x4 );
12954 goto decode_success;
12955 }
12956 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
12957 if (have66noF2noF3(pfx) && sz == 2) {
12958 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divpd", Iop_Div64Fx2 );
12959 goto decode_success;
12960 }
12961 break;
12962
12963 case 0x5F:
12964 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
12965 if (haveNo66noF2noF3(pfx) && sz == 4) {
12966 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxps", Iop_Max32Fx4 );
12967 goto decode_success;
12968 }
12969 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
12970 if (haveF3no66noF2(pfx) && sz == 4) {
12971 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "maxss", Iop_Max32F0x4 );
12972 goto decode_success;
12973 }
12974 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
12975 if (haveF2no66noF3(pfx) && sz == 4) {
12976 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "maxsd", Iop_Max64F0x2 );
12977 goto decode_success;
12978 }
12979 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
12980 if (have66noF2noF3(pfx) && sz == 2) {
12981 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxpd", Iop_Max64Fx2 );
12982 goto decode_success;
12983 }
12984 break;
12985
12986 case 0x60:
12987 /* 66 0F 60 = PUNPCKLBW */
12988 if (have66noF2noF3(pfx) && sz == 2) {
12989 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12990 "punpcklbw",
12991 Iop_InterleaveLO8x16, True );
12992 goto decode_success;
12993 }
12994 break;
12995
12996 case 0x61:
12997 /* 66 0F 61 = PUNPCKLWD */
12998 if (have66noF2noF3(pfx) && sz == 2) {
12999 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13000 "punpcklwd",
13001 Iop_InterleaveLO16x8, True );
13002 goto decode_success;
13003 }
13004 break;
13005
13006 case 0x62:
13007 /* 66 0F 62 = PUNPCKLDQ */
13008 if (have66noF2noF3(pfx) && sz == 2) {
13009 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13010 "punpckldq",
13011 Iop_InterleaveLO32x4, True );
13012 goto decode_success;
13013 }
13014 break;
13015
13016 case 0x63:
13017 /* 66 0F 63 = PACKSSWB */
13018 if (have66noF2noF3(pfx) && sz == 2) {
13019 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13020 "packsswb",
13021 Iop_QNarrowBin16Sto8Sx16, True );
13022 goto decode_success;
13023 }
13024 break;
13025
13026 case 0x64:
13027 /* 66 0F 64 = PCMPGTB */
13028 if (have66noF2noF3(pfx) && sz == 2) {
13029 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13030 "pcmpgtb", Iop_CmpGT8Sx16, False );
13031 goto decode_success;
13032 }
13033 break;
13034
13035 case 0x65:
13036 /* 66 0F 65 = PCMPGTW */
13037 if (have66noF2noF3(pfx) && sz == 2) {
13038 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13039 "pcmpgtw", Iop_CmpGT16Sx8, False );
13040 goto decode_success;
13041 }
13042 break;
13043
13044 case 0x66:
13045 /* 66 0F 66 = PCMPGTD */
13046 if (have66noF2noF3(pfx) && sz == 2) {
13047 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13048 "pcmpgtd", Iop_CmpGT32Sx4, False );
13049 goto decode_success;
13050 }
13051 break;
13052
13053 case 0x67:
13054 /* 66 0F 67 = PACKUSWB */
13055 if (have66noF2noF3(pfx) && sz == 2) {
13056 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13057 "packuswb",
13058 Iop_QNarrowBin16Sto8Ux16, True );
13059 goto decode_success;
13060 }
13061 break;
13062
13063 case 0x68:
13064 /* 66 0F 68 = PUNPCKHBW */
13065 if (have66noF2noF3(pfx) && sz == 2) {
13066 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13067 "punpckhbw",
13068 Iop_InterleaveHI8x16, True );
13069 goto decode_success;
13070 }
13071 break;
13072
13073 case 0x69:
13074 /* 66 0F 69 = PUNPCKHWD */
13075 if (have66noF2noF3(pfx) && sz == 2) {
13076 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13077 "punpckhwd",
13078 Iop_InterleaveHI16x8, True );
13079 goto decode_success;
13080 }
13081 break;
13082
13083 case 0x6A:
13084 /* 66 0F 6A = PUNPCKHDQ */
13085 if (have66noF2noF3(pfx) && sz == 2) {
13086 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13087 "punpckhdq",
13088 Iop_InterleaveHI32x4, True );
13089 goto decode_success;
13090 }
13091 break;
13092
13093 case 0x6B:
13094 /* 66 0F 6B = PACKSSDW */
13095 if (have66noF2noF3(pfx) && sz == 2) {
13096 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13097 "packssdw",
13098 Iop_QNarrowBin32Sto16Sx8, True );
13099 goto decode_success;
13100 }
13101 break;
13102
13103 case 0x6C:
13104 /* 66 0F 6C = PUNPCKLQDQ */
13105 if (have66noF2noF3(pfx) && sz == 2) {
13106 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13107 "punpcklqdq",
13108 Iop_InterleaveLO64x2, True );
13109 goto decode_success;
13110 }
13111 break;
13112
13113 case 0x6D:
13114 /* 66 0F 6D = PUNPCKHQDQ */
13115 if (have66noF2noF3(pfx) && sz == 2) {
13116 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13117 "punpckhqdq",
13118 Iop_InterleaveHI64x2, True );
13119 goto decode_success;
13120 }
13121 break;
13122
13123 case 0x6E:
13124 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4,
13125 zeroing high 3/4 of xmm. */
13126 /* or from ireg64/m64 to xmm lo 1/2,
13127 zeroing high 1/2 of xmm. */
13128 if (have66noF2noF3(pfx)) {
13129 vassert(sz == 2 || sz == 8);
13130 if (sz == 2) sz = 4;
13131 modrm = getUChar(delta);
13132 if (epartIsReg(modrm)) {
13133 delta += 1;
13134 if (sz == 4) {
13135 putXMMReg(
13136 gregOfRexRM(pfx,modrm),
13137 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
13138 );
13139 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
13140 nameXMMReg(gregOfRexRM(pfx,modrm)));
13141 } else {
13142 putXMMReg(
13143 gregOfRexRM(pfx,modrm),
13144 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
13145 );
13146 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
13147 nameXMMReg(gregOfRexRM(pfx,modrm)));
13148 }
13149 } else {
13150 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
13151 delta += alen;
13152 putXMMReg(
13153 gregOfRexRM(pfx,modrm),
13154 sz == 4
13155 ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
13156 : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) )
13157 );
13158 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf,
13159 nameXMMReg(gregOfRexRM(pfx,modrm)));
13160 }
13161 goto decode_success;
13162 }
13163 break;
13164
13165 case 0x6F:
13166 if (have66noF2noF3(pfx)
13167 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13168 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
13169 modrm = getUChar(delta);
13170 if (epartIsReg(modrm)) {
13171 putXMMReg( gregOfRexRM(pfx,modrm),
13172 getXMMReg( eregOfRexRM(pfx,modrm) ));
13173 DIP("movdqa %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13174 nameXMMReg(gregOfRexRM(pfx,modrm)));
13175 delta += 1;
13176 } else {
13177 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13178 gen_SEGV_if_not_16_aligned( addr );
13179 putXMMReg( gregOfRexRM(pfx,modrm),
13180 loadLE(Ity_V128, mkexpr(addr)) );
13181 DIP("movdqa %s,%s\n", dis_buf,
13182 nameXMMReg(gregOfRexRM(pfx,modrm)));
13183 delta += alen;
13184 }
13185 goto decode_success;
13186 }
13187 if (haveF3no66noF2(pfx) && sz == 4) {
13188 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
13189 modrm = getUChar(delta);
13190 if (epartIsReg(modrm)) {
13191 putXMMReg( gregOfRexRM(pfx,modrm),
13192 getXMMReg( eregOfRexRM(pfx,modrm) ));
13193 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13194 nameXMMReg(gregOfRexRM(pfx,modrm)));
13195 delta += 1;
13196 } else {
13197 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13198 putXMMReg( gregOfRexRM(pfx,modrm),
13199 loadLE(Ity_V128, mkexpr(addr)) );
13200 DIP("movdqu %s,%s\n", dis_buf,
13201 nameXMMReg(gregOfRexRM(pfx,modrm)));
13202 delta += alen;
13203 }
13204 goto decode_success;
13205 }
13206 break;
13207
13208 case 0x70:
13209 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
13210 if (have66noF2noF3(pfx) && sz == 2) {
sewardjc4530ae2012-05-21 10:18:49 +000013211 delta = dis_PSHUFD_32x4( vbi, pfx, delta, False/*!writesYmm*/);
sewardj80611e32012-01-20 13:07:24 +000013212 goto decode_success;
13213 }
13214 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13215 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
13216 if (haveNo66noF2noF3(pfx) && sz == 4) {
13217 Int order;
13218 IRTemp sV, dV, s3, s2, s1, s0;
13219 s3 = s2 = s1 = s0 = IRTemp_INVALID;
13220 sV = newTemp(Ity_I64);
13221 dV = newTemp(Ity_I64);
13222 do_MMX_preamble();
13223 modrm = getUChar(delta);
13224 if (epartIsReg(modrm)) {
13225 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13226 order = (Int)getUChar(delta+1);
13227 delta += 1+1;
13228 DIP("pshufw $%d,%s,%s\n", order,
13229 nameMMXReg(eregLO3ofRM(modrm)),
13230 nameMMXReg(gregLO3ofRM(modrm)));
13231 } else {
13232 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
13233 1/*extra byte after amode*/ );
13234 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13235 order = (Int)getUChar(delta+alen);
13236 delta += 1+alen;
13237 DIP("pshufw $%d,%s,%s\n", order,
13238 dis_buf,
13239 nameMMXReg(gregLO3ofRM(modrm)));
13240 }
13241 breakup64to16s( sV, &s3, &s2, &s1, &s0 );
13242# define SEL(n) \
13243 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
13244 assign(dV,
sewardjcc3d2192013-03-27 11:37:33 +000013245 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
sewardj80611e32012-01-20 13:07:24 +000013246 SEL((order>>2)&3), SEL((order>>0)&3) )
13247 );
13248 putMMXReg(gregLO3ofRM(modrm), mkexpr(dV));
13249# undef SEL
13250 goto decode_success;
13251 }
13252 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
13253 mem) to G(xmm), and copy upper half */
13254 if (haveF2no66noF3(pfx) && sz == 4) {
sewardj251b59e2012-05-25 13:51:07 +000013255 delta = dis_PSHUFxW_128( vbi, pfx, delta,
13256 False/*!isAvx*/, False/*!xIsH*/ );
sewardj80611e32012-01-20 13:07:24 +000013257 goto decode_success;
13258 }
13259 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
13260 mem) to G(xmm), and copy lower half */
13261 if (haveF3no66noF2(pfx) && sz == 4) {
sewardj251b59e2012-05-25 13:51:07 +000013262 delta = dis_PSHUFxW_128( vbi, pfx, delta,
13263 False/*!isAvx*/, True/*xIsH*/ );
sewardj80611e32012-01-20 13:07:24 +000013264 goto decode_success;
13265 }
13266 break;
13267
13268 case 0x71:
13269 /* 66 0F 71 /2 ib = PSRLW by immediate */
13270 if (have66noF2noF3(pfx) && sz == 2
13271 && epartIsReg(getUChar(delta))
13272 && gregLO3ofRM(getUChar(delta)) == 2) {
13273 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlw", Iop_ShrN16x8 );
13274 goto decode_success;
13275 }
13276 /* 66 0F 71 /4 ib = PSRAW by immediate */
13277 if (have66noF2noF3(pfx) && sz == 2
13278 && epartIsReg(getUChar(delta))
13279 && gregLO3ofRM(getUChar(delta)) == 4) {
13280 delta = dis_SSE_shiftE_imm( pfx, delta, "psraw", Iop_SarN16x8 );
13281 goto decode_success;
13282 }
13283 /* 66 0F 71 /6 ib = PSLLW by immediate */
13284 if (have66noF2noF3(pfx) && sz == 2
13285 && epartIsReg(getUChar(delta))
13286 && gregLO3ofRM(getUChar(delta)) == 6) {
13287 delta = dis_SSE_shiftE_imm( pfx, delta, "psllw", Iop_ShlN16x8 );
13288 goto decode_success;
13289 }
13290 break;
13291
13292 case 0x72:
13293 /* 66 0F 72 /2 ib = PSRLD by immediate */
13294 if (have66noF2noF3(pfx) && sz == 2
13295 && epartIsReg(getUChar(delta))
13296 && gregLO3ofRM(getUChar(delta)) == 2) {
13297 delta = dis_SSE_shiftE_imm( pfx, delta, "psrld", Iop_ShrN32x4 );
13298 goto decode_success;
13299 }
13300 /* 66 0F 72 /4 ib = PSRAD by immediate */
13301 if (have66noF2noF3(pfx) && sz == 2
13302 && epartIsReg(getUChar(delta))
13303 && gregLO3ofRM(getUChar(delta)) == 4) {
13304 delta = dis_SSE_shiftE_imm( pfx, delta, "psrad", Iop_SarN32x4 );
13305 goto decode_success;
13306 }
13307 /* 66 0F 72 /6 ib = PSLLD by immediate */
13308 if (have66noF2noF3(pfx) && sz == 2
13309 && epartIsReg(getUChar(delta))
13310 && gregLO3ofRM(getUChar(delta)) == 6) {
13311 delta = dis_SSE_shiftE_imm( pfx, delta, "pslld", Iop_ShlN32x4 );
13312 goto decode_success;
13313 }
13314 break;
13315
13316 case 0x73:
13317 /* 66 0F 73 /3 ib = PSRLDQ by immediate */
13318 /* note, if mem case ever filled in, 1 byte after amode */
13319 if (have66noF2noF3(pfx) && sz == 2
13320 && epartIsReg(getUChar(delta))
13321 && gregLO3ofRM(getUChar(delta)) == 3) {
sewardjc4530ae2012-05-21 10:18:49 +000013322 Int imm = (Int)getUChar(delta+1);
13323 Int reg = eregOfRexRM(pfx,getUChar(delta));
sewardj80611e32012-01-20 13:07:24 +000013324 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
sewardj80611e32012-01-20 13:07:24 +000013325 delta += 2;
sewardjc4530ae2012-05-21 10:18:49 +000013326 IRTemp sV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000013327 assign( sV, getXMMReg(reg) );
sewardj251b59e2012-05-25 13:51:07 +000013328 putXMMReg(reg, mkexpr(math_PSRLDQ( sV, imm )));
sewardj80611e32012-01-20 13:07:24 +000013329 goto decode_success;
13330 }
13331 /* 66 0F 73 /7 ib = PSLLDQ by immediate */
13332 /* note, if mem case ever filled in, 1 byte after amode */
13333 if (have66noF2noF3(pfx) && sz == 2
13334 && epartIsReg(getUChar(delta))
13335 && gregLO3ofRM(getUChar(delta)) == 7) {
sewardj251b59e2012-05-25 13:51:07 +000013336 Int imm = (Int)getUChar(delta+1);
13337 Int reg = eregOfRexRM(pfx,getUChar(delta));
sewardj80611e32012-01-20 13:07:24 +000013338 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
13339 vassert(imm >= 0 && imm <= 255);
13340 delta += 2;
sewardj251b59e2012-05-25 13:51:07 +000013341 IRTemp sV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000013342 assign( sV, getXMMReg(reg) );
sewardj251b59e2012-05-25 13:51:07 +000013343 putXMMReg(reg, mkexpr(math_PSLLDQ( sV, imm )));
sewardj80611e32012-01-20 13:07:24 +000013344 goto decode_success;
13345 }
13346 /* 66 0F 73 /2 ib = PSRLQ by immediate */
13347 if (have66noF2noF3(pfx) && sz == 2
13348 && epartIsReg(getUChar(delta))
13349 && gregLO3ofRM(getUChar(delta)) == 2) {
13350 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlq", Iop_ShrN64x2 );
13351 goto decode_success;
13352 }
13353 /* 66 0F 73 /6 ib = PSLLQ by immediate */
13354 if (have66noF2noF3(pfx) && sz == 2
13355 && epartIsReg(getUChar(delta))
13356 && gregLO3ofRM(getUChar(delta)) == 6) {
13357 delta = dis_SSE_shiftE_imm( pfx, delta, "psllq", Iop_ShlN64x2 );
13358 goto decode_success;
13359 }
13360 break;
13361
13362 case 0x74:
13363 /* 66 0F 74 = PCMPEQB */
13364 if (have66noF2noF3(pfx) && sz == 2) {
13365 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13366 "pcmpeqb", Iop_CmpEQ8x16, False );
13367 goto decode_success;
13368 }
13369 break;
13370
13371 case 0x75:
13372 /* 66 0F 75 = PCMPEQW */
13373 if (have66noF2noF3(pfx) && sz == 2) {
13374 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13375 "pcmpeqw", Iop_CmpEQ16x8, False );
13376 goto decode_success;
13377 }
13378 break;
13379
13380 case 0x76:
13381 /* 66 0F 76 = PCMPEQD */
13382 if (have66noF2noF3(pfx) && sz == 2) {
13383 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13384 "pcmpeqd", Iop_CmpEQ32x4, False );
13385 goto decode_success;
13386 }
13387 break;
13388
13389 case 0x7E:
13390 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
13391 G (lo half xmm). Upper half of G is zeroed out. */
13392 if (haveF3no66noF2(pfx)
13393 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13394 modrm = getUChar(delta);
13395 if (epartIsReg(modrm)) {
13396 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
13397 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
13398 /* zero bits 127:64 */
13399 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) );
13400 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13401 nameXMMReg(gregOfRexRM(pfx,modrm)));
13402 delta += 1;
13403 } else {
13404 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13405 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
13406 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
13407 loadLE(Ity_I64, mkexpr(addr)) );
13408 DIP("movsd %s,%s\n", dis_buf,
13409 nameXMMReg(gregOfRexRM(pfx,modrm)));
13410 delta += alen;
13411 }
13412 goto decode_success;
13413 }
13414 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */
13415 /* or from xmm low 1/2 to ireg64 or m64. */
13416 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
13417 if (sz == 2) sz = 4;
13418 modrm = getUChar(delta);
13419 if (epartIsReg(modrm)) {
13420 delta += 1;
13421 if (sz == 4) {
13422 putIReg32( eregOfRexRM(pfx,modrm),
13423 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
13424 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
13425 nameIReg32(eregOfRexRM(pfx,modrm)));
sewardjcc3d2192013-03-27 11:37:33 +000013426 } else {
sewardj80611e32012-01-20 13:07:24 +000013427 putIReg64( eregOfRexRM(pfx,modrm),
13428 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
13429 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
13430 nameIReg64(eregOfRexRM(pfx,modrm)));
sewardjcc3d2192013-03-27 11:37:33 +000013431 }
sewardj80611e32012-01-20 13:07:24 +000013432 } else {
13433 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
13434 delta += alen;
13435 storeLE( mkexpr(addr),
13436 sz == 4
13437 ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0)
13438 : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) );
13439 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q',
13440 nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
13441 }
13442 goto decode_success;
13443 }
13444 break;
13445
13446 case 0x7F:
13447 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
13448 if (haveF3no66noF2(pfx) && sz == 4) {
13449 modrm = getUChar(delta);
13450 if (epartIsReg(modrm)) {
13451 goto decode_failure; /* awaiting test case */
13452 delta += 1;
13453 putXMMReg( eregOfRexRM(pfx,modrm),
13454 getXMMReg(gregOfRexRM(pfx,modrm)) );
13455 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
13456 nameXMMReg(eregOfRexRM(pfx,modrm)));
13457 } else {
13458 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
13459 delta += alen;
13460 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
13461 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
13462 }
13463 goto decode_success;
13464 }
13465 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
13466 if (have66noF2noF3(pfx) && sz == 2) {
13467 modrm = getUChar(delta);
13468 if (epartIsReg(modrm)) {
13469 delta += 1;
13470 putXMMReg( eregOfRexRM(pfx,modrm),
13471 getXMMReg(gregOfRexRM(pfx,modrm)) );
13472 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
13473 nameXMMReg(eregOfRexRM(pfx,modrm)));
13474 } else {
13475 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
13476 gen_SEGV_if_not_16_aligned( addr );
13477 delta += alen;
13478 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
13479 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
13480 }
13481 goto decode_success;
13482 }
13483 break;
13484
13485 case 0xAE:
13486 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
13487 if (haveNo66noF2noF3(pfx)
13488 && epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
13489 && sz == 4) {
13490 delta += 1;
13491 /* Insert a memory fence. It's sometimes important that these
13492 are carried through to the generated code. */
13493 stmt( IRStmt_MBE(Imbe_Fence) );
13494 DIP("sfence\n");
13495 goto decode_success;
13496 }
13497 /* mindless duplication follows .. */
13498 /* 0F AE /5 = LFENCE -- flush pending operations to memory */
13499 /* 0F AE /6 = MFENCE -- flush pending operations to memory */
13500 if (haveNo66noF2noF3(pfx)
13501 && epartIsReg(getUChar(delta))
13502 && (gregLO3ofRM(getUChar(delta)) == 5
13503 || gregLO3ofRM(getUChar(delta)) == 6)
13504 && sz == 4) {
13505 delta += 1;
13506 /* Insert a memory fence. It's sometimes important that these
13507 are carried through to the generated code. */
13508 stmt( IRStmt_MBE(Imbe_Fence) );
13509 DIP("%sfence\n", gregLO3ofRM(getUChar(delta-1))==5 ? "l" : "m");
13510 goto decode_success;
13511 }
sewardj30fc0582012-02-16 13:45:13 +000013512
13513 /* 0F AE /7 = CLFLUSH -- flush cache line */
13514 if (haveNo66noF2noF3(pfx)
13515 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
13516 && sz == 4) {
13517
13518 /* This is something of a hack. We need to know the size of
13519 the cache line containing addr. Since we don't (easily),
13520 assume 256 on the basis that no real cache would have a
13521 line that big. It's safe to invalidate more stuff than we
13522 need, just inefficient. */
13523 ULong lineszB = 256ULL;
13524
13525 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13526 delta += alen;
13527
13528 /* Round addr down to the start of the containing block. */
13529 stmt( IRStmt_Put(
sewardj05f5e012014-05-04 10:52:11 +000013530 OFFB_CMSTART,
sewardj30fc0582012-02-16 13:45:13 +000013531 binop( Iop_And64,
13532 mkexpr(addr),
13533 mkU64( ~(lineszB-1) ))) );
13534
sewardj05f5e012014-05-04 10:52:11 +000013535 stmt( IRStmt_Put(OFFB_CMLEN, mkU64(lineszB) ) );
sewardj96c5f262012-04-13 23:03:45 +000013536
sewardj05f5e012014-05-04 10:52:11 +000013537 jmp_lit(dres, Ijk_InvalICache, (Addr64)(guest_RIP_bbstart+delta));
sewardj30fc0582012-02-16 13:45:13 +000013538
13539 DIP("clflush %s\n", dis_buf);
13540 goto decode_success;
13541 }
13542
sewardj80611e32012-01-20 13:07:24 +000013543 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
13544 if (haveNo66noF2noF3(pfx)
13545 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
13546 && sz == 4) {
sewardjfe0c5e72012-06-15 15:48:07 +000013547 delta = dis_STMXCSR(vbi, pfx, delta, False/*!isAvx*/);
sewardj80611e32012-01-20 13:07:24 +000013548 goto decode_success;
13549 }
13550 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
13551 if (haveNo66noF2noF3(pfx)
13552 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
13553 && sz == 4) {
sewardjfe0c5e72012-06-15 15:48:07 +000013554 delta = dis_LDMXCSR(vbi, pfx, delta, False/*!isAvx*/);
sewardj80611e32012-01-20 13:07:24 +000013555 goto decode_success;
13556 }
13557 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory.
13558 Note that the presence or absence of REX.W slightly affects the
13559 written format: whether the saved FPU IP and DP pointers are 64
13560 or 32 bits. But the helper function we call simply writes zero
13561 bits in the relevant fields (which are 64 bits regardless of
13562 what REX.W is) and so it's good enough (iow, equally broken) in
13563 both cases. */
13564 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
13565 && !epartIsReg(getUChar(delta))
13566 && gregOfRexRM(pfx,getUChar(delta)) == 0) {
13567 IRDirty* d;
13568 modrm = getUChar(delta);
13569 vassert(!epartIsReg(modrm));
13570
13571 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13572 delta += alen;
13573 gen_SEGV_if_not_16_aligned(addr);
13574
13575 DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
13576
13577 /* Uses dirty helper:
sewardj28d71ed2014-09-07 23:23:17 +000013578 void amd64g_do_FXSAVE_ALL_EXCEPT_XMM ( VexGuestAMD64State*,
13579 ULong ) */
sewardj80611e32012-01-20 13:07:24 +000013580 d = unsafeIRDirty_0_N (
13581 0/*regparms*/,
sewardj28d71ed2014-09-07 23:23:17 +000013582 "amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM",
13583 &amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM,
florian90419562013-08-15 20:54:52 +000013584 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
sewardj80611e32012-01-20 13:07:24 +000013585 );
sewardj80611e32012-01-20 13:07:24 +000013586
13587 /* declare we're writing memory */
13588 d->mFx = Ifx_Write;
13589 d->mAddr = mkexpr(addr);
sewardjc9069f22012-06-01 16:09:50 +000013590 d->mSize = 464; /* according to recent Intel docs */
sewardj80611e32012-01-20 13:07:24 +000013591
13592 /* declare we're reading guest state */
sewardj28d71ed2014-09-07 23:23:17 +000013593 d->nFxState = 6;
sewardjc9069f22012-06-01 16:09:50 +000013594 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj80611e32012-01-20 13:07:24 +000013595
13596 d->fxState[0].fx = Ifx_Read;
13597 d->fxState[0].offset = OFFB_FTOP;
13598 d->fxState[0].size = sizeof(UInt);
13599
13600 d->fxState[1].fx = Ifx_Read;
13601 d->fxState[1].offset = OFFB_FPREGS;
13602 d->fxState[1].size = 8 * sizeof(ULong);
13603
13604 d->fxState[2].fx = Ifx_Read;
13605 d->fxState[2].offset = OFFB_FPTAGS;
13606 d->fxState[2].size = 8 * sizeof(UChar);
13607
13608 d->fxState[3].fx = Ifx_Read;
13609 d->fxState[3].offset = OFFB_FPROUND;
13610 d->fxState[3].size = sizeof(ULong);
13611
13612 d->fxState[4].fx = Ifx_Read;
13613 d->fxState[4].offset = OFFB_FC3210;
13614 d->fxState[4].size = sizeof(ULong);
13615
13616 d->fxState[5].fx = Ifx_Read;
sewardj28d71ed2014-09-07 23:23:17 +000013617 d->fxState[5].offset = OFFB_SSEROUND;
13618 d->fxState[5].size = sizeof(ULong);
sewardj80611e32012-01-20 13:07:24 +000013619
sewardj28d71ed2014-09-07 23:23:17 +000013620 /* Call the helper. This creates all parts of the in-memory
13621 image except for the XMM[0..15] array, which we do
13622 separately, in order that any undefinedness in the XMM
13623 registers is tracked separately by Memcheck and does not
13624 "infect" the in-memory shadow for the other parts of the
13625 image (FPTOP, FPREGS, FPTAGS, FPROUND, FC3210,
13626 SSEROUND). */
sewardj80611e32012-01-20 13:07:24 +000013627 stmt( IRStmt_Dirty(d) );
13628
sewardj28d71ed2014-09-07 23:23:17 +000013629 /* And now the XMMs themselves. */
13630 UInt xmm;
13631 for (xmm = 0; xmm < 16; xmm++) {
13632 storeLE( binop(Iop_Add64, mkexpr(addr), mkU64(160 + xmm * 16)),
13633 getXMMReg(xmm) );
13634 }
13635
sewardj80611e32012-01-20 13:07:24 +000013636 goto decode_success;
13637 }
13638 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory.
13639 As with FXSAVE above we ignore the value of REX.W since we're
13640 not bothering with the FPU DP and IP fields. */
13641 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
13642 && !epartIsReg(getUChar(delta))
13643 && gregOfRexRM(pfx,getUChar(delta)) == 1) {
13644 IRDirty* d;
13645 modrm = getUChar(delta);
13646 vassert(!epartIsReg(modrm));
13647
13648 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13649 delta += alen;
13650 gen_SEGV_if_not_16_aligned(addr);
13651
13652 DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
13653
13654 /* Uses dirty helper:
sewardj28d71ed2014-09-07 23:23:17 +000013655 VexEmNote amd64g_do_FXRSTOR_ALL_EXCEPT_XMM ( VexGuestAMD64State*,
13656 ULong )
sewardj80611e32012-01-20 13:07:24 +000013657 NOTE:
sewardj28d71ed2014-09-07 23:23:17 +000013658 the VexEmNote value is simply ignored
sewardj80611e32012-01-20 13:07:24 +000013659 */
13660 d = unsafeIRDirty_0_N (
13661 0/*regparms*/,
sewardj28d71ed2014-09-07 23:23:17 +000013662 "amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM",
13663 &amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM,
florian90419562013-08-15 20:54:52 +000013664 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
sewardj80611e32012-01-20 13:07:24 +000013665 );
sewardj80611e32012-01-20 13:07:24 +000013666
13667 /* declare we're reading memory */
13668 d->mFx = Ifx_Read;
13669 d->mAddr = mkexpr(addr);
sewardjc9069f22012-06-01 16:09:50 +000013670 d->mSize = 464; /* according to recent Intel docs */
sewardj80611e32012-01-20 13:07:24 +000013671
13672 /* declare we're writing guest state */
sewardj28d71ed2014-09-07 23:23:17 +000013673 d->nFxState = 6;
sewardjc9069f22012-06-01 16:09:50 +000013674 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj80611e32012-01-20 13:07:24 +000013675
13676 d->fxState[0].fx = Ifx_Write;
13677 d->fxState[0].offset = OFFB_FTOP;
13678 d->fxState[0].size = sizeof(UInt);
13679
13680 d->fxState[1].fx = Ifx_Write;
13681 d->fxState[1].offset = OFFB_FPREGS;
13682 d->fxState[1].size = 8 * sizeof(ULong);
13683
13684 d->fxState[2].fx = Ifx_Write;
13685 d->fxState[2].offset = OFFB_FPTAGS;
13686 d->fxState[2].size = 8 * sizeof(UChar);
13687
13688 d->fxState[3].fx = Ifx_Write;
13689 d->fxState[3].offset = OFFB_FPROUND;
13690 d->fxState[3].size = sizeof(ULong);
13691
13692 d->fxState[4].fx = Ifx_Write;
13693 d->fxState[4].offset = OFFB_FC3210;
13694 d->fxState[4].size = sizeof(ULong);
13695
13696 d->fxState[5].fx = Ifx_Write;
sewardj28d71ed2014-09-07 23:23:17 +000013697 d->fxState[5].offset = OFFB_SSEROUND;
13698 d->fxState[5].size = sizeof(ULong);
sewardj80611e32012-01-20 13:07:24 +000013699
sewardj28d71ed2014-09-07 23:23:17 +000013700 /* Call the helper. This reads all parts of the in-memory
13701 image except for the XMM[0..15] array, which we do
13702 separately, in order that any undefinedness in the XMM
13703 registers is tracked separately by Memcheck and does not
13704 "infect" the in-guest-state shadow for the other parts of the
13705 image (FPTOP, FPREGS, FPTAGS, FPROUND, FC3210,
13706 SSEROUND). */
sewardj80611e32012-01-20 13:07:24 +000013707 stmt( IRStmt_Dirty(d) );
13708
sewardj28d71ed2014-09-07 23:23:17 +000013709 /* And now the XMMs themselves. */
13710 UInt xmm;
13711 for (xmm = 0; xmm < 16; xmm++) {
13712 putXMMReg(xmm, loadLE(Ity_V128,
13713 binop(Iop_Add64, mkexpr(addr),
13714 mkU64(160 + xmm * 16))));
13715 }
13716
sewardj80611e32012-01-20 13:07:24 +000013717 goto decode_success;
13718 }
13719 break;
13720
13721 case 0xC2:
13722 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
13723 if (haveNo66noF2noF3(pfx) && sz == 4) {
sewardjc4530ae2012-05-21 10:18:49 +000013724 Long delta0 = delta;
13725 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpps", True, 4 );
13726 if (delta > delta0) goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000013727 }
13728 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
13729 if (haveF3no66noF2(pfx) && sz == 4) {
sewardjc4530ae2012-05-21 10:18:49 +000013730 Long delta0 = delta;
13731 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpss", False, 4 );
13732 if (delta > delta0) goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000013733 }
13734 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
13735 if (haveF2no66noF3(pfx) && sz == 4) {
sewardjc4530ae2012-05-21 10:18:49 +000013736 Long delta0 = delta;
13737 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpsd", False, 8 );
13738 if (delta > delta0) goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000013739 }
13740 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
13741 if (have66noF2noF3(pfx) && sz == 2) {
sewardjc4530ae2012-05-21 10:18:49 +000013742 Long delta0 = delta;
13743 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmppd", True, 8 );
13744 if (delta > delta0) goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000013745 }
13746 break;
13747
13748 case 0xC3:
13749 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
13750 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
13751 modrm = getUChar(delta);
13752 if (!epartIsReg(modrm)) {
13753 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13754 storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) );
13755 DIP("movnti %s,%s\n", dis_buf,
13756 nameIRegG(sz, pfx, modrm));
13757 delta += alen;
13758 goto decode_success;
13759 }
13760 /* else fall through */
13761 }
13762 break;
13763
13764 case 0xC4:
13765 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13766 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
13767 put it into the specified lane of mmx(G). */
13768 if (haveNo66noF2noF3(pfx)
13769 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13770 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
13771 mmx reg. t4 is the new lane value. t5 is the original
13772 mmx value. t6 is the new mmx value. */
13773 Int lane;
13774 t4 = newTemp(Ity_I16);
13775 t5 = newTemp(Ity_I64);
13776 t6 = newTemp(Ity_I64);
13777 modrm = getUChar(delta);
13778 do_MMX_preamble();
13779
13780 assign(t5, getMMXReg(gregLO3ofRM(modrm)));
13781 breakup64to16s( t5, &t3, &t2, &t1, &t0 );
13782
13783 if (epartIsReg(modrm)) {
13784 assign(t4, getIReg16(eregOfRexRM(pfx,modrm)));
13785 delta += 1+1;
13786 lane = getUChar(delta-1);
13787 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
13788 nameIReg16(eregOfRexRM(pfx,modrm)),
13789 nameMMXReg(gregLO3ofRM(modrm)));
13790 } else {
13791 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
13792 delta += 1+alen;
13793 lane = getUChar(delta-1);
13794 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
13795 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
13796 dis_buf,
13797 nameMMXReg(gregLO3ofRM(modrm)));
13798 }
13799
13800 switch (lane & 3) {
13801 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
13802 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
13803 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
13804 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
13805 default: vassert(0);
13806 }
13807 putMMXReg(gregLO3ofRM(modrm), mkexpr(t6));
13808 goto decode_success;
13809 }
13810 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
13811 put it into the specified lane of xmm(G). */
13812 if (have66noF2noF3(pfx)
13813 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13814 Int lane;
13815 t4 = newTemp(Ity_I16);
13816 modrm = getUChar(delta);
sewardj4ed05e02012-06-18 15:01:30 +000013817 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000013818 if (epartIsReg(modrm)) {
sewardj4ed05e02012-06-18 15:01:30 +000013819 UInt rE = eregOfRexRM(pfx,modrm);
13820 assign(t4, getIReg16(rE));
sewardj80611e32012-01-20 13:07:24 +000013821 delta += 1+1;
13822 lane = getUChar(delta-1);
sewardj4ed05e02012-06-18 15:01:30 +000013823 DIP("pinsrw $%d,%s,%s\n",
13824 (Int)lane, nameIReg16(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000013825 } else {
13826 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
13827 1/*byte after the amode*/ );
13828 delta += 1+alen;
13829 lane = getUChar(delta-1);
13830 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
sewardj4ed05e02012-06-18 15:01:30 +000013831 DIP("pinsrw $%d,%s,%s\n",
13832 (Int)lane, dis_buf, nameXMMReg(rG));
13833 }
13834 IRTemp src_vec = newTemp(Ity_V128);
13835 assign(src_vec, getXMMReg(rG));
13836 IRTemp res_vec = math_PINSRW_128( src_vec, t4, lane & 7);
13837 putXMMReg(rG, mkexpr(res_vec));
sewardj80611e32012-01-20 13:07:24 +000013838 goto decode_success;
13839 }
13840 break;
13841
13842 case 0xC5:
13843 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13844 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
13845 zero-extend of it in ireg(G). */
13846 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
13847 modrm = getUChar(delta);
13848 if (epartIsReg(modrm)) {
13849 IRTemp sV = newTemp(Ity_I64);
13850 t5 = newTemp(Ity_I16);
13851 do_MMX_preamble();
13852 assign(sV, getMMXReg(eregLO3ofRM(modrm)));
13853 breakup64to16s( sV, &t3, &t2, &t1, &t0 );
13854 switch (getUChar(delta+1) & 3) {
13855 case 0: assign(t5, mkexpr(t0)); break;
13856 case 1: assign(t5, mkexpr(t1)); break;
13857 case 2: assign(t5, mkexpr(t2)); break;
13858 case 3: assign(t5, mkexpr(t3)); break;
13859 default: vassert(0);
13860 }
13861 if (sz == 8)
13862 putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5)));
13863 else
13864 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5)));
13865 DIP("pextrw $%d,%s,%s\n",
13866 (Int)getUChar(delta+1),
13867 nameMMXReg(eregLO3ofRM(modrm)),
13868 sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm))
13869 : nameIReg32(gregOfRexRM(pfx,modrm))
13870 );
13871 delta += 2;
13872 goto decode_success;
13873 }
13874 /* else fall through */
13875 /* note, for anyone filling in the mem case: this insn has one
13876 byte after the amode and therefore you must pass 1 as the
13877 last arg to disAMode */
13878 }
13879 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
13880 zero-extend of it in ireg(G). */
13881 if (have66noF2noF3(pfx)
13882 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
sewardje8a7eb72012-06-12 14:59:17 +000013883 Long delta0 = delta;
13884 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
13885 False/*!isAvx*/ );
13886 if (delta > delta0) goto decode_success;
13887 /* else fall through -- decoding has failed */
sewardj80611e32012-01-20 13:07:24 +000013888 }
13889 break;
13890
13891 case 0xC6:
13892 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
13893 if (haveNo66noF2noF3(pfx) && sz == 4) {
sewardj251b59e2012-05-25 13:51:07 +000013894 Int imm8 = 0;
13895 IRTemp sV = newTemp(Ity_V128);
13896 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000013897 modrm = getUChar(delta);
sewardj251b59e2012-05-25 13:51:07 +000013898 UInt rG = gregOfRexRM(pfx,modrm);
13899 assign( dV, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000013900 if (epartIsReg(modrm)) {
sewardj251b59e2012-05-25 13:51:07 +000013901 UInt rE = eregOfRexRM(pfx,modrm);
13902 assign( sV, getXMMReg(rE) );
13903 imm8 = (Int)getUChar(delta+1);
sewardj80611e32012-01-20 13:07:24 +000013904 delta += 1+1;
sewardj251b59e2012-05-25 13:51:07 +000013905 DIP("shufps $%d,%s,%s\n", imm8, nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000013906 } else {
sewardj251b59e2012-05-25 13:51:07 +000013907 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj80611e32012-01-20 13:07:24 +000013908 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
sewardj251b59e2012-05-25 13:51:07 +000013909 imm8 = (Int)getUChar(delta+alen);
sewardj80611e32012-01-20 13:07:24 +000013910 delta += 1+alen;
sewardj251b59e2012-05-25 13:51:07 +000013911 DIP("shufps $%d,%s,%s\n", imm8, dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000013912 }
sewardj4b1cc832012-06-13 11:10:20 +000013913 IRTemp res = math_SHUFPS_128( sV, dV, imm8 );
sewardj251b59e2012-05-25 13:51:07 +000013914 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000013915 goto decode_success;
13916 }
13917 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
13918 if (have66noF2noF3(pfx) && sz == 2) {
13919 Int select;
13920 IRTemp sV = newTemp(Ity_V128);
13921 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000013922
13923 modrm = getUChar(delta);
13924 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
13925
13926 if (epartIsReg(modrm)) {
13927 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
13928 select = (Int)getUChar(delta+1);
13929 delta += 1+1;
13930 DIP("shufpd $%d,%s,%s\n", select,
13931 nameXMMReg(eregOfRexRM(pfx,modrm)),
13932 nameXMMReg(gregOfRexRM(pfx,modrm)));
13933 } else {
13934 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
13935 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
13936 select = getUChar(delta+alen);
13937 delta += 1+alen;
13938 DIP("shufpd $%d,%s,%s\n", select,
13939 dis_buf,
13940 nameXMMReg(gregOfRexRM(pfx,modrm)));
13941 }
13942
sewardj21459cb2012-06-18 14:05:52 +000013943 IRTemp res = math_SHUFPD_128( sV, dV, select );
13944 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000013945 goto decode_success;
13946 }
13947 break;
13948
13949 case 0xD1:
13950 /* 66 0F D1 = PSRLW by E */
13951 if (have66noF2noF3(pfx) && sz == 2) {
13952 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlw", Iop_ShrN16x8 );
13953 goto decode_success;
13954 }
13955 break;
13956
13957 case 0xD2:
13958 /* 66 0F D2 = PSRLD by E */
13959 if (have66noF2noF3(pfx) && sz == 2) {
13960 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrld", Iop_ShrN32x4 );
13961 goto decode_success;
13962 }
13963 break;
13964
13965 case 0xD3:
13966 /* 66 0F D3 = PSRLQ by E */
13967 if (have66noF2noF3(pfx) && sz == 2) {
13968 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlq", Iop_ShrN64x2 );
13969 goto decode_success;
13970 }
13971 break;
13972
13973 case 0xD4:
13974 /* 66 0F D4 = PADDQ */
13975 if (have66noF2noF3(pfx) && sz == 2) {
13976 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13977 "paddq", Iop_Add64x2, False );
13978 goto decode_success;
13979 }
13980 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
13981 /* 0F D4 = PADDQ -- add 64x1 */
13982 if (haveNo66noF2noF3(pfx) && sz == 4) {
13983 do_MMX_preamble();
13984 delta = dis_MMXop_regmem_to_reg (
13985 vbi, pfx, delta, opc, "paddq", False );
13986 goto decode_success;
13987 }
13988 break;
13989
13990 case 0xD5:
sewardj251b59e2012-05-25 13:51:07 +000013991 /* 66 0F D5 = PMULLW -- 16x8 multiply */
sewardj80611e32012-01-20 13:07:24 +000013992 if (have66noF2noF3(pfx) && sz == 2) {
13993 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13994 "pmullw", Iop_Mul16x8, False );
13995 goto decode_success;
13996 }
13997 break;
13998
13999 case 0xD6:
14000 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
14001 hi half). */
14002 if (haveF3no66noF2(pfx) && sz == 4) {
14003 modrm = getUChar(delta);
14004 if (epartIsReg(modrm)) {
14005 do_MMX_preamble();
14006 putXMMReg( gregOfRexRM(pfx,modrm),
14007 unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) );
14008 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14009 nameXMMReg(gregOfRexRM(pfx,modrm)));
14010 delta += 1;
14011 goto decode_success;
14012 }
14013 /* apparently no mem case for this insn */
14014 }
14015 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
14016 or lo half xmm). */
14017 if (have66noF2noF3(pfx)
14018 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
14019 modrm = getUChar(delta);
14020 if (epartIsReg(modrm)) {
14021 /* fall through, awaiting test case */
14022 /* dst: lo half copied, hi half zeroed */
14023 } else {
14024 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14025 storeLE( mkexpr(addr),
14026 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
14027 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf );
14028 delta += alen;
14029 goto decode_success;
14030 }
14031 }
14032 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
14033 if (haveF2no66noF3(pfx) && sz == 4) {
14034 modrm = getUChar(delta);
14035 if (epartIsReg(modrm)) {
14036 do_MMX_preamble();
14037 putMMXReg( gregLO3ofRM(modrm),
14038 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
14039 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
14040 nameMMXReg(gregLO3ofRM(modrm)));
14041 delta += 1;
14042 goto decode_success;
14043 }
14044 /* apparently no mem case for this insn */
14045 }
14046 break;
14047
14048 case 0xD7:
14049 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16
14050 lanes in xmm(E), turn them into a byte, and put
14051 zero-extend of it in ireg(G). Doing this directly is just
14052 too cumbersome; give up therefore and call a helper. */
14053 if (have66noF2noF3(pfx)
sewardj8ef22422012-05-24 16:29:18 +000014054 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
14055 && epartIsReg(getUChar(delta))) { /* no memory case, it seems */
14056 delta = dis_PMOVMSKB_128( vbi, pfx, delta, False/*!isAvx*/ );
14057 goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000014058 }
14059 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14060 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
sewardje13074c2012-11-08 10:57:08 +000014061 mmx(E), turn them into a byte, and put zero-extend of it in
sewardj80611e32012-01-20 13:07:24 +000014062 ireg(G). */
tom558fc972012-02-24 12:16:11 +000014063 if (haveNo66noF2noF3(pfx)
14064 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
sewardj80611e32012-01-20 13:07:24 +000014065 modrm = getUChar(delta);
14066 if (epartIsReg(modrm)) {
14067 do_MMX_preamble();
14068 t0 = newTemp(Ity_I64);
sewardje13074c2012-11-08 10:57:08 +000014069 t1 = newTemp(Ity_I32);
sewardj80611e32012-01-20 13:07:24 +000014070 assign(t0, getMMXReg(eregLO3ofRM(modrm)));
sewardje13074c2012-11-08 10:57:08 +000014071 assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
14072 putIReg32(gregOfRexRM(pfx,modrm), mkexpr(t1));
sewardj80611e32012-01-20 13:07:24 +000014073 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14074 nameIReg32(gregOfRexRM(pfx,modrm)));
14075 delta += 1;
14076 goto decode_success;
14077 }
14078 /* else fall through */
14079 }
14080 break;
14081
14082 case 0xD8:
sewardj251b59e2012-05-25 13:51:07 +000014083 /* 66 0F D8 = PSUBUSB */
sewardj80611e32012-01-20 13:07:24 +000014084 if (have66noF2noF3(pfx) && sz == 2) {
14085 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14086 "psubusb", Iop_QSub8Ux16, False );
14087 goto decode_success;
14088 }
14089 break;
14090
14091 case 0xD9:
sewardj4f228902012-06-21 09:17:58 +000014092 /* 66 0F D9 = PSUBUSW */
sewardj80611e32012-01-20 13:07:24 +000014093 if (have66noF2noF3(pfx) && sz == 2) {
14094 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14095 "psubusw", Iop_QSub16Ux8, False );
14096 goto decode_success;
14097 }
14098 break;
14099
14100 case 0xDA:
14101 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14102 /* 0F DA = PMINUB -- 8x8 unsigned min */
14103 if (haveNo66noF2noF3(pfx) && sz == 4) {
14104 do_MMX_preamble();
14105 delta = dis_MMXop_regmem_to_reg (
14106 vbi, pfx, delta, opc, "pminub", False );
14107 goto decode_success;
14108 }
14109 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
14110 if (have66noF2noF3(pfx) && sz == 2) {
14111 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14112 "pminub", Iop_Min8Ux16, False );
14113 goto decode_success;
14114 }
14115 break;
14116
14117 case 0xDB:
14118 /* 66 0F DB = PAND */
14119 if (have66noF2noF3(pfx) && sz == 2) {
14120 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pand", Iop_AndV128 );
14121 goto decode_success;
14122 }
14123 break;
14124
14125 case 0xDC:
14126 /* 66 0F DC = PADDUSB */
14127 if (have66noF2noF3(pfx) && sz == 2) {
14128 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14129 "paddusb", Iop_QAdd8Ux16, False );
14130 goto decode_success;
14131 }
14132 break;
14133
14134 case 0xDD:
14135 /* 66 0F DD = PADDUSW */
14136 if (have66noF2noF3(pfx) && sz == 2) {
14137 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14138 "paddusw", Iop_QAdd16Ux8, False );
14139 goto decode_success;
14140 }
14141 break;
14142
14143 case 0xDE:
14144 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14145 /* 0F DE = PMAXUB -- 8x8 unsigned max */
14146 if (haveNo66noF2noF3(pfx) && sz == 4) {
14147 do_MMX_preamble();
14148 delta = dis_MMXop_regmem_to_reg (
14149 vbi, pfx, delta, opc, "pmaxub", False );
14150 goto decode_success;
14151 }
14152 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
14153 if (have66noF2noF3(pfx) && sz == 2) {
14154 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14155 "pmaxub", Iop_Max8Ux16, False );
14156 goto decode_success;
14157 }
14158 break;
14159
14160 case 0xDF:
14161 /* 66 0F DF = PANDN */
14162 if (have66noF2noF3(pfx) && sz == 2) {
14163 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "pandn", Iop_AndV128 );
14164 goto decode_success;
14165 }
14166 break;
14167
14168 case 0xE0:
14169 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14170 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
14171 if (haveNo66noF2noF3(pfx) && sz == 4) {
14172 do_MMX_preamble();
14173 delta = dis_MMXop_regmem_to_reg (
14174 vbi, pfx, delta, opc, "pavgb", False );
14175 goto decode_success;
14176 }
14177 /* 66 0F E0 = PAVGB */
14178 if (have66noF2noF3(pfx) && sz == 2) {
14179 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14180 "pavgb", Iop_Avg8Ux16, False );
14181 goto decode_success;
14182 }
14183 break;
14184
14185 case 0xE1:
14186 /* 66 0F E1 = PSRAW by E */
14187 if (have66noF2noF3(pfx) && sz == 2) {
14188 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psraw", Iop_SarN16x8 );
14189 goto decode_success;
14190 }
14191 break;
14192
14193 case 0xE2:
14194 /* 66 0F E2 = PSRAD by E */
14195 if (have66noF2noF3(pfx) && sz == 2) {
14196 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrad", Iop_SarN32x4 );
14197 goto decode_success;
14198 }
14199 break;
14200
14201 case 0xE3:
14202 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14203 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
14204 if (haveNo66noF2noF3(pfx) && sz == 4) {
14205 do_MMX_preamble();
14206 delta = dis_MMXop_regmem_to_reg (
14207 vbi, pfx, delta, opc, "pavgw", False );
14208 goto decode_success;
14209 }
14210 /* 66 0F E3 = PAVGW */
14211 if (have66noF2noF3(pfx) && sz == 2) {
14212 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14213 "pavgw", Iop_Avg16Ux8, False );
14214 goto decode_success;
14215 }
14216 break;
14217
14218 case 0xE4:
14219 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14220 /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */
14221 if (haveNo66noF2noF3(pfx) && sz == 4) {
14222 do_MMX_preamble();
14223 delta = dis_MMXop_regmem_to_reg (
14224 vbi, pfx, delta, opc, "pmuluh", False );
14225 goto decode_success;
14226 }
14227 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
14228 if (have66noF2noF3(pfx) && sz == 2) {
14229 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14230 "pmulhuw", Iop_MulHi16Ux8, False );
14231 goto decode_success;
14232 }
14233 break;
14234
14235 case 0xE5:
14236 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
14237 if (have66noF2noF3(pfx) && sz == 2) {
14238 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14239 "pmulhw", Iop_MulHi16Sx8, False );
14240 goto decode_success;
14241 }
14242 break;
14243
14244 case 0xE6:
14245 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
14246 lo half xmm(G), and zero upper half, rounding towards zero */
14247 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
14248 lo half xmm(G), according to prevailing rounding mode, and zero
14249 upper half */
14250 if ( (haveF2no66noF3(pfx) && sz == 4)
14251 || (have66noF2noF3(pfx) && sz == 2) ) {
sewardj66becf32012-06-18 23:15:16 +000014252 delta = dis_CVTxPD2DQ_128( vbi, pfx, delta, False/*!isAvx*/,
14253 toBool(sz == 2)/*r2zero*/);
sewardj80611e32012-01-20 13:07:24 +000014254 goto decode_success;
14255 }
14256 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
14257 F64 in xmm(G) */
14258 if (haveF3no66noF2(pfx) && sz == 4) {
sewardj4b1cc832012-06-13 11:10:20 +000014259 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, False/*!isAvx*/);
sewardj80611e32012-01-20 13:07:24 +000014260 goto decode_success;
14261 }
14262 break;
14263
14264 case 0xE7:
14265 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14266 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
14267 Intel manual does not say anything about the usual business of
14268 the FP reg tags getting trashed whenever an MMX insn happens.
14269 So we just leave them alone.
14270 */
14271 if (haveNo66noF2noF3(pfx) && sz == 4) {
14272 modrm = getUChar(delta);
14273 if (!epartIsReg(modrm)) {
14274 /* do_MMX_preamble(); Intel docs don't specify this */
14275 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14276 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
14277 DIP("movntq %s,%s\n", dis_buf,
14278 nameMMXReg(gregLO3ofRM(modrm)));
14279 delta += alen;
14280 goto decode_success;
14281 }
14282 /* else fall through */
14283 }
14284 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
14285 if (have66noF2noF3(pfx) && sz == 2) {
14286 modrm = getUChar(delta);
14287 if (!epartIsReg(modrm)) {
14288 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14289 gen_SEGV_if_not_16_aligned( addr );
14290 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
14291 DIP("movntdq %s,%s\n", dis_buf,
14292 nameXMMReg(gregOfRexRM(pfx,modrm)));
14293 delta += alen;
14294 goto decode_success;
14295 }
14296 /* else fall through */
14297 }
14298 break;
14299
14300 case 0xE8:
14301 /* 66 0F E8 = PSUBSB */
14302 if (have66noF2noF3(pfx) && sz == 2) {
14303 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14304 "psubsb", Iop_QSub8Sx16, False );
14305 goto decode_success;
14306 }
14307 break;
14308
14309 case 0xE9:
14310 /* 66 0F E9 = PSUBSW */
14311 if (have66noF2noF3(pfx) && sz == 2) {
14312 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14313 "psubsw", Iop_QSub16Sx8, False );
14314 goto decode_success;
14315 }
14316 break;
14317
14318 case 0xEA:
14319 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14320 /* 0F EA = PMINSW -- 16x4 signed min */
14321 if (haveNo66noF2noF3(pfx) && sz == 4) {
14322 do_MMX_preamble();
14323 delta = dis_MMXop_regmem_to_reg (
14324 vbi, pfx, delta, opc, "pminsw", False );
14325 goto decode_success;
14326 }
14327 /* 66 0F EA = PMINSW -- 16x8 signed min */
14328 if (have66noF2noF3(pfx) && sz == 2) {
14329 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14330 "pminsw", Iop_Min16Sx8, False );
14331 goto decode_success;
14332 }
14333 break;
14334
14335 case 0xEB:
14336 /* 66 0F EB = POR */
14337 if (have66noF2noF3(pfx) && sz == 2) {
14338 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "por", Iop_OrV128 );
14339 goto decode_success;
14340 }
14341 break;
14342
14343 case 0xEC:
14344 /* 66 0F EC = PADDSB */
14345 if (have66noF2noF3(pfx) && sz == 2) {
14346 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14347 "paddsb", Iop_QAdd8Sx16, False );
14348 goto decode_success;
14349 }
14350 break;
14351
14352 case 0xED:
14353 /* 66 0F ED = PADDSW */
14354 if (have66noF2noF3(pfx) && sz == 2) {
14355 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14356 "paddsw", Iop_QAdd16Sx8, False );
14357 goto decode_success;
14358 }
14359 break;
14360
14361 case 0xEE:
14362 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14363 /* 0F EE = PMAXSW -- 16x4 signed max */
14364 if (haveNo66noF2noF3(pfx) && sz == 4) {
14365 do_MMX_preamble();
14366 delta = dis_MMXop_regmem_to_reg (
14367 vbi, pfx, delta, opc, "pmaxsw", False );
14368 goto decode_success;
14369 }
14370 /* 66 0F EE = PMAXSW -- 16x8 signed max */
14371 if (have66noF2noF3(pfx) && sz == 2) {
14372 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14373 "pmaxsw", Iop_Max16Sx8, False );
14374 goto decode_success;
14375 }
14376 break;
14377
14378 case 0xEF:
14379 /* 66 0F EF = PXOR */
14380 if (have66noF2noF3(pfx) && sz == 2) {
14381 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pxor", Iop_XorV128 );
14382 goto decode_success;
14383 }
14384 break;
14385
14386 case 0xF1:
14387 /* 66 0F F1 = PSLLW by E */
14388 if (have66noF2noF3(pfx) && sz == 2) {
14389 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllw", Iop_ShlN16x8 );
14390 goto decode_success;
14391 }
14392 break;
14393
14394 case 0xF2:
14395 /* 66 0F F2 = PSLLD by E */
14396 if (have66noF2noF3(pfx) && sz == 2) {
14397 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "pslld", Iop_ShlN32x4 );
14398 goto decode_success;
14399 }
14400 break;
14401
14402 case 0xF3:
14403 /* 66 0F F3 = PSLLQ by E */
14404 if (have66noF2noF3(pfx) && sz == 2) {
14405 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllq", Iop_ShlN64x2 );
14406 goto decode_success;
14407 }
14408 break;
14409
14410 case 0xF4:
14411 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
14412 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
14413 half */
sewardj80611e32012-01-20 13:07:24 +000014414 if (have66noF2noF3(pfx) && sz == 2) {
sewardje8a7eb72012-06-12 14:59:17 +000014415 IRTemp sV = newTemp(Ity_V128);
14416 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000014417 modrm = getUChar(delta);
sewardje8a7eb72012-06-12 14:59:17 +000014418 UInt rG = gregOfRexRM(pfx,modrm);
14419 assign( dV, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000014420 if (epartIsReg(modrm)) {
sewardje8a7eb72012-06-12 14:59:17 +000014421 UInt rE = eregOfRexRM(pfx,modrm);
14422 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000014423 delta += 1;
sewardje8a7eb72012-06-12 14:59:17 +000014424 DIP("pmuludq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014425 } else {
14426 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14427 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14428 delta += alen;
sewardje8a7eb72012-06-12 14:59:17 +000014429 DIP("pmuludq %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014430 }
sewardje8a7eb72012-06-12 14:59:17 +000014431 putXMMReg( rG, mkexpr(math_PMULUDQ_128( sV, dV )) );
sewardj80611e32012-01-20 13:07:24 +000014432 goto decode_success;
14433 }
14434 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
14435 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
14436 0 to form 64-bit result */
14437 if (haveNo66noF2noF3(pfx) && sz == 4) {
14438 IRTemp sV = newTemp(Ity_I64);
14439 IRTemp dV = newTemp(Ity_I64);
14440 t1 = newTemp(Ity_I32);
14441 t0 = newTemp(Ity_I32);
14442 modrm = getUChar(delta);
14443
14444 do_MMX_preamble();
14445 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
14446
14447 if (epartIsReg(modrm)) {
14448 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
14449 delta += 1;
14450 DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14451 nameMMXReg(gregLO3ofRM(modrm)));
14452 } else {
14453 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14454 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
14455 delta += alen;
14456 DIP("pmuludq %s,%s\n", dis_buf,
14457 nameMMXReg(gregLO3ofRM(modrm)));
14458 }
14459
14460 assign( t0, unop(Iop_64to32, mkexpr(dV)) );
14461 assign( t1, unop(Iop_64to32, mkexpr(sV)) );
14462 putMMXReg( gregLO3ofRM(modrm),
14463 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
14464 goto decode_success;
14465 }
14466 break;
14467
14468 case 0xF5:
14469 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
14470 E(xmm or mem) to G(xmm) */
14471 if (have66noF2noF3(pfx) && sz == 2) {
sewardj89378162012-06-24 12:12:20 +000014472 IRTemp sV = newTemp(Ity_V128);
14473 IRTemp dV = newTemp(Ity_V128);
14474 modrm = getUChar(delta);
14475 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000014476 if (epartIsReg(modrm)) {
sewardj89378162012-06-24 12:12:20 +000014477 UInt rE = eregOfRexRM(pfx,modrm);
14478 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000014479 delta += 1;
sewardj89378162012-06-24 12:12:20 +000014480 DIP("pmaddwd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014481 } else {
14482 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj89378162012-06-24 12:12:20 +000014483 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
sewardj80611e32012-01-20 13:07:24 +000014484 delta += alen;
sewardj89378162012-06-24 12:12:20 +000014485 DIP("pmaddwd %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014486 }
sewardj89378162012-06-24 12:12:20 +000014487 assign( dV, getXMMReg(rG) );
14488 putXMMReg( rG, mkexpr(math_PMADDWD_128(dV, sV)) );
sewardj80611e32012-01-20 13:07:24 +000014489 goto decode_success;
14490 }
14491 break;
14492
14493 case 0xF6:
14494 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14495 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
14496 if (haveNo66noF2noF3(pfx) && sz == 4) {
14497 do_MMX_preamble();
14498 delta = dis_MMXop_regmem_to_reg (
14499 vbi, pfx, delta, opc, "psadbw", False );
14500 goto decode_success;
14501 }
14502 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
14503 from E(xmm or mem) to G(xmm) */
14504 if (have66noF2noF3(pfx) && sz == 2) {
sewardj82096922012-06-24 14:57:59 +000014505 IRTemp sV = newTemp(Ity_V128);
14506 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000014507 modrm = getUChar(delta);
sewardj82096922012-06-24 14:57:59 +000014508 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000014509 if (epartIsReg(modrm)) {
sewardj82096922012-06-24 14:57:59 +000014510 UInt rE = eregOfRexRM(pfx,modrm);
14511 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000014512 delta += 1;
sewardj82096922012-06-24 14:57:59 +000014513 DIP("psadbw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014514 } else {
14515 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj82096922012-06-24 14:57:59 +000014516 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
sewardj80611e32012-01-20 13:07:24 +000014517 delta += alen;
sewardj82096922012-06-24 14:57:59 +000014518 DIP("psadbw %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014519 }
sewardj82096922012-06-24 14:57:59 +000014520 assign( dV, getXMMReg(rG) );
14521 putXMMReg( rG, mkexpr( math_PSADBW_128 ( dV, sV ) ) );
14522
sewardj80611e32012-01-20 13:07:24 +000014523 goto decode_success;
14524 }
14525 break;
14526
14527 case 0xF7:
14528 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14529 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
14530 if (haveNo66noF2noF3(pfx) && sz == 4) {
14531 Bool ok = False;
14532 delta = dis_MMX( &ok, vbi, pfx, sz, delta-1 );
14533 if (ok) goto decode_success;
14534 }
14535 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
sewardj8eb7ae82012-06-24 14:00:27 +000014536 if (have66noF2noF3(pfx) && sz == 2 && epartIsReg(getUChar(delta))) {
14537 delta = dis_MASKMOVDQU( vbi, pfx, delta, False/*!isAvx*/ );
14538 goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000014539 }
14540 break;
14541
14542 case 0xF8:
14543 /* 66 0F F8 = PSUBB */
14544 if (have66noF2noF3(pfx) && sz == 2) {
14545 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14546 "psubb", Iop_Sub8x16, False );
14547 goto decode_success;
14548 }
14549 break;
14550
14551 case 0xF9:
14552 /* 66 0F F9 = PSUBW */
14553 if (have66noF2noF3(pfx) && sz == 2) {
14554 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14555 "psubw", Iop_Sub16x8, False );
14556 goto decode_success;
14557 }
14558 break;
14559
14560 case 0xFA:
14561 /* 66 0F FA = PSUBD */
14562 if (have66noF2noF3(pfx) && sz == 2) {
14563 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14564 "psubd", Iop_Sub32x4, False );
14565 goto decode_success;
14566 }
14567 break;
14568
14569 case 0xFB:
14570 /* 66 0F FB = PSUBQ */
14571 if (have66noF2noF3(pfx) && sz == 2) {
14572 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14573 "psubq", Iop_Sub64x2, False );
14574 goto decode_success;
14575 }
14576 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
14577 /* 0F FB = PSUBQ -- sub 64x1 */
14578 if (haveNo66noF2noF3(pfx) && sz == 4) {
14579 do_MMX_preamble();
14580 delta = dis_MMXop_regmem_to_reg (
14581 vbi, pfx, delta, opc, "psubq", False );
14582 goto decode_success;
14583 }
14584 break;
14585
14586 case 0xFC:
14587 /* 66 0F FC = PADDB */
14588 if (have66noF2noF3(pfx) && sz == 2) {
14589 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14590 "paddb", Iop_Add8x16, False );
14591 goto decode_success;
14592 }
14593 break;
14594
14595 case 0xFD:
14596 /* 66 0F FD = PADDW */
14597 if (have66noF2noF3(pfx) && sz == 2) {
14598 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14599 "paddw", Iop_Add16x8, False );
14600 goto decode_success;
14601 }
14602 break;
14603
14604 case 0xFE:
14605 /* 66 0F FE = PADDD */
14606 if (have66noF2noF3(pfx) && sz == 2) {
14607 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14608 "paddd", Iop_Add32x4, False );
14609 goto decode_success;
14610 }
14611 break;
14612
14613 default:
14614 goto decode_failure;
14615
14616 }
14617
14618 decode_failure:
14619 *decode_OK = False;
14620 return deltaIN;
14621
14622 decode_success:
14623 *decode_OK = True;
14624 return delta;
14625}
14626
14627
14628/*------------------------------------------------------------*/
14629/*--- ---*/
14630/*--- Top-level SSE3 (not SupSSE3): dis_ESC_0F__SSE3 ---*/
14631/*--- ---*/
14632/*------------------------------------------------------------*/
14633
floriancacba8e2014-12-15 18:58:07 +000014634static Long dis_MOVDDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardjc4530ae2012-05-21 10:18:49 +000014635 Long delta, Bool isAvx )
14636{
14637 IRTemp addr = IRTemp_INVALID;
14638 Int alen = 0;
14639 HChar dis_buf[50];
14640 IRTemp sV = newTemp(Ity_V128);
14641 IRTemp d0 = newTemp(Ity_I64);
14642 UChar modrm = getUChar(delta);
14643 UInt rG = gregOfRexRM(pfx,modrm);
14644 if (epartIsReg(modrm)) {
14645 UInt rE = eregOfRexRM(pfx,modrm);
14646 assign( sV, getXMMReg(rE) );
14647 DIP("%smovddup %s,%s\n",
14648 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
14649 delta += 1;
14650 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
14651 } else {
14652 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14653 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
14654 DIP("%smovddup %s,%s\n",
14655 isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
14656 delta += alen;
14657 }
14658 (isAvx ? putYMMRegLoAndZU : putXMMReg)
14659 ( rG, binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
14660 return delta;
14661}
14662
14663
floriancacba8e2014-12-15 18:58:07 +000014664static Long dis_MOVDDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj82096922012-06-24 14:57:59 +000014665 Long delta )
14666{
14667 IRTemp addr = IRTemp_INVALID;
14668 Int alen = 0;
14669 HChar dis_buf[50];
14670 IRTemp d0 = newTemp(Ity_I64);
14671 IRTemp d1 = newTemp(Ity_I64);
14672 UChar modrm = getUChar(delta);
14673 UInt rG = gregOfRexRM(pfx,modrm);
14674 if (epartIsReg(modrm)) {
14675 UInt rE = eregOfRexRM(pfx,modrm);
14676 DIP("vmovddup %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
14677 delta += 1;
14678 assign ( d0, getYMMRegLane64(rE, 0) );
14679 assign ( d1, getYMMRegLane64(rE, 2) );
14680 } else {
14681 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14682 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
14683 assign( d1, loadLE(Ity_I64, binop(Iop_Add64,
14684 mkexpr(addr), mkU64(16))) );
14685 DIP("vmovddup %s,%s\n", dis_buf, nameYMMReg(rG));
14686 delta += alen;
14687 }
14688 putYMMRegLane64( rG, 0, mkexpr(d0) );
14689 putYMMRegLane64( rG, 1, mkexpr(d0) );
14690 putYMMRegLane64( rG, 2, mkexpr(d1) );
14691 putYMMRegLane64( rG, 3, mkexpr(d1) );
14692 return delta;
14693}
14694
14695
floriancacba8e2014-12-15 18:58:07 +000014696static Long dis_MOVSxDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj15ad1942012-06-20 10:21:05 +000014697 Long delta, Bool isAvx, Bool isL )
14698{
14699 IRTemp addr = IRTemp_INVALID;
14700 Int alen = 0;
14701 HChar dis_buf[50];
14702 IRTemp sV = newTemp(Ity_V128);
14703 UChar modrm = getUChar(delta);
14704 UInt rG = gregOfRexRM(pfx,modrm);
14705 IRTemp s3, s2, s1, s0;
14706 s3 = s2 = s1 = s0 = IRTemp_INVALID;
14707 if (epartIsReg(modrm)) {
14708 UInt rE = eregOfRexRM(pfx,modrm);
14709 assign( sV, getXMMReg(rE) );
14710 DIP("%smovs%cdup %s,%s\n",
14711 isAvx ? "v" : "", isL ? 'l' : 'h', nameXMMReg(rE), nameXMMReg(rG));
14712 delta += 1;
14713 } else {
14714 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14715 if (!isAvx)
14716 gen_SEGV_if_not_16_aligned( addr );
14717 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14718 DIP("%smovs%cdup %s,%s\n",
14719 isAvx ? "v" : "", isL ? 'l' : 'h', dis_buf, nameXMMReg(rG));
14720 delta += alen;
14721 }
14722 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
14723 (isAvx ? putYMMRegLoAndZU : putXMMReg)
14724 ( rG, isL ? mkV128from32s( s2, s2, s0, s0 )
14725 : mkV128from32s( s3, s3, s1, s1 ) );
14726 return delta;
14727}
14728
14729
floriancacba8e2014-12-15 18:58:07 +000014730static Long dis_MOVSxDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj15ad1942012-06-20 10:21:05 +000014731 Long delta, Bool isL )
14732{
14733 IRTemp addr = IRTemp_INVALID;
14734 Int alen = 0;
14735 HChar dis_buf[50];
14736 IRTemp sV = newTemp(Ity_V256);
sewardj15ad1942012-06-20 10:21:05 +000014737 UChar modrm = getUChar(delta);
14738 UInt rG = gregOfRexRM(pfx,modrm);
14739 IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
14740 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
14741 if (epartIsReg(modrm)) {
14742 UInt rE = eregOfRexRM(pfx,modrm);
14743 assign( sV, getYMMReg(rE) );
14744 DIP("vmovs%cdup %s,%s\n",
14745 isL ? 'l' : 'h', nameYMMReg(rE), nameYMMReg(rG));
14746 delta += 1;
14747 } else {
14748 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14749 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
14750 DIP("vmovs%cdup %s,%s\n",
14751 isL ? 'l' : 'h', dis_buf, nameYMMReg(rG));
14752 delta += alen;
14753 }
sewardj4f228902012-06-21 09:17:58 +000014754 breakupV256to32s( sV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
sewardj15ad1942012-06-20 10:21:05 +000014755 putYMMRegLane128( rG, 1, isL ? mkV128from32s( s6, s6, s4, s4 )
14756 : mkV128from32s( s7, s7, s5, s5 ) );
14757 putYMMRegLane128( rG, 0, isL ? mkV128from32s( s2, s2, s0, s0 )
14758 : mkV128from32s( s3, s3, s1, s1 ) );
14759 return delta;
14760}
14761
14762
sewardjadf357c2012-06-24 13:44:17 +000014763static IRTemp math_HADDPS_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
14764{
14765 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
14766 IRTemp leftV = newTemp(Ity_V128);
14767 IRTemp rightV = newTemp(Ity_V128);
sewardj9571dc02014-01-26 18:34:23 +000014768 IRTemp rm = newTemp(Ity_I32);
sewardjadf357c2012-06-24 13:44:17 +000014769 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
14770
14771 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
14772 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
14773
14774 assign( leftV, mkV128from32s( s2, s0, d2, d0 ) );
14775 assign( rightV, mkV128from32s( s3, s1, d3, d1 ) );
14776
14777 IRTemp res = newTemp(Ity_V128);
sewardj9571dc02014-01-26 18:34:23 +000014778 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
14779 assign( res, triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
14780 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
sewardjadf357c2012-06-24 13:44:17 +000014781 return res;
14782}
14783
14784
14785static IRTemp math_HADDPD_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
14786{
14787 IRTemp s1, s0, d1, d0;
14788 IRTemp leftV = newTemp(Ity_V128);
14789 IRTemp rightV = newTemp(Ity_V128);
sewardj9571dc02014-01-26 18:34:23 +000014790 IRTemp rm = newTemp(Ity_I32);
sewardjadf357c2012-06-24 13:44:17 +000014791 s1 = s0 = d1 = d0 = IRTemp_INVALID;
14792
14793 breakupV128to64s( sV, &s1, &s0 );
14794 breakupV128to64s( dV, &d1, &d0 );
14795
14796 assign( leftV, binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
14797 assign( rightV, binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
14798
14799 IRTemp res = newTemp(Ity_V128);
sewardj9571dc02014-01-26 18:34:23 +000014800 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
14801 assign( res, triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
14802 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
sewardjadf357c2012-06-24 13:44:17 +000014803 return res;
14804}
14805
14806
sewardj80611e32012-01-20 13:07:24 +000014807__attribute__((noinline))
14808static
14809Long dis_ESC_0F__SSE3 ( Bool* decode_OK,
floriancacba8e2014-12-15 18:58:07 +000014810 const VexAbiInfo* vbi,
sewardj80611e32012-01-20 13:07:24 +000014811 Prefix pfx, Int sz, Long deltaIN )
14812{
14813 IRTemp addr = IRTemp_INVALID;
14814 UChar modrm = 0;
14815 Int alen = 0;
14816 HChar dis_buf[50];
14817
14818 *decode_OK = False;
14819
14820 Long delta = deltaIN;
14821 UChar opc = getUChar(delta);
14822 delta++;
14823 switch (opc) {
14824
14825 case 0x12:
14826 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
14827 duplicating some lanes (2:2:0:0). */
14828 if (haveF3no66noF2(pfx) && sz == 4) {
sewardj15ad1942012-06-20 10:21:05 +000014829 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
14830 True/*isL*/ );
sewardj80611e32012-01-20 13:07:24 +000014831 goto decode_success;
14832 }
14833 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
14834 duplicating some lanes (0:1:0:1). */
14835 if (haveF2no66noF3(pfx)
14836 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
sewardjc4530ae2012-05-21 10:18:49 +000014837 delta = dis_MOVDDUP_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000014838 goto decode_success;
14839 }
14840 break;
14841
14842 case 0x16:
14843 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
14844 duplicating some lanes (3:3:1:1). */
14845 if (haveF3no66noF2(pfx) && sz == 4) {
sewardj15ad1942012-06-20 10:21:05 +000014846 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
14847 False/*!isL*/ );
sewardj80611e32012-01-20 13:07:24 +000014848 goto decode_success;
14849 }
14850 break;
14851
14852 case 0x7C:
14853 case 0x7D:
14854 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
14855 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
14856 if (haveF2no66noF3(pfx) && sz == 4) {
sewardj80611e32012-01-20 13:07:24 +000014857 IRTemp eV = newTemp(Ity_V128);
14858 IRTemp gV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000014859 Bool isAdd = opc == 0x7C;
florian55085f82012-11-21 00:36:55 +000014860 const HChar* str = isAdd ? "add" : "sub";
sewardjadf357c2012-06-24 13:44:17 +000014861 modrm = getUChar(delta);
14862 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000014863 if (epartIsReg(modrm)) {
sewardjadf357c2012-06-24 13:44:17 +000014864 UInt rE = eregOfRexRM(pfx,modrm);
14865 assign( eV, getXMMReg(rE) );
14866 DIP("h%sps %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014867 delta += 1;
14868 } else {
14869 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14870 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
sewardjadf357c2012-06-24 13:44:17 +000014871 DIP("h%sps %s,%s\n", str, dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014872 delta += alen;
14873 }
14874
sewardjadf357c2012-06-24 13:44:17 +000014875 assign( gV, getXMMReg(rG) );
14876 putXMMReg( rG, mkexpr( math_HADDPS_128 ( gV, eV, isAdd ) ) );
sewardj80611e32012-01-20 13:07:24 +000014877 goto decode_success;
14878 }
14879 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
14880 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
14881 if (have66noF2noF3(pfx) && sz == 2) {
sewardj80611e32012-01-20 13:07:24 +000014882 IRTemp eV = newTemp(Ity_V128);
14883 IRTemp gV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000014884 Bool isAdd = opc == 0x7C;
florian55085f82012-11-21 00:36:55 +000014885 const HChar* str = isAdd ? "add" : "sub";
sewardjadf357c2012-06-24 13:44:17 +000014886 modrm = getUChar(delta);
14887 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000014888 if (epartIsReg(modrm)) {
sewardjadf357c2012-06-24 13:44:17 +000014889 UInt rE = eregOfRexRM(pfx,modrm);
14890 assign( eV, getXMMReg(rE) );
14891 DIP("h%spd %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014892 delta += 1;
14893 } else {
14894 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14895 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
sewardjadf357c2012-06-24 13:44:17 +000014896 DIP("h%spd %s,%s\n", str, dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014897 delta += alen;
14898 }
14899
sewardjadf357c2012-06-24 13:44:17 +000014900 assign( gV, getXMMReg(rG) );
14901 putXMMReg( rG, mkexpr( math_HADDPD_128 ( gV, eV, isAdd ) ) );
sewardj80611e32012-01-20 13:07:24 +000014902 goto decode_success;
14903 }
14904 break;
14905
14906 case 0xD0:
14907 /* 66 0F D0 = ADDSUBPD -- 64x4 +/- from E (mem or xmm) to G (xmm). */
14908 if (have66noF2noF3(pfx) && sz == 2) {
14909 IRTemp eV = newTemp(Ity_V128);
14910 IRTemp gV = newTemp(Ity_V128);
sewardj89378162012-06-24 12:12:20 +000014911 modrm = getUChar(delta);
14912 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000014913 if (epartIsReg(modrm)) {
sewardj89378162012-06-24 12:12:20 +000014914 UInt rE = eregOfRexRM(pfx,modrm);
14915 assign( eV, getXMMReg(rE) );
14916 DIP("addsubpd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014917 delta += 1;
14918 } else {
14919 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14920 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
sewardj89378162012-06-24 12:12:20 +000014921 DIP("addsubpd %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014922 delta += alen;
14923 }
14924
sewardj89378162012-06-24 12:12:20 +000014925 assign( gV, getXMMReg(rG) );
14926 putXMMReg( rG, mkexpr( math_ADDSUBPD_128 ( gV, eV ) ) );
sewardj80611e32012-01-20 13:07:24 +000014927 goto decode_success;
14928 }
14929 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
14930 if (haveF2no66noF3(pfx) && sz == 4) {
sewardj80611e32012-01-20 13:07:24 +000014931 IRTemp eV = newTemp(Ity_V128);
14932 IRTemp gV = newTemp(Ity_V128);
sewardj89378162012-06-24 12:12:20 +000014933 modrm = getUChar(delta);
14934 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000014935
14936 modrm = getUChar(delta);
14937 if (epartIsReg(modrm)) {
sewardj89378162012-06-24 12:12:20 +000014938 UInt rE = eregOfRexRM(pfx,modrm);
14939 assign( eV, getXMMReg(rE) );
14940 DIP("addsubps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014941 delta += 1;
14942 } else {
14943 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14944 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
sewardj89378162012-06-24 12:12:20 +000014945 DIP("addsubps %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014946 delta += alen;
14947 }
14948
sewardj89378162012-06-24 12:12:20 +000014949 assign( gV, getXMMReg(rG) );
14950 putXMMReg( rG, mkexpr( math_ADDSUBPS_128 ( gV, eV ) ) );
sewardj80611e32012-01-20 13:07:24 +000014951 goto decode_success;
14952 }
14953 break;
14954
14955 case 0xF0:
14956 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
14957 if (haveF2no66noF3(pfx) && sz == 4) {
14958 modrm = getUChar(delta);
14959 if (epartIsReg(modrm)) {
14960 goto decode_failure;
14961 } else {
14962 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14963 putXMMReg( gregOfRexRM(pfx,modrm),
14964 loadLE(Ity_V128, mkexpr(addr)) );
14965 DIP("lddqu %s,%s\n", dis_buf,
14966 nameXMMReg(gregOfRexRM(pfx,modrm)));
14967 delta += alen;
14968 }
14969 goto decode_success;
14970 }
14971 break;
14972
14973 default:
14974 goto decode_failure;
14975
14976 }
14977
14978 decode_failure:
14979 *decode_OK = False;
14980 return deltaIN;
14981
14982 decode_success:
14983 *decode_OK = True;
14984 return delta;
14985}
14986
14987
/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level SSSE3: dis_ESC_0F38__SupSSE3               ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/
14993
sewardjc4530ae2012-05-21 10:18:49 +000014994static
14995IRTemp math_PSHUFB_XMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
14996{
14997 IRTemp sHi = newTemp(Ity_I64);
14998 IRTemp sLo = newTemp(Ity_I64);
14999 IRTemp dHi = newTemp(Ity_I64);
15000 IRTemp dLo = newTemp(Ity_I64);
15001 IRTemp rHi = newTemp(Ity_I64);
15002 IRTemp rLo = newTemp(Ity_I64);
15003 IRTemp sevens = newTemp(Ity_I64);
15004 IRTemp mask0x80hi = newTemp(Ity_I64);
15005 IRTemp mask0x80lo = newTemp(Ity_I64);
15006 IRTemp maskBit3hi = newTemp(Ity_I64);
15007 IRTemp maskBit3lo = newTemp(Ity_I64);
15008 IRTemp sAnd7hi = newTemp(Ity_I64);
15009 IRTemp sAnd7lo = newTemp(Ity_I64);
15010 IRTemp permdHi = newTemp(Ity_I64);
15011 IRTemp permdLo = newTemp(Ity_I64);
15012 IRTemp res = newTemp(Ity_V128);
15013
15014 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
15015 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
15016 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
15017 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
15018
15019 assign( sevens, mkU64(0x0707070707070707ULL) );
15020
15021 /* mask0x80hi = Not(SarN8x8(sHi,7))
15022 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
15023 sAnd7hi = And(sHi,sevens)
15024 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
15025 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
15026 rHi = And(permdHi,mask0x80hi)
15027 */
15028 assign(
15029 mask0x80hi,
15030 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));
15031
15032 assign(
15033 maskBit3hi,
15034 binop(Iop_SarN8x8,
15035 binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
15036 mkU8(7)));
15037
15038 assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));
15039
15040 assign(
15041 permdHi,
15042 binop(
15043 Iop_Or64,
15044 binop(Iop_And64,
15045 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
15046 mkexpr(maskBit3hi)),
15047 binop(Iop_And64,
15048 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
15049 unop(Iop_Not64,mkexpr(maskBit3hi))) ));
15050
15051 assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );
15052
15053 /* And the same for the lower half of the result. What fun. */
15054
15055 assign(
15056 mask0x80lo,
15057 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));
15058
15059 assign(
15060 maskBit3lo,
15061 binop(Iop_SarN8x8,
15062 binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
15063 mkU8(7)));
15064
15065 assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));
15066
15067 assign(
15068 permdLo,
15069 binop(
15070 Iop_Or64,
15071 binop(Iop_And64,
15072 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
15073 mkexpr(maskBit3lo)),
15074 binop(Iop_And64,
15075 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
15076 unop(Iop_Not64,mkexpr(maskBit3lo))) ));
15077
15078 assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );
15079
15080 assign(res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
15081 return res;
15082}
15083
15084
sewardjcc3d2192013-03-27 11:37:33 +000015085static
15086IRTemp math_PSHUFB_YMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
15087{
15088 IRTemp sHi, sLo, dHi, dLo;
15089 sHi = sLo = dHi = dLo = IRTemp_INVALID;
15090 breakupV256toV128s( dV, &dHi, &dLo);
15091 breakupV256toV128s( sV, &sHi, &sLo);
15092 IRTemp res = newTemp(Ity_V256);
15093 assign(res, binop(Iop_V128HLtoV256,
15094 mkexpr(math_PSHUFB_XMM(dHi, sHi)),
15095 mkexpr(math_PSHUFB_XMM(dLo, sLo))));
15096 return res;
15097}
15098
15099
floriancacba8e2014-12-15 18:58:07 +000015100static Long dis_PHADD_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
sewardj8516a1f2012-06-24 14:26:30 +000015101 Bool isAvx, UChar opc )
15102{
15103 IRTemp addr = IRTemp_INVALID;
15104 Int alen = 0;
15105 HChar dis_buf[50];
florian55085f82012-11-21 00:36:55 +000015106 const HChar* str = "???";
sewardj8516a1f2012-06-24 14:26:30 +000015107 IROp opV64 = Iop_INVALID;
15108 IROp opCatO = Iop_CatOddLanes16x4;
15109 IROp opCatE = Iop_CatEvenLanes16x4;
15110 IRTemp sV = newTemp(Ity_V128);
15111 IRTemp dV = newTemp(Ity_V128);
15112 IRTemp sHi = newTemp(Ity_I64);
15113 IRTemp sLo = newTemp(Ity_I64);
15114 IRTemp dHi = newTemp(Ity_I64);
15115 IRTemp dLo = newTemp(Ity_I64);
15116 UChar modrm = getUChar(delta);
15117 UInt rG = gregOfRexRM(pfx,modrm);
15118 UInt rV = isAvx ? getVexNvvvv(pfx) : rG;
15119
15120 switch (opc) {
15121 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15122 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15123 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15124 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15125 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15126 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15127 default: vassert(0);
15128 }
15129 if (opc == 0x02 || opc == 0x06) {
15130 opCatO = Iop_InterleaveHI32x2;
15131 opCatE = Iop_InterleaveLO32x2;
15132 }
15133
15134 assign( dV, getXMMReg(rV) );
15135
15136 if (epartIsReg(modrm)) {
15137 UInt rE = eregOfRexRM(pfx,modrm);
15138 assign( sV, getXMMReg(rE) );
sewardjcc3d2192013-03-27 11:37:33 +000015139 DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
15140 nameXMMReg(rE), nameXMMReg(rG));
sewardj8516a1f2012-06-24 14:26:30 +000015141 delta += 1;
15142 } else {
15143 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15144 if (!isAvx)
15145 gen_SEGV_if_not_16_aligned( addr );
15146 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
sewardjcc3d2192013-03-27 11:37:33 +000015147 DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
15148 dis_buf, nameXMMReg(rG));
sewardj8516a1f2012-06-24 14:26:30 +000015149 delta += alen;
15150 }
15151
15152 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
15153 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
15154 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
15155 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
15156
15157 /* This isn't a particularly efficient way to compute the
15158 result, but at least it avoids a proliferation of IROps,
15159 hence avoids complication all the backends. */
15160
15161 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15162 ( rG,
15163 binop(Iop_64HLtoV128,
15164 binop(opV64,
15165 binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
15166 binop(opCatO,mkexpr(sHi),mkexpr(sLo)) ),
15167 binop(opV64,
15168 binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
15169 binop(opCatO,mkexpr(dHi),mkexpr(dLo)) ) ) );
15170 return delta;
15171}
15172
15173
floriancacba8e2014-12-15 18:58:07 +000015174static Long dis_PHADD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
15175 UChar opc )
sewardjcc3d2192013-03-27 11:37:33 +000015176{
15177 IRTemp addr = IRTemp_INVALID;
15178 Int alen = 0;
15179 HChar dis_buf[50];
15180 const HChar* str = "???";
15181 IROp opV64 = Iop_INVALID;
15182 IROp opCatO = Iop_CatOddLanes16x4;
15183 IROp opCatE = Iop_CatEvenLanes16x4;
15184 IRTemp sV = newTemp(Ity_V256);
15185 IRTemp dV = newTemp(Ity_V256);
15186 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
15187 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
15188 UChar modrm = getUChar(delta);
15189 UInt rG = gregOfRexRM(pfx,modrm);
15190 UInt rV = getVexNvvvv(pfx);
15191
15192 switch (opc) {
15193 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15194 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15195 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15196 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15197 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15198 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15199 default: vassert(0);
15200 }
15201 if (opc == 0x02 || opc == 0x06) {
15202 opCatO = Iop_InterleaveHI32x2;
15203 opCatE = Iop_InterleaveLO32x2;
15204 }
15205
15206 assign( dV, getYMMReg(rV) );
15207
15208 if (epartIsReg(modrm)) {
15209 UInt rE = eregOfRexRM(pfx,modrm);
15210 assign( sV, getYMMReg(rE) );
15211 DIP("vph%s %s,%s\n", str, nameYMMReg(rE), nameYMMReg(rG));
15212 delta += 1;
15213 } else {
15214 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15215 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
15216 DIP("vph%s %s,%s\n", str, dis_buf, nameYMMReg(rG));
15217 delta += alen;
15218 }
15219
15220 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
15221 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
15222
15223 /* This isn't a particularly efficient way to compute the
15224 result, but at least it avoids a proliferation of IROps,
15225 hence avoids complication all the backends. */
15226
15227 putYMMReg( rG,
15228 binop(Iop_V128HLtoV256,
15229 binop(Iop_64HLtoV128,
15230 binop(opV64,
15231 binop(opCatE,mkexpr(s3),mkexpr(s2)),
15232 binop(opCatO,mkexpr(s3),mkexpr(s2)) ),
15233 binop(opV64,
15234 binop(opCatE,mkexpr(d3),mkexpr(d2)),
15235 binop(opCatO,mkexpr(d3),mkexpr(d2)) ) ),
15236 binop(Iop_64HLtoV128,
15237 binop(opV64,
15238 binop(opCatE,mkexpr(s1),mkexpr(s0)),
15239 binop(opCatO,mkexpr(s1),mkexpr(s0)) ),
15240 binop(opV64,
15241 binop(opCatE,mkexpr(d1),mkexpr(d0)),
15242 binop(opCatO,mkexpr(d1),mkexpr(d0)) ) ) ) );
15243 return delta;
15244}
15245
15246
sewardj8516a1f2012-06-24 14:26:30 +000015247static IRTemp math_PMADDUBSW_128 ( IRTemp dV, IRTemp sV )
15248{
15249 IRTemp sVoddsSX = newTemp(Ity_V128);
15250 IRTemp sVevensSX = newTemp(Ity_V128);
15251 IRTemp dVoddsZX = newTemp(Ity_V128);
15252 IRTemp dVevensZX = newTemp(Ity_V128);
15253 /* compute dV unsigned x sV signed */
15254 assign( sVoddsSX, binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
15255 assign( sVevensSX, binop(Iop_SarN16x8,
15256 binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
15257 mkU8(8)) );
15258 assign( dVoddsZX, binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
15259 assign( dVevensZX, binop(Iop_ShrN16x8,
15260 binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
15261 mkU8(8)) );
15262
15263 IRTemp res = newTemp(Ity_V128);
15264 assign( res, binop(Iop_QAdd16Sx8,
15265 binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
15266 binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
15267 )
15268 );
15269 return res;
15270}
15271
15272
sewardjcc3d2192013-03-27 11:37:33 +000015273static
15274IRTemp math_PMADDUBSW_256 ( IRTemp dV, IRTemp sV )
15275{
15276 IRTemp sHi, sLo, dHi, dLo;
15277 sHi = sLo = dHi = dLo = IRTemp_INVALID;
15278 breakupV256toV128s( dV, &dHi, &dLo);
15279 breakupV256toV128s( sV, &sHi, &sLo);
15280 IRTemp res = newTemp(Ity_V256);
15281 assign(res, binop(Iop_V128HLtoV256,
15282 mkexpr(math_PMADDUBSW_128(dHi, sHi)),
15283 mkexpr(math_PMADDUBSW_128(dLo, sLo))));
15284 return res;
15285}
15286
15287
sewardj80611e32012-01-20 13:07:24 +000015288__attribute__((noinline))
15289static
15290Long dis_ESC_0F38__SupSSE3 ( Bool* decode_OK,
floriancacba8e2014-12-15 18:58:07 +000015291 const VexAbiInfo* vbi,
sewardj80611e32012-01-20 13:07:24 +000015292 Prefix pfx, Int sz, Long deltaIN )
15293{
15294 IRTemp addr = IRTemp_INVALID;
15295 UChar modrm = 0;
15296 Int alen = 0;
15297 HChar dis_buf[50];
15298
15299 *decode_OK = False;
15300
15301 Long delta = deltaIN;
15302 UChar opc = getUChar(delta);
15303 delta++;
15304 switch (opc) {
15305
15306 case 0x00:
15307 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
15308 if (have66noF2noF3(pfx)
15309 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
sewardjc4530ae2012-05-21 10:18:49 +000015310 IRTemp sV = newTemp(Ity_V128);
15311 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000015312
15313 modrm = getUChar(delta);
15314 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
15315
15316 if (epartIsReg(modrm)) {
15317 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
15318 delta += 1;
15319 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
15320 nameXMMReg(gregOfRexRM(pfx,modrm)));
15321 } else {
15322 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15323 gen_SEGV_if_not_16_aligned( addr );
15324 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15325 delta += alen;
15326 DIP("pshufb %s,%s\n", dis_buf,
15327 nameXMMReg(gregOfRexRM(pfx,modrm)));
15328 }
15329
sewardjc4530ae2012-05-21 10:18:49 +000015330 IRTemp res = math_PSHUFB_XMM( dV, sV );
15331 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(res));
sewardj80611e32012-01-20 13:07:24 +000015332 goto decode_success;
15333 }
15334 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
15335 if (haveNo66noF2noF3(pfx) && sz == 4) {
15336 IRTemp sV = newTemp(Ity_I64);
15337 IRTemp dV = newTemp(Ity_I64);
15338
15339 modrm = getUChar(delta);
15340 do_MMX_preamble();
15341 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15342
15343 if (epartIsReg(modrm)) {
15344 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15345 delta += 1;
15346 DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
15347 nameMMXReg(gregLO3ofRM(modrm)));
15348 } else {
15349 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15350 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15351 delta += alen;
15352 DIP("pshufb %s,%s\n", dis_buf,
15353 nameMMXReg(gregLO3ofRM(modrm)));
15354 }
15355
15356 putMMXReg(
15357 gregLO3ofRM(modrm),
15358 binop(
15359 Iop_And64,
15360 /* permute the lanes */
15361 binop(
15362 Iop_Perm8x8,
15363 mkexpr(dV),
15364 binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
15365 ),
15366 /* mask off lanes which have (index & 0x80) == 0x80 */
15367 unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
15368 )
15369 );
15370 goto decode_success;
15371 }
15372 break;
15373
15374 case 0x01:
15375 case 0x02:
15376 case 0x03:
15377 case 0x05:
15378 case 0x06:
15379 case 0x07:
15380 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
15381 G to G (xmm). */
15382 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
15383 G to G (xmm). */
15384 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
15385 xmm) and G to G (xmm). */
15386 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
15387 G to G (xmm). */
15388 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
15389 G to G (xmm). */
15390 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
15391 xmm) and G to G (xmm). */
15392 if (have66noF2noF3(pfx)
15393 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
sewardj8516a1f2012-06-24 14:26:30 +000015394 delta = dis_PHADD_128( vbi, pfx, delta, False/*isAvx*/, opc );
sewardj80611e32012-01-20 13:07:24 +000015395 goto decode_success;
15396 }
15397 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
15398 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
15399 to G (mmx). */
15400 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
15401 to G (mmx). */
15402 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
15403 mmx) and G to G (mmx). */
15404 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
15405 to G (mmx). */
15406 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
15407 to G (mmx). */
15408 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
15409 mmx) and G to G (mmx). */
15410 if (haveNo66noF2noF3(pfx) && sz == 4) {
florian55085f82012-11-21 00:36:55 +000015411 const HChar* str = "???";
sewardj80611e32012-01-20 13:07:24 +000015412 IROp opV64 = Iop_INVALID;
15413 IROp opCatO = Iop_CatOddLanes16x4;
15414 IROp opCatE = Iop_CatEvenLanes16x4;
15415 IRTemp sV = newTemp(Ity_I64);
15416 IRTemp dV = newTemp(Ity_I64);
15417
15418 modrm = getUChar(delta);
15419
15420 switch (opc) {
15421 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15422 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15423 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15424 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15425 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15426 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15427 default: vassert(0);
15428 }
15429 if (opc == 0x02 || opc == 0x06) {
15430 opCatO = Iop_InterleaveHI32x2;
15431 opCatE = Iop_InterleaveLO32x2;
15432 }
15433
15434 do_MMX_preamble();
15435 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15436
15437 if (epartIsReg(modrm)) {
15438 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15439 delta += 1;
15440 DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
15441 nameMMXReg(gregLO3ofRM(modrm)));
15442 } else {
15443 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15444 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15445 delta += alen;
15446 DIP("ph%s %s,%s\n", str, dis_buf,
15447 nameMMXReg(gregLO3ofRM(modrm)));
15448 }
15449
15450 putMMXReg(
15451 gregLO3ofRM(modrm),
15452 binop(opV64,
15453 binop(opCatE,mkexpr(sV),mkexpr(dV)),
15454 binop(opCatO,mkexpr(sV),mkexpr(dV))
15455 )
15456 );
15457 goto decode_success;
15458 }
15459 break;
15460
15461 case 0x04:
15462 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
15463 Unsigned Bytes (XMM) */
15464 if (have66noF2noF3(pfx)
15465 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
sewardj8516a1f2012-06-24 14:26:30 +000015466 IRTemp sV = newTemp(Ity_V128);
15467 IRTemp dV = newTemp(Ity_V128);
15468 modrm = getUChar(delta);
15469 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000015470
sewardj8516a1f2012-06-24 14:26:30 +000015471 assign( dV, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000015472
15473 if (epartIsReg(modrm)) {
sewardj8516a1f2012-06-24 14:26:30 +000015474 UInt rE = eregOfRexRM(pfx,modrm);
15475 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000015476 delta += 1;
sewardj8516a1f2012-06-24 14:26:30 +000015477 DIP("pmaddubsw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000015478 } else {
15479 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15480 gen_SEGV_if_not_16_aligned( addr );
15481 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15482 delta += alen;
sewardj8516a1f2012-06-24 14:26:30 +000015483 DIP("pmaddubsw %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000015484 }
15485
sewardj8516a1f2012-06-24 14:26:30 +000015486 putXMMReg( rG, mkexpr( math_PMADDUBSW_128( dV, sV ) ) );
sewardj80611e32012-01-20 13:07:24 +000015487 goto decode_success;
15488 }
15489 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
15490 Unsigned Bytes (MMX) */
15491 if (haveNo66noF2noF3(pfx) && sz == 4) {
15492 IRTemp sV = newTemp(Ity_I64);
15493 IRTemp dV = newTemp(Ity_I64);
15494 IRTemp sVoddsSX = newTemp(Ity_I64);
15495 IRTemp sVevensSX = newTemp(Ity_I64);
15496 IRTemp dVoddsZX = newTemp(Ity_I64);
15497 IRTemp dVevensZX = newTemp(Ity_I64);
15498
15499 modrm = getUChar(delta);
15500 do_MMX_preamble();
15501 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15502
15503 if (epartIsReg(modrm)) {
15504 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15505 delta += 1;
15506 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
15507 nameMMXReg(gregLO3ofRM(modrm)));
15508 } else {
15509 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15510 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15511 delta += alen;
15512 DIP("pmaddubsw %s,%s\n", dis_buf,
15513 nameMMXReg(gregLO3ofRM(modrm)));
15514 }
15515
15516 /* compute dV unsigned x sV signed */
15517 assign( sVoddsSX,
15518 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
15519 assign( sVevensSX,
15520 binop(Iop_SarN16x4,
15521 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
15522 mkU8(8)) );
15523 assign( dVoddsZX,
15524 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
15525 assign( dVevensZX,
15526 binop(Iop_ShrN16x4,
15527 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
15528 mkU8(8)) );
15529
15530 putMMXReg(
15531 gregLO3ofRM(modrm),
15532 binop(Iop_QAdd16Sx4,
15533 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
15534 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
15535 )
15536 );
15537 goto decode_success;
15538 }
15539 break;
15540
15541 case 0x08:
15542 case 0x09:
15543 case 0x0A:
15544 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
15545 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
15546 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
15547 if (have66noF2noF3(pfx)
15548 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15549 IRTemp sV = newTemp(Ity_V128);
15550 IRTemp dV = newTemp(Ity_V128);
15551 IRTemp sHi = newTemp(Ity_I64);
15552 IRTemp sLo = newTemp(Ity_I64);
15553 IRTemp dHi = newTemp(Ity_I64);
15554 IRTemp dLo = newTemp(Ity_I64);
florian55085f82012-11-21 00:36:55 +000015555 const HChar* str = "???";
sewardj80611e32012-01-20 13:07:24 +000015556 Int laneszB = 0;
15557
15558 switch (opc) {
15559 case 0x08: laneszB = 1; str = "b"; break;
15560 case 0x09: laneszB = 2; str = "w"; break;
15561 case 0x0A: laneszB = 4; str = "d"; break;
15562 default: vassert(0);
15563 }
15564
15565 modrm = getUChar(delta);
15566 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
15567
15568 if (epartIsReg(modrm)) {
15569 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
15570 delta += 1;
15571 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
15572 nameXMMReg(gregOfRexRM(pfx,modrm)));
15573 } else {
15574 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15575 gen_SEGV_if_not_16_aligned( addr );
15576 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15577 delta += alen;
15578 DIP("psign%s %s,%s\n", str, dis_buf,
15579 nameXMMReg(gregOfRexRM(pfx,modrm)));
15580 }
15581
15582 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
15583 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
15584 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
15585 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
15586
15587 putXMMReg(
15588 gregOfRexRM(pfx,modrm),
15589 binop(Iop_64HLtoV128,
15590 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
15591 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
15592 )
15593 );
15594 goto decode_success;
15595 }
15596 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
15597 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
15598 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
15599 if (haveNo66noF2noF3(pfx) && sz == 4) {
15600 IRTemp sV = newTemp(Ity_I64);
15601 IRTemp dV = newTemp(Ity_I64);
florian55085f82012-11-21 00:36:55 +000015602 const HChar* str = "???";
sewardj80611e32012-01-20 13:07:24 +000015603 Int laneszB = 0;
15604
15605 switch (opc) {
15606 case 0x08: laneszB = 1; str = "b"; break;
15607 case 0x09: laneszB = 2; str = "w"; break;
15608 case 0x0A: laneszB = 4; str = "d"; break;
15609 default: vassert(0);
15610 }
15611
15612 modrm = getUChar(delta);
15613 do_MMX_preamble();
15614 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15615
15616 if (epartIsReg(modrm)) {
15617 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15618 delta += 1;
15619 DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
15620 nameMMXReg(gregLO3ofRM(modrm)));
15621 } else {
15622 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15623 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15624 delta += alen;
15625 DIP("psign%s %s,%s\n", str, dis_buf,
15626 nameMMXReg(gregLO3ofRM(modrm)));
15627 }
15628
15629 putMMXReg(
15630 gregLO3ofRM(modrm),
15631 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
15632 );
15633 goto decode_success;
15634 }
15635 break;
15636
15637 case 0x0B:
15638 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
15639 Scale (XMM) */
15640 if (have66noF2noF3(pfx)
15641 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15642 IRTemp sV = newTemp(Ity_V128);
15643 IRTemp dV = newTemp(Ity_V128);
15644 IRTemp sHi = newTemp(Ity_I64);
15645 IRTemp sLo = newTemp(Ity_I64);
15646 IRTemp dHi = newTemp(Ity_I64);
15647 IRTemp dLo = newTemp(Ity_I64);
15648
15649 modrm = getUChar(delta);
15650 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
15651
15652 if (epartIsReg(modrm)) {
15653 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
15654 delta += 1;
15655 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
15656 nameXMMReg(gregOfRexRM(pfx,modrm)));
15657 } else {
15658 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15659 gen_SEGV_if_not_16_aligned( addr );
15660 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15661 delta += alen;
15662 DIP("pmulhrsw %s,%s\n", dis_buf,
15663 nameXMMReg(gregOfRexRM(pfx,modrm)));
15664 }
15665
15666 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
15667 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
15668 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
15669 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
15670
15671 putXMMReg(
15672 gregOfRexRM(pfx,modrm),
15673 binop(Iop_64HLtoV128,
15674 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
15675 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
15676 )
15677 );
15678 goto decode_success;
15679 }
15680 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
15681 (MMX) */
15682 if (haveNo66noF2noF3(pfx) && sz == 4) {
15683 IRTemp sV = newTemp(Ity_I64);
15684 IRTemp dV = newTemp(Ity_I64);
15685
15686 modrm = getUChar(delta);
15687 do_MMX_preamble();
15688 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15689
15690 if (epartIsReg(modrm)) {
15691 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15692 delta += 1;
15693 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
15694 nameMMXReg(gregLO3ofRM(modrm)));
15695 } else {
15696 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15697 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15698 delta += alen;
15699 DIP("pmulhrsw %s,%s\n", dis_buf,
15700 nameMMXReg(gregLO3ofRM(modrm)));
15701 }
15702
15703 putMMXReg(
15704 gregLO3ofRM(modrm),
15705 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
15706 );
15707 goto decode_success;
15708 }
15709 break;
15710
15711 case 0x1C:
15712 case 0x1D:
15713 case 0x1E:
15714 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
15715 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
15716 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
15717 if (have66noF2noF3(pfx)
15718 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
sewardj97f72452012-05-23 05:56:53 +000015719 IRTemp sV = newTemp(Ity_V128);
florian55085f82012-11-21 00:36:55 +000015720 const HChar* str = "???";
sewardj80611e32012-01-20 13:07:24 +000015721 Int laneszB = 0;
15722
15723 switch (opc) {
15724 case 0x1C: laneszB = 1; str = "b"; break;
15725 case 0x1D: laneszB = 2; str = "w"; break;
15726 case 0x1E: laneszB = 4; str = "d"; break;
15727 default: vassert(0);
15728 }
15729
15730 modrm = getUChar(delta);
sewardj80611e32012-01-20 13:07:24 +000015731 if (epartIsReg(modrm)) {
15732 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
15733 delta += 1;
15734 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
15735 nameXMMReg(gregOfRexRM(pfx,modrm)));
15736 } else {
15737 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15738 gen_SEGV_if_not_16_aligned( addr );
15739 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15740 delta += alen;
15741 DIP("pabs%s %s,%s\n", str, dis_buf,
15742 nameXMMReg(gregOfRexRM(pfx,modrm)));
15743 }
15744
sewardj97f72452012-05-23 05:56:53 +000015745 putXMMReg( gregOfRexRM(pfx,modrm),
15746 mkexpr(math_PABS_XMM(sV, laneszB)) );
sewardj80611e32012-01-20 13:07:24 +000015747 goto decode_success;
15748 }
15749 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
15750 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
15751 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
15752 if (haveNo66noF2noF3(pfx) && sz == 4) {
15753 IRTemp sV = newTemp(Ity_I64);
florian55085f82012-11-21 00:36:55 +000015754 const HChar* str = "???";
sewardj80611e32012-01-20 13:07:24 +000015755 Int laneszB = 0;
15756
15757 switch (opc) {
15758 case 0x1C: laneszB = 1; str = "b"; break;
15759 case 0x1D: laneszB = 2; str = "w"; break;
15760 case 0x1E: laneszB = 4; str = "d"; break;
15761 default: vassert(0);
15762 }
15763
15764 modrm = getUChar(delta);
15765 do_MMX_preamble();
15766
15767 if (epartIsReg(modrm)) {
15768 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15769 delta += 1;
15770 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
15771 nameMMXReg(gregLO3ofRM(modrm)));
15772 } else {
15773 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15774 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15775 delta += alen;
15776 DIP("pabs%s %s,%s\n", str, dis_buf,
15777 nameMMXReg(gregLO3ofRM(modrm)));
15778 }
15779
sewardj97f72452012-05-23 05:56:53 +000015780 putMMXReg( gregLO3ofRM(modrm),
15781 mkexpr(math_PABS_MMX( sV, laneszB )) );
sewardj80611e32012-01-20 13:07:24 +000015782 goto decode_success;
15783 }
15784 break;
15785
15786 default:
15787 break;
15788
15789 }
15790
15791 //decode_failure:
15792 *decode_OK = False;
15793 return deltaIN;
15794
15795 decode_success:
15796 *decode_OK = True;
15797 return delta;
15798}
15799
15800
15801/*------------------------------------------------------------*/
15802/*--- ---*/
15803/*--- Top-level SSSE3: dis_ESC_0F3A__SupSSE3 ---*/
15804/*--- ---*/
15805/*------------------------------------------------------------*/
15806
15807__attribute__((noinline))
15808static
15809Long dis_ESC_0F3A__SupSSE3 ( Bool* decode_OK,
floriancacba8e2014-12-15 18:58:07 +000015810 const VexAbiInfo* vbi,
sewardj80611e32012-01-20 13:07:24 +000015811 Prefix pfx, Int sz, Long deltaIN )
15812{
15813 Long d64 = 0;
15814 IRTemp addr = IRTemp_INVALID;
15815 UChar modrm = 0;
15816 Int alen = 0;
15817 HChar dis_buf[50];
15818
15819 *decode_OK = False;
15820
15821 Long delta = deltaIN;
15822 UChar opc = getUChar(delta);
15823 delta++;
15824 switch (opc) {
15825
15826 case 0x0F:
15827 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
15828 if (have66noF2noF3(pfx)
15829 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15830 IRTemp sV = newTemp(Ity_V128);
15831 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000015832
15833 modrm = getUChar(delta);
15834 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
15835
15836 if (epartIsReg(modrm)) {
15837 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
15838 d64 = (Long)getUChar(delta+1);
15839 delta += 1+1;
15840 DIP("palignr $%d,%s,%s\n", (Int)d64,
15841 nameXMMReg(eregOfRexRM(pfx,modrm)),
15842 nameXMMReg(gregOfRexRM(pfx,modrm)));
15843 } else {
15844 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
15845 gen_SEGV_if_not_16_aligned( addr );
15846 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15847 d64 = (Long)getUChar(delta+alen);
15848 delta += alen+1;
15849 DIP("palignr $%d,%s,%s\n", (Int)d64,
15850 dis_buf,
15851 nameXMMReg(gregOfRexRM(pfx,modrm)));
15852 }
15853
sewardj151cd3e2012-06-18 13:56:55 +000015854 IRTemp res = math_PALIGNR_XMM( sV, dV, d64 );
15855 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000015856 goto decode_success;
15857 }
15858 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
15859 if (haveNo66noF2noF3(pfx) && sz == 4) {
15860 IRTemp sV = newTemp(Ity_I64);
15861 IRTemp dV = newTemp(Ity_I64);
15862 IRTemp res = newTemp(Ity_I64);
15863
15864 modrm = getUChar(delta);
15865 do_MMX_preamble();
15866 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15867
15868 if (epartIsReg(modrm)) {
15869 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15870 d64 = (Long)getUChar(delta+1);
15871 delta += 1+1;
15872 DIP("palignr $%d,%s,%s\n", (Int)d64,
15873 nameMMXReg(eregLO3ofRM(modrm)),
15874 nameMMXReg(gregLO3ofRM(modrm)));
15875 } else {
15876 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
15877 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15878 d64 = (Long)getUChar(delta+alen);
15879 delta += alen+1;
15880 DIP("palignr $%d%s,%s\n", (Int)d64,
15881 dis_buf,
15882 nameMMXReg(gregLO3ofRM(modrm)));
15883 }
15884
15885 if (d64 == 0) {
15886 assign( res, mkexpr(sV) );
15887 }
15888 else if (d64 >= 1 && d64 <= 7) {
15889 assign(res,
15890 binop(Iop_Or64,
15891 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)),
15892 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64))
15893 )));
15894 }
15895 else if (d64 == 8) {
15896 assign( res, mkexpr(dV) );
15897 }
15898 else if (d64 >= 9 && d64 <= 15) {
15899 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) );
15900 }
15901 else if (d64 >= 16 && d64 <= 255) {
15902 assign( res, mkU64(0) );
15903 }
15904 else
15905 vassert(0);
15906
15907 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
15908 goto decode_success;
15909 }
15910 break;
15911
15912 default:
15913 break;
15914
15915 }
15916
15917 //decode_failure:
15918 *decode_OK = False;
15919 return deltaIN;
15920
15921 decode_success:
15922 *decode_OK = True;
15923 return delta;
15924}
15925
15926
15927/*------------------------------------------------------------*/
15928/*--- ---*/
15929/*--- Top-level SSE4: dis_ESC_0F__SSE4 ---*/
15930/*--- ---*/
15931/*------------------------------------------------------------*/
15932
15933__attribute__((noinline))
15934static
15935Long dis_ESC_0F__SSE4 ( Bool* decode_OK,
floriancacba8e2014-12-15 18:58:07 +000015936 const VexArchInfo* archinfo,
15937 const VexAbiInfo* vbi,
sewardj80611e32012-01-20 13:07:24 +000015938 Prefix pfx, Int sz, Long deltaIN )
15939{
15940 IRTemp addr = IRTemp_INVALID;
15941 IRType ty = Ity_INVALID;
15942 UChar modrm = 0;
15943 Int alen = 0;
15944 HChar dis_buf[50];
15945
15946 *decode_OK = False;
15947
15948 Long delta = deltaIN;
15949 UChar opc = getUChar(delta);
15950 delta++;
15951 switch (opc) {
15952
15953 case 0xB8:
15954 /* F3 0F B8 = POPCNT{W,L,Q}
15955 Count the number of 1 bits in a register
15956 */
15957 if (haveF3noF2(pfx) /* so both 66 and REX.W are possibilities */
15958 && (sz == 2 || sz == 4 || sz == 8)) {
15959 /*IRType*/ ty = szToITy(sz);
15960 IRTemp src = newTemp(ty);
15961 modrm = getUChar(delta);
15962 if (epartIsReg(modrm)) {
15963 assign(src, getIRegE(sz, pfx, modrm));
15964 delta += 1;
15965 DIP("popcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
15966 nameIRegG(sz, pfx, modrm));
15967 } else {
15968 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
15969 assign(src, loadLE(ty, mkexpr(addr)));
15970 delta += alen;
15971 DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf,
15972 nameIRegG(sz, pfx, modrm));
15973 }
15974
15975 IRTemp result = gen_POPCOUNT(ty, src);
15976 putIRegG(sz, pfx, modrm, mkexpr(result));
15977
15978 // Update flags. This is pretty lame .. perhaps can do better
15979 // if this turns out to be performance critical.
15980 // O S A C P are cleared. Z is set if SRC == 0.
15981 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
15982 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
15983 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
15984 stmt( IRStmt_Put( OFFB_CC_DEP1,
15985 binop(Iop_Shl64,
15986 unop(Iop_1Uto64,
15987 binop(Iop_CmpEQ64,
15988 widenUto64(mkexpr(src)),
15989 mkU64(0))),
15990 mkU8(AMD64G_CC_SHIFT_Z))));
15991
15992 goto decode_success;
15993 }
15994 break;
15995
sewardjcc3d2192013-03-27 11:37:33 +000015996 case 0xBC:
15997 /* F3 0F BC -- TZCNT (count trailing zeroes. A BMI extension,
15998 which we can only decode if we're sure this is a BMI1 capable cpu
15999 that supports TZCNT, since otherwise it's BSF, which behaves
16000 differently on zero source. */
16001 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
16002 && (sz == 2 || sz == 4 || sz == 8)
16003 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI)) {
16004 /*IRType*/ ty = szToITy(sz);
16005 IRTemp src = newTemp(ty);
16006 modrm = getUChar(delta);
16007 if (epartIsReg(modrm)) {
16008 assign(src, getIRegE(sz, pfx, modrm));
16009 delta += 1;
16010 DIP("tzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16011 nameIRegG(sz, pfx, modrm));
16012 } else {
16013 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16014 assign(src, loadLE(ty, mkexpr(addr)));
16015 delta += alen;
16016 DIP("tzcnt%c %s, %s\n", nameISize(sz), dis_buf,
16017 nameIRegG(sz, pfx, modrm));
16018 }
16019
16020 IRTemp res = gen_TZCNT(ty, src);
16021 putIRegG(sz, pfx, modrm, mkexpr(res));
16022
16023 // Update flags. This is pretty lame .. perhaps can do better
16024 // if this turns out to be performance critical.
16025 // O S A P are cleared. Z is set if RESULT == 0.
16026 // C is set if SRC is zero.
16027 IRTemp src64 = newTemp(Ity_I64);
16028 IRTemp res64 = newTemp(Ity_I64);
16029 assign(src64, widenUto64(mkexpr(src)));
16030 assign(res64, widenUto64(mkexpr(res)));
16031
16032 IRTemp oszacp = newTemp(Ity_I64);
16033 assign(
16034 oszacp,
16035 binop(Iop_Or64,
16036 binop(Iop_Shl64,
16037 unop(Iop_1Uto64,
16038 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
16039 mkU8(AMD64G_CC_SHIFT_Z)),
16040 binop(Iop_Shl64,
16041 unop(Iop_1Uto64,
16042 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
16043 mkU8(AMD64G_CC_SHIFT_C))
16044 )
16045 );
16046
16047 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16048 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16049 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16050 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
16051
16052 goto decode_success;
16053 }
16054 break;
16055
sewardj80611e32012-01-20 13:07:24 +000016056 case 0xBD:
16057 /* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension,
16058 which we can only decode if we're sure this is an AMD cpu
16059 that supports LZCNT, since otherwise it's BSR, which behaves
16060 differently. Bizarrely, my Sandy Bridge also accepts these
16061 instructions but produces different results. */
16062 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
16063 && (sz == 2 || sz == 4 || sz == 8)
16064 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) {
16065 /*IRType*/ ty = szToITy(sz);
16066 IRTemp src = newTemp(ty);
16067 modrm = getUChar(delta);
16068 if (epartIsReg(modrm)) {
16069 assign(src, getIRegE(sz, pfx, modrm));
16070 delta += 1;
16071 DIP("lzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16072 nameIRegG(sz, pfx, modrm));
16073 } else {
16074 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16075 assign(src, loadLE(ty, mkexpr(addr)));
16076 delta += alen;
16077 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
16078 nameIRegG(sz, pfx, modrm));
16079 }
16080
16081 IRTemp res = gen_LZCNT(ty, src);
16082 putIRegG(sz, pfx, modrm, mkexpr(res));
16083
16084 // Update flags. This is pretty lame .. perhaps can do better
16085 // if this turns out to be performance critical.
16086 // O S A P are cleared. Z is set if RESULT == 0.
16087 // C is set if SRC is zero.
16088 IRTemp src64 = newTemp(Ity_I64);
16089 IRTemp res64 = newTemp(Ity_I64);
16090 assign(src64, widenUto64(mkexpr(src)));
16091 assign(res64, widenUto64(mkexpr(res)));
16092
16093 IRTemp oszacp = newTemp(Ity_I64);
16094 assign(
16095 oszacp,
16096 binop(Iop_Or64,
16097 binop(Iop_Shl64,
16098 unop(Iop_1Uto64,
16099 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
16100 mkU8(AMD64G_CC_SHIFT_Z)),
16101 binop(Iop_Shl64,
16102 unop(Iop_1Uto64,
16103 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
16104 mkU8(AMD64G_CC_SHIFT_C))
16105 )
16106 );
16107
16108 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16109 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16110 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16111 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
16112
16113 goto decode_success;
16114 }
16115 break;
16116
16117 default:
16118 break;
16119
16120 }
16121
16122 //decode_failure:
16123 *decode_OK = False;
16124 return deltaIN;
16125
16126 decode_success:
16127 *decode_OK = True;
16128 return delta;
16129}
16130
16131
16132/*------------------------------------------------------------*/
16133/*--- ---*/
16134/*--- Top-level SSE4: dis_ESC_0F38__SSE4 ---*/
16135/*--- ---*/
16136/*------------------------------------------------------------*/
16137
sewardje8a7eb72012-06-12 14:59:17 +000016138static IRTemp math_PBLENDVB_128 ( IRTemp vecE, IRTemp vecG,
16139 IRTemp vec0/*controlling mask*/,
16140 UInt gran, IROp opSAR )
sewardjc4530ae2012-05-21 10:18:49 +000016141{
16142 /* The tricky bit is to convert vec0 into a suitable mask, by
16143 copying the most significant bit of each lane into all positions
16144 in the lane. */
16145 IRTemp sh = newTemp(Ity_I8);
16146 assign(sh, mkU8(8 * gran - 1));
16147
16148 IRTemp mask = newTemp(Ity_V128);
16149 assign(mask, binop(opSAR, mkexpr(vec0), mkexpr(sh)));
16150
16151 IRTemp notmask = newTemp(Ity_V128);
16152 assign(notmask, unop(Iop_NotV128, mkexpr(mask)));
16153
16154 IRTemp res = newTemp(Ity_V128);
16155 assign(res, binop(Iop_OrV128,
16156 binop(Iop_AndV128, mkexpr(vecE), mkexpr(mask)),
16157 binop(Iop_AndV128, mkexpr(vecG), mkexpr(notmask))));
16158 return res;
16159}
16160
sewardj4c0a7ac2012-06-21 09:08:19 +000016161static IRTemp math_PBLENDVB_256 ( IRTemp vecE, IRTemp vecG,
16162 IRTemp vec0/*controlling mask*/,
16163 UInt gran, IROp opSAR128 )
16164{
16165 /* The tricky bit is to convert vec0 into a suitable mask, by
16166 copying the most significant bit of each lane into all positions
16167 in the lane. */
16168 IRTemp sh = newTemp(Ity_I8);
16169 assign(sh, mkU8(8 * gran - 1));
16170
16171 IRTemp vec0Hi = IRTemp_INVALID;
16172 IRTemp vec0Lo = IRTemp_INVALID;
16173 breakupV256toV128s( vec0, &vec0Hi, &vec0Lo );
16174
16175 IRTemp mask = newTemp(Ity_V256);
16176 assign(mask, binop(Iop_V128HLtoV256,
16177 binop(opSAR128, mkexpr(vec0Hi), mkexpr(sh)),
16178 binop(opSAR128, mkexpr(vec0Lo), mkexpr(sh))));
16179
16180 IRTemp notmask = newTemp(Ity_V256);
16181 assign(notmask, unop(Iop_NotV256, mkexpr(mask)));
16182
16183 IRTemp res = newTemp(Ity_V256);
16184 assign(res, binop(Iop_OrV256,
16185 binop(Iop_AndV256, mkexpr(vecE), mkexpr(mask)),
16186 binop(Iop_AndV256, mkexpr(vecG), mkexpr(notmask))));
16187 return res;
16188}
16189
floriancacba8e2014-12-15 18:58:07 +000016190static Long dis_VBLENDV_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
sewardj4c0a7ac2012-06-21 09:08:19 +000016191 const HChar *name, UInt gran, IROp opSAR )
16192{
16193 IRTemp addr = IRTemp_INVALID;
16194 Int alen = 0;
16195 HChar dis_buf[50];
16196 UChar modrm = getUChar(delta);
16197 UInt rG = gregOfRexRM(pfx, modrm);
16198 UInt rV = getVexNvvvv(pfx);
16199 UInt rIS4 = 0xFF; /* invalid */
16200 IRTemp vecE = newTemp(Ity_V128);
16201 IRTemp vecV = newTemp(Ity_V128);
16202 IRTemp vecIS4 = newTemp(Ity_V128);
16203 if (epartIsReg(modrm)) {
16204 delta++;
16205 UInt rE = eregOfRexRM(pfx, modrm);
16206 assign(vecE, getXMMReg(rE));
16207 UChar ib = getUChar(delta);
16208 rIS4 = (ib >> 4) & 0xF;
16209 DIP("%s %s,%s,%s,%s\n",
16210 name, nameXMMReg(rIS4), nameXMMReg(rE),
16211 nameXMMReg(rV), nameXMMReg(rG));
16212 } else {
16213 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
16214 delta += alen;
16215 assign(vecE, loadLE(Ity_V128, mkexpr(addr)));
16216 UChar ib = getUChar(delta);
16217 rIS4 = (ib >> 4) & 0xF;
16218 DIP("%s %s,%s,%s,%s\n",
16219 name, nameXMMReg(rIS4), dis_buf, nameXMMReg(rV), nameXMMReg(rG));
16220 }
16221 delta++;
16222 assign(vecV, getXMMReg(rV));
16223 assign(vecIS4, getXMMReg(rIS4));
16224 IRTemp res = math_PBLENDVB_128( vecE, vecV, vecIS4, gran, opSAR );
16225 putYMMRegLoAndZU( rG, mkexpr(res) );
16226 return delta;
16227}
16228
floriancacba8e2014-12-15 18:58:07 +000016229static Long dis_VBLENDV_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
sewardj4c0a7ac2012-06-21 09:08:19 +000016230 const HChar *name, UInt gran, IROp opSAR128 )
16231{
16232 IRTemp addr = IRTemp_INVALID;
16233 Int alen = 0;
16234 HChar dis_buf[50];
16235 UChar modrm = getUChar(delta);
16236 UInt rG = gregOfRexRM(pfx, modrm);
16237 UInt rV = getVexNvvvv(pfx);
16238 UInt rIS4 = 0xFF; /* invalid */
16239 IRTemp vecE = newTemp(Ity_V256);
16240 IRTemp vecV = newTemp(Ity_V256);
16241 IRTemp vecIS4 = newTemp(Ity_V256);
16242 if (epartIsReg(modrm)) {
16243 delta++;
16244 UInt rE = eregOfRexRM(pfx, modrm);
16245 assign(vecE, getYMMReg(rE));
16246 UChar ib = getUChar(delta);
16247 rIS4 = (ib >> 4) & 0xF;
16248 DIP("%s %s,%s,%s,%s\n",
16249 name, nameYMMReg(rIS4), nameYMMReg(rE),
16250 nameYMMReg(rV), nameYMMReg(rG));
16251 } else {
16252 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
16253 delta += alen;
16254 assign(vecE, loadLE(Ity_V256, mkexpr(addr)));
16255 UChar ib = getUChar(delta);
16256 rIS4 = (ib >> 4) & 0xF;
16257 DIP("%s %s,%s,%s,%s\n",
16258 name, nameYMMReg(rIS4), dis_buf, nameYMMReg(rV), nameYMMReg(rG));
16259 }
16260 delta++;
16261 assign(vecV, getYMMReg(rV));
16262 assign(vecIS4, getYMMReg(rIS4));
16263 IRTemp res = math_PBLENDVB_256( vecE, vecV, vecIS4, gran, opSAR128 );
16264 putYMMReg( rG, mkexpr(res) );
16265 return delta;
16266}
16267
sewardjed1884d2012-06-21 08:53:48 +000016268static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign )
16269{
16270 /* Set Z=1 iff (vecE & vecG) == 0
16271 Set C=1 iff (vecE & not vecG) == 0
16272 */
16273
16274 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16275
16276 /* andV resp. andnV, reduced to 64-bit values, by or-ing the top
16277 and bottom 64-bits together. It relies on this trick:
16278
16279 InterleaveLO64x2([a,b],[c,d]) == [b,d] hence
16280
16281 InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly
16282 InterleaveHI64x2([a,b],[a,b]) == [a,a]
16283
16284 and so the OR of the above 2 exprs produces
16285 [a OR b, a OR b], from which we simply take the lower half.
16286 */
16287 IRTemp and64 = newTemp(Ity_I64);
16288 IRTemp andn64 = newTemp(Ity_I64);
16289
16290 assign(and64,
16291 unop(Iop_V128to64,
16292 binop(Iop_OrV128,
16293 binop(Iop_InterleaveLO64x2,
16294 mkexpr(andV), mkexpr(andV)),
16295 binop(Iop_InterleaveHI64x2,
16296 mkexpr(andV), mkexpr(andV)))));
16297
16298 assign(andn64,
16299 unop(Iop_V128to64,
16300 binop(Iop_OrV128,
16301 binop(Iop_InterleaveLO64x2,
16302 mkexpr(andnV), mkexpr(andnV)),
16303 binop(Iop_InterleaveHI64x2,
16304 mkexpr(andnV), mkexpr(andnV)))));
16305
16306 IRTemp z64 = newTemp(Ity_I64);
16307 IRTemp c64 = newTemp(Ity_I64);
16308 if (sign == 64) {
16309 /* When only interested in the most significant bit, just shift
16310 arithmetically right and negate. */
16311 assign(z64,
16312 unop(Iop_Not64,
16313 binop(Iop_Sar64, mkexpr(and64), mkU8(63))));
16314
16315 assign(c64,
16316 unop(Iop_Not64,
16317 binop(Iop_Sar64, mkexpr(andn64), mkU8(63))));
16318 } else {
16319 if (sign == 32) {
16320 /* When interested in bit 31 and bit 63, mask those bits and
16321 fallthrough into the PTEST handling. */
16322 IRTemp t0 = newTemp(Ity_I64);
16323 IRTemp t1 = newTemp(Ity_I64);
16324 IRTemp t2 = newTemp(Ity_I64);
16325 assign(t0, mkU64(0x8000000080000000ULL));
16326 assign(t1, binop(Iop_And64, mkexpr(and64), mkexpr(t0)));
16327 assign(t2, binop(Iop_And64, mkexpr(andn64), mkexpr(t0)));
16328 and64 = t1;
16329 andn64 = t2;
16330 }
16331 /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
16332 slice out the Z and C bits conveniently. We use the standard
16333 trick all-zeroes -> all-zeroes, anything-else -> all-ones
16334 done by "(x | -x) >>s (word-size - 1)".
16335 */
16336 assign(z64,
16337 unop(Iop_Not64,
16338 binop(Iop_Sar64,
16339 binop(Iop_Or64,
16340 binop(Iop_Sub64, mkU64(0), mkexpr(and64)),
16341 mkexpr(and64)), mkU8(63))));
16342
16343 assign(c64,
16344 unop(Iop_Not64,
16345 binop(Iop_Sar64,
16346 binop(Iop_Or64,
16347 binop(Iop_Sub64, mkU64(0), mkexpr(andn64)),
16348 mkexpr(andn64)), mkU8(63))));
16349 }
16350
16351 /* And finally, slice out the Z and C flags and set the flags
16352 thunk to COPY for them. OSAP are set to zero. */
16353 IRTemp newOSZACP = newTemp(Ity_I64);
16354 assign(newOSZACP,
16355 binop(Iop_Or64,
16356 binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)),
16357 binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C))));
16358
16359 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP)));
16360 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16361 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16362 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16363}
16364
16365
16366/* Handles 128 bit versions of PTEST, VTESTPS or VTESTPD.
16367 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
floriancacba8e2014-12-15 18:58:07 +000016368static Long dis_xTESTy_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardjed1884d2012-06-21 08:53:48 +000016369 Long delta, Bool isAvx, Int sign )
16370{
16371 IRTemp addr = IRTemp_INVALID;
16372 Int alen = 0;
16373 HChar dis_buf[50];
16374 UChar modrm = getUChar(delta);
16375 UInt rG = gregOfRexRM(pfx, modrm);
16376 IRTemp vecE = newTemp(Ity_V128);
16377 IRTemp vecG = newTemp(Ity_V128);
16378
16379 if ( epartIsReg(modrm) ) {
16380 UInt rE = eregOfRexRM(pfx, modrm);
16381 assign(vecE, getXMMReg(rE));
16382 delta += 1;
16383 DIP( "%s%stest%s %s,%s\n",
16384 isAvx ? "v" : "", sign == 0 ? "p" : "",
16385 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16386 nameXMMReg(rE), nameXMMReg(rG) );
16387 } else {
16388 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16389 if (!isAvx)
16390 gen_SEGV_if_not_16_aligned( addr );
16391 assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
16392 delta += alen;
16393 DIP( "%s%stest%s %s,%s\n",
16394 isAvx ? "v" : "", sign == 0 ? "p" : "",
16395 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16396 dis_buf, nameXMMReg(rG) );
16397 }
16398
16399 assign(vecG, getXMMReg(rG));
16400
16401 /* Set Z=1 iff (vecE & vecG) == 0
16402 Set C=1 iff (vecE & not vecG) == 0
16403 */
16404
16405 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16406 IRTemp andV = newTemp(Ity_V128);
16407 IRTemp andnV = newTemp(Ity_V128);
16408 assign(andV, binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
16409 assign(andnV, binop(Iop_AndV128,
16410 mkexpr(vecE),
16411 binop(Iop_XorV128, mkexpr(vecG),
16412 mkV128(0xFFFF))));
16413
16414 finish_xTESTy ( andV, andnV, sign );
16415 return delta;
16416}
16417
16418
16419/* Handles 256 bit versions of PTEST, VTESTPS or VTESTPD.
16420 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
floriancacba8e2014-12-15 18:58:07 +000016421static Long dis_xTESTy_256 ( const VexAbiInfo* vbi, Prefix pfx,
sewardjed1884d2012-06-21 08:53:48 +000016422 Long delta, Int sign )
16423{
16424 IRTemp addr = IRTemp_INVALID;
16425 Int alen = 0;
16426 HChar dis_buf[50];
16427 UChar modrm = getUChar(delta);
16428 UInt rG = gregOfRexRM(pfx, modrm);
16429 IRTemp vecE = newTemp(Ity_V256);
16430 IRTemp vecG = newTemp(Ity_V256);
16431
16432 if ( epartIsReg(modrm) ) {
16433 UInt rE = eregOfRexRM(pfx, modrm);
16434 assign(vecE, getYMMReg(rE));
16435 delta += 1;
16436 DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
16437 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16438 nameYMMReg(rE), nameYMMReg(rG) );
16439 } else {
16440 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16441 assign(vecE, loadLE( Ity_V256, mkexpr(addr) ));
16442 delta += alen;
16443 DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
16444 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16445 dis_buf, nameYMMReg(rG) );
16446 }
16447
16448 assign(vecG, getYMMReg(rG));
16449
16450 /* Set Z=1 iff (vecE & vecG) == 0
16451 Set C=1 iff (vecE & not vecG) == 0
16452 */
16453
16454 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16455 IRTemp andV = newTemp(Ity_V256);
16456 IRTemp andnV = newTemp(Ity_V256);
16457 assign(andV, binop(Iop_AndV256, mkexpr(vecE), mkexpr(vecG)));
16458 assign(andnV, binop(Iop_AndV256,
16459 mkexpr(vecE), unop(Iop_NotV256, mkexpr(vecG))));
16460
16461 IRTemp andVhi = IRTemp_INVALID;
16462 IRTemp andVlo = IRTemp_INVALID;
16463 IRTemp andnVhi = IRTemp_INVALID;
16464 IRTemp andnVlo = IRTemp_INVALID;
16465 breakupV256toV128s( andV, &andVhi, &andVlo );
16466 breakupV256toV128s( andnV, &andnVhi, &andnVlo );
16467
16468 IRTemp andV128 = newTemp(Ity_V128);
16469 IRTemp andnV128 = newTemp(Ity_V128);
16470 assign( andV128, binop( Iop_OrV128, mkexpr(andVhi), mkexpr(andVlo) ) );
16471 assign( andnV128, binop( Iop_OrV128, mkexpr(andnVhi), mkexpr(andnVlo) ) );
16472
16473 finish_xTESTy ( andV128, andnV128, sign );
16474 return delta;
16475}
16476
sewardjc4530ae2012-05-21 10:18:49 +000016477
sewardj6fcd43e2012-06-14 08:51:35 +000016478/* Handles 128 bit versions of PMOVZXBW and PMOVSXBW. */
floriancacba8e2014-12-15 18:58:07 +000016479static Long dis_PMOVxXBW_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj6fcd43e2012-06-14 08:51:35 +000016480 Long delta, Bool isAvx, Bool xIsZ )
sewardjc4530ae2012-05-21 10:18:49 +000016481{
16482 IRTemp addr = IRTemp_INVALID;
16483 Int alen = 0;
16484 HChar dis_buf[50];
16485 IRTemp srcVec = newTemp(Ity_V128);
16486 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000016487 const HChar* mbV = isAvx ? "v" : "";
16488 const HChar how = xIsZ ? 'z' : 's';
sewardj6fcd43e2012-06-14 08:51:35 +000016489 UInt rG = gregOfRexRM(pfx, modrm);
sewardjc4530ae2012-05-21 10:18:49 +000016490 if ( epartIsReg(modrm) ) {
sewardj6fcd43e2012-06-14 08:51:35 +000016491 UInt rE = eregOfRexRM(pfx, modrm);
16492 assign( srcVec, getXMMReg(rE) );
sewardjc4530ae2012-05-21 10:18:49 +000016493 delta += 1;
sewardj6fcd43e2012-06-14 08:51:35 +000016494 DIP( "%spmov%cxbw %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
sewardjc4530ae2012-05-21 10:18:49 +000016495 } else {
16496 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16497 assign( srcVec,
16498 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
16499 delta += alen;
sewardj6fcd43e2012-06-14 08:51:35 +000016500 DIP( "%spmov%cxbw %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
sewardjc4530ae2012-05-21 10:18:49 +000016501 }
16502
16503 IRExpr* res
sewardj6fcd43e2012-06-14 08:51:35 +000016504 = xIsZ /* do math for either zero or sign extend */
16505 ? binop( Iop_InterleaveLO8x16,
16506 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
16507 : binop( Iop_SarN16x8,
16508 binop( Iop_ShlN16x8,
16509 binop( Iop_InterleaveLO8x16,
16510 IRExpr_Const( IRConst_V128(0) ),
16511 mkexpr(srcVec) ),
16512 mkU8(8) ),
16513 mkU8(8) );
sewardjc4530ae2012-05-21 10:18:49 +000016514
sewardj6fcd43e2012-06-14 08:51:35 +000016515 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
sewardjc4530ae2012-05-21 10:18:49 +000016516
16517 return delta;
16518}
16519
16520
sewardjcc3d2192013-03-27 11:37:33 +000016521/* Handles 256 bit versions of PMOVZXBW and PMOVSXBW. */
floriancacba8e2014-12-15 18:58:07 +000016522static Long dis_PMOVxXBW_256 ( const VexAbiInfo* vbi, Prefix pfx,
sewardjcc3d2192013-03-27 11:37:33 +000016523 Long delta, Bool xIsZ )
16524{
16525 IRTemp addr = IRTemp_INVALID;
16526 Int alen = 0;
16527 HChar dis_buf[50];
16528 IRTemp srcVec = newTemp(Ity_V128);
16529 UChar modrm = getUChar(delta);
16530 UChar how = xIsZ ? 'z' : 's';
16531 UInt rG = gregOfRexRM(pfx, modrm);
16532 if ( epartIsReg(modrm) ) {
16533 UInt rE = eregOfRexRM(pfx, modrm);
16534 assign( srcVec, getXMMReg(rE) );
16535 delta += 1;
16536 DIP( "vpmov%cxbw %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
16537 } else {
16538 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16539 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) );
16540 delta += alen;
16541 DIP( "vpmov%cxbw %s,%s\n", how, dis_buf, nameYMMReg(rG) );
16542 }
16543
16544 /* First do zero extend. */
16545 IRExpr* res
16546 = binop( Iop_V128HLtoV256,
16547 binop( Iop_InterleaveHI8x16,
16548 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
16549 binop( Iop_InterleaveLO8x16,
16550 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
16551 /* And if needed sign extension as well. */
16552 if (!xIsZ)
16553 res = binop( Iop_SarN16x16,
16554 binop( Iop_ShlN16x16, res, mkU8(8) ), mkU8(8) );
16555
16556 putYMMReg ( rG, res );
16557
16558 return delta;
16559}
16560
16561
floriancacba8e2014-12-15 18:58:07 +000016562static Long dis_PMOVxXWD_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj6fcd43e2012-06-14 08:51:35 +000016563 Long delta, Bool isAvx, Bool xIsZ )
sewardjc4530ae2012-05-21 10:18:49 +000016564{
16565 IRTemp addr = IRTemp_INVALID;
16566 Int alen = 0;
16567 HChar dis_buf[50];
16568 IRTemp srcVec = newTemp(Ity_V128);
16569 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000016570 const HChar* mbV = isAvx ? "v" : "";
16571 const HChar how = xIsZ ? 'z' : 's';
sewardj8516a1f2012-06-24 14:26:30 +000016572 UInt rG = gregOfRexRM(pfx, modrm);
sewardjc4530ae2012-05-21 10:18:49 +000016573
16574 if ( epartIsReg(modrm) ) {
sewardj8516a1f2012-06-24 14:26:30 +000016575 UInt rE = eregOfRexRM(pfx, modrm);
16576 assign( srcVec, getXMMReg(rE) );
sewardjc4530ae2012-05-21 10:18:49 +000016577 delta += 1;
sewardj8516a1f2012-06-24 14:26:30 +000016578 DIP( "%spmov%cxwd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
sewardjc4530ae2012-05-21 10:18:49 +000016579 } else {
16580 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16581 assign( srcVec,
16582 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
16583 delta += alen;
sewardj8516a1f2012-06-24 14:26:30 +000016584 DIP( "%spmov%cxwd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
sewardjc4530ae2012-05-21 10:18:49 +000016585 }
16586
16587 IRExpr* res
16588 = binop( Iop_InterleaveLO16x8,
16589 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) );
sewardj6fcd43e2012-06-14 08:51:35 +000016590 if (!xIsZ)
16591 res = binop(Iop_SarN32x4,
16592 binop(Iop_ShlN32x4, res, mkU8(16)), mkU8(16));
sewardjc4530ae2012-05-21 10:18:49 +000016593
sewardje8a7eb72012-06-12 14:59:17 +000016594 (isAvx ? putYMMRegLoAndZU : putXMMReg)
sewardjc4530ae2012-05-21 10:18:49 +000016595 ( gregOfRexRM(pfx, modrm), res );
16596
16597 return delta;
16598}
16599
16600
floriancacba8e2014-12-15 18:58:07 +000016601static Long dis_PMOVxXWD_256 ( const VexAbiInfo* vbi, Prefix pfx,
sewardjcc3d2192013-03-27 11:37:33 +000016602 Long delta, Bool xIsZ )
16603{
16604 IRTemp addr = IRTemp_INVALID;
16605 Int alen = 0;
16606 HChar dis_buf[50];
16607 IRTemp srcVec = newTemp(Ity_V128);
16608 UChar modrm = getUChar(delta);
16609 UChar how = xIsZ ? 'z' : 's';
16610 UInt rG = gregOfRexRM(pfx, modrm);
16611
16612 if ( epartIsReg(modrm) ) {
16613 UInt rE = eregOfRexRM(pfx, modrm);
16614 assign( srcVec, getXMMReg(rE) );
16615 delta += 1;
16616 DIP( "vpmov%cxwd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
16617 } else {
16618 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16619 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) );
16620 delta += alen;
16621 DIP( "vpmov%cxwd %s,%s\n", how, dis_buf, nameYMMReg(rG) );
16622 }
16623
16624 IRExpr* res
16625 = binop( Iop_V128HLtoV256,
16626 binop( Iop_InterleaveHI16x8,
16627 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
16628 binop( Iop_InterleaveLO16x8,
16629 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
16630 if (!xIsZ)
16631 res = binop(Iop_SarN32x8,
16632 binop(Iop_ShlN32x8, res, mkU8(16)), mkU8(16));
16633
16634 putYMMReg ( rG, res );
16635
16636 return delta;
16637}
16638
16639
floriancacba8e2014-12-15 18:58:07 +000016640static Long dis_PMOVSXWQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj8516a1f2012-06-24 14:26:30 +000016641 Long delta, Bool isAvx )
16642{
16643 IRTemp addr = IRTemp_INVALID;
16644 Int alen = 0;
16645 HChar dis_buf[50];
16646 IRTemp srcBytes = newTemp(Ity_I32);
16647 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000016648 const HChar* mbV = isAvx ? "v" : "";
sewardj8516a1f2012-06-24 14:26:30 +000016649 UInt rG = gregOfRexRM(pfx, modrm);
16650
16651 if ( epartIsReg( modrm ) ) {
16652 UInt rE = eregOfRexRM(pfx, modrm);
16653 assign( srcBytes, getXMMRegLane32( rE, 0 ) );
16654 delta += 1;
16655 DIP( "%spmovsxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
16656 } else {
16657 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16658 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
16659 delta += alen;
16660 DIP( "%spmovsxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
16661 }
16662
16663 (isAvx ? putYMMRegLoAndZU : putXMMReg)
16664 ( rG, binop( Iop_64HLtoV128,
16665 unop( Iop_16Sto64,
16666 unop( Iop_32HIto16, mkexpr(srcBytes) ) ),
16667 unop( Iop_16Sto64,
16668 unop( Iop_32to16, mkexpr(srcBytes) ) ) ) );
16669 return delta;
16670}
16671
16672
floriancacba8e2014-12-15 18:58:07 +000016673static Long dis_PMOVSXWQ_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
sewardjcc3d2192013-03-27 11:37:33 +000016674{
16675 IRTemp addr = IRTemp_INVALID;
16676 Int alen = 0;
16677 HChar dis_buf[50];
16678 IRTemp srcBytes = newTemp(Ity_I64);
16679 UChar modrm = getUChar(delta);
16680 UInt rG = gregOfRexRM(pfx, modrm);
16681 IRTemp s3, s2, s1, s0;
16682 s3 = s2 = s1 = s0 = IRTemp_INVALID;
16683
16684 if ( epartIsReg( modrm ) ) {
16685 UInt rE = eregOfRexRM(pfx, modrm);
16686 assign( srcBytes, getXMMRegLane64( rE, 0 ) );
16687 delta += 1;
16688 DIP( "vpmovsxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
16689 } else {
16690 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16691 assign( srcBytes, loadLE( Ity_I64, mkexpr(addr) ) );
16692 delta += alen;
16693 DIP( "vpmovsxwq %s,%s\n", dis_buf, nameYMMReg(rG) );
16694 }
16695
16696 breakup64to16s( srcBytes, &s3, &s2, &s1, &s0 );
16697 putYMMReg( rG, binop( Iop_V128HLtoV256,
16698 binop( Iop_64HLtoV128,
16699 unop( Iop_16Sto64, mkexpr(s3) ),
16700 unop( Iop_16Sto64, mkexpr(s2) ) ),
16701 binop( Iop_64HLtoV128,
16702 unop( Iop_16Sto64, mkexpr(s1) ),
16703 unop( Iop_16Sto64, mkexpr(s0) ) ) ) );
16704 return delta;
16705}
16706
16707
floriancacba8e2014-12-15 18:58:07 +000016708static Long dis_PMOVZXWQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj8516a1f2012-06-24 14:26:30 +000016709 Long delta, Bool isAvx )
16710{
16711 IRTemp addr = IRTemp_INVALID;
16712 Int alen = 0;
16713 HChar dis_buf[50];
16714 IRTemp srcVec = newTemp(Ity_V128);
16715 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000016716 const HChar* mbV = isAvx ? "v" : "";
sewardj8516a1f2012-06-24 14:26:30 +000016717 UInt rG = gregOfRexRM(pfx, modrm);
16718
16719 if ( epartIsReg( modrm ) ) {
16720 UInt rE = eregOfRexRM(pfx, modrm);
16721 assign( srcVec, getXMMReg(rE) );
16722 delta += 1;
16723 DIP( "%spmovzxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
16724 } else {
16725 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16726 assign( srcVec,
16727 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
16728 delta += alen;
16729 DIP( "%spmovzxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
16730 }
16731
16732 IRTemp zeroVec = newTemp( Ity_V128 );
16733 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
16734
16735 (isAvx ? putYMMRegLoAndZU : putXMMReg)
16736 ( rG, binop( Iop_InterleaveLO16x8,
16737 mkexpr(zeroVec),
16738 binop( Iop_InterleaveLO16x8,
16739 mkexpr(zeroVec), mkexpr(srcVec) ) ) );
16740 return delta;
16741}
16742
16743
floriancacba8e2014-12-15 18:58:07 +000016744static Long dis_PMOVZXWQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
sewardjcc3d2192013-03-27 11:37:33 +000016745 Long delta )
16746{
16747 IRTemp addr = IRTemp_INVALID;
16748 Int alen = 0;
16749 HChar dis_buf[50];
16750 IRTemp srcVec = newTemp(Ity_V128);
16751 UChar modrm = getUChar(delta);
16752 UInt rG = gregOfRexRM(pfx, modrm);
16753
16754 if ( epartIsReg( modrm ) ) {
16755 UInt rE = eregOfRexRM(pfx, modrm);
16756 assign( srcVec, getXMMReg(rE) );
16757 delta += 1;
16758 DIP( "vpmovzxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
16759 } else {
16760 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16761 assign( srcVec,
16762 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
16763 delta += alen;
16764 DIP( "vpmovzxwq %s,%s\n", dis_buf, nameYMMReg(rG) );
16765 }
16766
16767 IRTemp zeroVec = newTemp( Ity_V128 );
16768 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
16769
16770 putYMMReg( rG, binop( Iop_V128HLtoV256,
16771 binop( Iop_InterleaveHI16x8,
16772 mkexpr(zeroVec),
16773 binop( Iop_InterleaveLO16x8,
16774 mkexpr(zeroVec), mkexpr(srcVec) ) ),
16775 binop( Iop_InterleaveLO16x8,
16776 mkexpr(zeroVec),
16777 binop( Iop_InterleaveLO16x8,
16778 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
16779 return delta;
16780}
16781
16782
sewardj6fcd43e2012-06-14 08:51:35 +000016783/* Handles 128 bit versions of PMOVZXDQ and PMOVSXDQ. */
floriancacba8e2014-12-15 18:58:07 +000016784static Long dis_PMOVxXDQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj6fcd43e2012-06-14 08:51:35 +000016785 Long delta, Bool isAvx, Bool xIsZ )
16786{
16787 IRTemp addr = IRTemp_INVALID;
16788 Int alen = 0;
16789 HChar dis_buf[50];
16790 IRTemp srcI64 = newTemp(Ity_I64);
16791 IRTemp srcVec = newTemp(Ity_V128);
16792 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000016793 const HChar* mbV = isAvx ? "v" : "";
16794 const HChar how = xIsZ ? 'z' : 's';
sewardj6fcd43e2012-06-14 08:51:35 +000016795 UInt rG = gregOfRexRM(pfx, modrm);
16796 /* Compute both srcI64 -- the value to expand -- and srcVec -- same
16797 thing in a V128, with arbitrary junk in the top 64 bits. Use
16798 one or both of them and let iropt clean up afterwards (as
16799 usual). */
16800 if ( epartIsReg(modrm) ) {
16801 UInt rE = eregOfRexRM(pfx, modrm);
16802 assign( srcVec, getXMMReg(rE) );
16803 assign( srcI64, unop(Iop_V128to64, mkexpr(srcVec)) );
16804 delta += 1;
16805 DIP( "%spmov%cxdq %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
16806 } else {
16807 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16808 assign( srcI64, loadLE(Ity_I64, mkexpr(addr)) );
16809 assign( srcVec, unop( Iop_64UtoV128, mkexpr(srcI64)) );
16810 delta += alen;
16811 DIP( "%spmov%cxdq %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
16812 }
16813
16814 IRExpr* res
16815 = xIsZ /* do math for either zero or sign extend */
16816 ? binop( Iop_InterleaveLO32x4,
16817 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
16818 : binop( Iop_64HLtoV128,
16819 unop( Iop_32Sto64,
16820 unop( Iop_64HIto32, mkexpr(srcI64) ) ),
16821 unop( Iop_32Sto64,
16822 unop( Iop_64to32, mkexpr(srcI64) ) ) );
16823
16824 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
16825
16826 return delta;
16827}
16828
16829
sewardjcc3d2192013-03-27 11:37:33 +000016830/* Handles 256 bit versions of PMOVZXDQ and PMOVSXDQ. */
floriancacba8e2014-12-15 18:58:07 +000016831static Long dis_PMOVxXDQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
sewardjcc3d2192013-03-27 11:37:33 +000016832 Long delta, Bool xIsZ )
16833{
16834 IRTemp addr = IRTemp_INVALID;
16835 Int alen = 0;
16836 HChar dis_buf[50];
16837 IRTemp srcVec = newTemp(Ity_V128);
16838 UChar modrm = getUChar(delta);
16839 UChar how = xIsZ ? 'z' : 's';
16840 UInt rG = gregOfRexRM(pfx, modrm);
16841 /* Compute both srcI64 -- the value to expand -- and srcVec -- same
16842 thing in a V128, with arbitrary junk in the top 64 bits. Use
16843 one or both of them and let iropt clean up afterwards (as
16844 usual). */
16845 if ( epartIsReg(modrm) ) {
16846 UInt rE = eregOfRexRM(pfx, modrm);
16847 assign( srcVec, getXMMReg(rE) );
16848 delta += 1;
16849 DIP( "vpmov%cxdq %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
16850 } else {
16851 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16852 assign( srcVec, loadLE(Ity_V128, mkexpr(addr)) );
16853 delta += alen;
16854 DIP( "vpmov%cxdq %s,%s\n", how, dis_buf, nameYMMReg(rG) );
16855 }
16856
16857 IRExpr* res;
16858 if (xIsZ)
16859 res = binop( Iop_V128HLtoV256,
16860 binop( Iop_InterleaveHI32x4,
16861 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
16862 binop( Iop_InterleaveLO32x4,
16863 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
16864 else {
16865 IRTemp s3, s2, s1, s0;
16866 s3 = s2 = s1 = s0 = IRTemp_INVALID;
16867 breakupV128to32s( srcVec, &s3, &s2, &s1, &s0 );
16868 res = binop( Iop_V128HLtoV256,
16869 binop( Iop_64HLtoV128,
16870 unop( Iop_32Sto64, mkexpr(s3) ),
16871 unop( Iop_32Sto64, mkexpr(s2) ) ),
16872 binop( Iop_64HLtoV128,
16873 unop( Iop_32Sto64, mkexpr(s1) ),
16874 unop( Iop_32Sto64, mkexpr(s0) ) ) );
16875 }
16876
16877 putYMMReg ( rG, res );
16878
16879 return delta;
16880}
16881
16882
sewardj4ed05e02012-06-18 15:01:30 +000016883/* Handles 128 bit versions of PMOVZXBD and PMOVSXBD. */
floriancacba8e2014-12-15 18:58:07 +000016884static Long dis_PMOVxXBD_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj4ed05e02012-06-18 15:01:30 +000016885 Long delta, Bool isAvx, Bool xIsZ )
16886{
16887 IRTemp addr = IRTemp_INVALID;
16888 Int alen = 0;
16889 HChar dis_buf[50];
16890 IRTemp srcVec = newTemp(Ity_V128);
16891 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000016892 const HChar* mbV = isAvx ? "v" : "";
16893 const HChar how = xIsZ ? 'z' : 's';
sewardj4ed05e02012-06-18 15:01:30 +000016894 UInt rG = gregOfRexRM(pfx, modrm);
16895 if ( epartIsReg(modrm) ) {
16896 UInt rE = eregOfRexRM(pfx, modrm);
16897 assign( srcVec, getXMMReg(rE) );
16898 delta += 1;
16899 DIP( "%spmov%cxbd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
16900 } else {
16901 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16902 assign( srcVec,
16903 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
16904 delta += alen;
16905 DIP( "%spmov%cxbd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
16906 }
16907
16908 IRTemp zeroVec = newTemp(Ity_V128);
16909 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
16910
16911 IRExpr* res
16912 = binop(Iop_InterleaveLO8x16,
16913 mkexpr(zeroVec),
16914 binop(Iop_InterleaveLO8x16,
16915 mkexpr(zeroVec), mkexpr(srcVec)));
16916 if (!xIsZ)
16917 res = binop(Iop_SarN32x4,
16918 binop(Iop_ShlN32x4, res, mkU8(24)), mkU8(24));
16919
16920 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
16921
16922 return delta;
16923}
16924
16925
sewardjcc3d2192013-03-27 11:37:33 +000016926/* Handles 256 bit versions of PMOVZXBD and PMOVSXBD. */
floriancacba8e2014-12-15 18:58:07 +000016927static Long dis_PMOVxXBD_256 ( const VexAbiInfo* vbi, Prefix pfx,
sewardjcc3d2192013-03-27 11:37:33 +000016928 Long delta, Bool xIsZ )
16929{
16930 IRTemp addr = IRTemp_INVALID;
16931 Int alen = 0;
16932 HChar dis_buf[50];
16933 IRTemp srcVec = newTemp(Ity_V128);
16934 UChar modrm = getUChar(delta);
16935 UChar how = xIsZ ? 'z' : 's';
16936 UInt rG = gregOfRexRM(pfx, modrm);
16937 if ( epartIsReg(modrm) ) {
16938 UInt rE = eregOfRexRM(pfx, modrm);
16939 assign( srcVec, getXMMReg(rE) );
16940 delta += 1;
16941 DIP( "vpmov%cxbd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
16942 } else {
16943 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16944 assign( srcVec,
16945 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
16946 delta += alen;
16947 DIP( "vpmov%cxbd %s,%s\n", how, dis_buf, nameYMMReg(rG) );
16948 }
16949
16950 IRTemp zeroVec = newTemp(Ity_V128);
16951 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
16952
16953 IRExpr* res
16954 = binop( Iop_V128HLtoV256,
16955 binop(Iop_InterleaveHI8x16,
16956 mkexpr(zeroVec),
16957 binop(Iop_InterleaveLO8x16,
16958 mkexpr(zeroVec), mkexpr(srcVec)) ),
16959 binop(Iop_InterleaveLO8x16,
16960 mkexpr(zeroVec),
16961 binop(Iop_InterleaveLO8x16,
16962 mkexpr(zeroVec), mkexpr(srcVec)) ) );
16963 if (!xIsZ)
16964 res = binop(Iop_SarN32x8,
16965 binop(Iop_ShlN32x8, res, mkU8(24)), mkU8(24));
16966
16967 putYMMReg ( rG, res );
16968
16969 return delta;
16970}
16971
16972
sewardj8516a1f2012-06-24 14:26:30 +000016973/* Handles 128 bit versions of PMOVSXBQ. */
floriancacba8e2014-12-15 18:58:07 +000016974static Long dis_PMOVSXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj8516a1f2012-06-24 14:26:30 +000016975 Long delta, Bool isAvx )
16976{
16977 IRTemp addr = IRTemp_INVALID;
16978 Int alen = 0;
16979 HChar dis_buf[50];
16980 IRTemp srcBytes = newTemp(Ity_I16);
16981 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000016982 const HChar* mbV = isAvx ? "v" : "";
sewardj8516a1f2012-06-24 14:26:30 +000016983 UInt rG = gregOfRexRM(pfx, modrm);
16984 if ( epartIsReg(modrm) ) {
16985 UInt rE = eregOfRexRM(pfx, modrm);
16986 assign( srcBytes, getXMMRegLane16( rE, 0 ) );
16987 delta += 1;
16988 DIP( "%spmovsxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
16989 } else {
16990 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16991 assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) );
16992 delta += alen;
16993 DIP( "%spmovsxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
16994 }
16995
16996 (isAvx ? putYMMRegLoAndZU : putXMMReg)
16997 ( rG, binop( Iop_64HLtoV128,
16998 unop( Iop_8Sto64,
16999 unop( Iop_16HIto8, mkexpr(srcBytes) ) ),
17000 unop( Iop_8Sto64,
17001 unop( Iop_16to8, mkexpr(srcBytes) ) ) ) );
17002 return delta;
17003}
17004
17005
sewardjcc3d2192013-03-27 11:37:33 +000017006/* Handles 256 bit versions of PMOVSXBQ. */
floriancacba8e2014-12-15 18:58:07 +000017007static Long dis_PMOVSXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
sewardjcc3d2192013-03-27 11:37:33 +000017008 Long delta )
17009{
17010 IRTemp addr = IRTemp_INVALID;
17011 Int alen = 0;
17012 HChar dis_buf[50];
17013 IRTemp srcBytes = newTemp(Ity_I32);
17014 UChar modrm = getUChar(delta);
17015 UInt rG = gregOfRexRM(pfx, modrm);
17016 if ( epartIsReg(modrm) ) {
17017 UInt rE = eregOfRexRM(pfx, modrm);
17018 assign( srcBytes, getXMMRegLane32( rE, 0 ) );
17019 delta += 1;
17020 DIP( "vpmovsxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17021 } else {
17022 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17023 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
17024 delta += alen;
17025 DIP( "vpmovsxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
17026 }
17027
17028 putYMMReg
17029 ( rG, binop( Iop_V128HLtoV256,
17030 binop( Iop_64HLtoV128,
17031 unop( Iop_8Sto64,
17032 unop( Iop_16HIto8,
17033 unop( Iop_32HIto16,
17034 mkexpr(srcBytes) ) ) ),
17035 unop( Iop_8Sto64,
17036 unop( Iop_16to8,
17037 unop( Iop_32HIto16,
17038 mkexpr(srcBytes) ) ) ) ),
17039 binop( Iop_64HLtoV128,
17040 unop( Iop_8Sto64,
17041 unop( Iop_16HIto8,
17042 unop( Iop_32to16,
17043 mkexpr(srcBytes) ) ) ),
17044 unop( Iop_8Sto64,
17045 unop( Iop_16to8,
17046 unop( Iop_32to16,
17047 mkexpr(srcBytes) ) ) ) ) ) );
17048 return delta;
17049}
17050
17051
sewardj8516a1f2012-06-24 14:26:30 +000017052/* Handles 128 bit versions of PMOVZXBQ. */
floriancacba8e2014-12-15 18:58:07 +000017053static Long dis_PMOVZXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj8516a1f2012-06-24 14:26:30 +000017054 Long delta, Bool isAvx )
17055{
17056 IRTemp addr = IRTemp_INVALID;
17057 Int alen = 0;
17058 HChar dis_buf[50];
17059 IRTemp srcVec = newTemp(Ity_V128);
17060 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000017061 const HChar* mbV = isAvx ? "v" : "";
sewardj8516a1f2012-06-24 14:26:30 +000017062 UInt rG = gregOfRexRM(pfx, modrm);
17063 if ( epartIsReg(modrm) ) {
17064 UInt rE = eregOfRexRM(pfx, modrm);
17065 assign( srcVec, getXMMReg(rE) );
17066 delta += 1;
17067 DIP( "%spmovzxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17068 } else {
17069 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17070 assign( srcVec,
17071 unop( Iop_32UtoV128,
17072 unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) ))));
17073 delta += alen;
17074 DIP( "%spmovzxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17075 }
17076
17077 IRTemp zeroVec = newTemp(Ity_V128);
17078 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17079
17080 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17081 ( rG, binop( Iop_InterleaveLO8x16,
17082 mkexpr(zeroVec),
17083 binop( Iop_InterleaveLO8x16,
17084 mkexpr(zeroVec),
17085 binop( Iop_InterleaveLO8x16,
17086 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
17087 return delta;
17088}
17089
17090
sewardjcc3d2192013-03-27 11:37:33 +000017091/* Handles 256 bit versions of PMOVZXBQ. */
floriancacba8e2014-12-15 18:58:07 +000017092static Long dis_PMOVZXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
sewardjcc3d2192013-03-27 11:37:33 +000017093 Long delta )
17094{
17095 IRTemp addr = IRTemp_INVALID;
17096 Int alen = 0;
17097 HChar dis_buf[50];
17098 IRTemp srcVec = newTemp(Ity_V128);
17099 UChar modrm = getUChar(delta);
17100 UInt rG = gregOfRexRM(pfx, modrm);
17101 if ( epartIsReg(modrm) ) {
17102 UInt rE = eregOfRexRM(pfx, modrm);
17103 assign( srcVec, getXMMReg(rE) );
17104 delta += 1;
17105 DIP( "vpmovzxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17106 } else {
17107 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17108 assign( srcVec,
17109 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) )));
17110 delta += alen;
17111 DIP( "vpmovzxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
17112 }
17113
17114 IRTemp zeroVec = newTemp(Ity_V128);
17115 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17116
17117 putYMMReg
17118 ( rG, binop( Iop_V128HLtoV256,
17119 binop( Iop_InterleaveHI8x16,
17120 mkexpr(zeroVec),
17121 binop( Iop_InterleaveLO8x16,
17122 mkexpr(zeroVec),
17123 binop( Iop_InterleaveLO8x16,
17124 mkexpr(zeroVec), mkexpr(srcVec) ) ) ),
17125 binop( Iop_InterleaveLO8x16,
17126 mkexpr(zeroVec),
17127 binop( Iop_InterleaveLO8x16,
17128 mkexpr(zeroVec),
17129 binop( Iop_InterleaveLO8x16,
17130 mkexpr(zeroVec), mkexpr(srcVec) ) ) )
17131 ) );
17132 return delta;
17133}
17134
17135
floriancacba8e2014-12-15 18:58:07 +000017136static Long dis_PHMINPOSUW_128 ( const VexAbiInfo* vbi, Prefix pfx,
sewardje8a7eb72012-06-12 14:59:17 +000017137 Long delta, Bool isAvx )
17138{
17139 IRTemp addr = IRTemp_INVALID;
17140 Int alen = 0;
17141 HChar dis_buf[50];
17142 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000017143 const HChar* mbV = isAvx ? "v" : "";
sewardje8a7eb72012-06-12 14:59:17 +000017144 IRTemp sV = newTemp(Ity_V128);
17145 IRTemp sHi = newTemp(Ity_I64);
17146 IRTemp sLo = newTemp(Ity_I64);
17147 IRTemp dLo = newTemp(Ity_I64);
17148 UInt rG = gregOfRexRM(pfx,modrm);
17149 if (epartIsReg(modrm)) {
17150 UInt rE = eregOfRexRM(pfx,modrm);
17151 assign( sV, getXMMReg(rE) );
17152 delta += 1;
17153 DIP("%sphminposuw %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
17154 } else {
17155 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj4955c742012-09-02 12:13:34 +000017156 if (!isAvx)
17157 gen_SEGV_if_not_16_aligned(addr);
sewardje8a7eb72012-06-12 14:59:17 +000017158 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
17159 delta += alen;
17160 DIP("%sphminposuw %s,%s\n", mbV, dis_buf, nameXMMReg(rG));
17161 }
17162 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
17163 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
17164 assign( dLo, mkIRExprCCall(
17165 Ity_I64, 0/*regparms*/,
17166 "amd64g_calculate_sse_phminposuw",
17167 &amd64g_calculate_sse_phminposuw,
17168 mkIRExprVec_2( mkexpr(sLo), mkexpr(sHi) )
17169 ));
17170 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17171 (rG, unop(Iop_64UtoV128, mkexpr(dLo)));
17172 return delta;
17173}
17174
17175
floriancacba8e2014-12-15 18:58:07 +000017176static Long dis_AESx ( const VexAbiInfo* vbi, Prefix pfx,
sewardj1407a362012-06-24 15:11:38 +000017177 Long delta, Bool isAvx, UChar opc )
17178{
17179 IRTemp addr = IRTemp_INVALID;
17180 Int alen = 0;
17181 HChar dis_buf[50];
17182 UChar modrm = getUChar(delta);
17183 UInt rG = gregOfRexRM(pfx, modrm);
17184 UInt regNoL = 0;
17185 UInt regNoR = (isAvx && opc != 0xDB) ? getVexNvvvv(pfx) : rG;
17186
17187 /* This is a nasty kludge. We need to pass 2 x V128 to the
17188 helper. Since we can't do that, use a dirty
17189 helper to compute the results directly from the XMM regs in
17190 the guest state. That means for the memory case, we need to
17191 move the left operand into a pseudo-register (XMM16, let's
17192 call it). */
17193 if (epartIsReg(modrm)) {
17194 regNoL = eregOfRexRM(pfx, modrm);
17195 delta += 1;
17196 } else {
17197 regNoL = 16; /* use XMM16 as an intermediary */
17198 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17199 /* alignment check needed ???? */
17200 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
17201 delta += alen;
17202 }
17203
17204 void* fn = &amd64g_dirtyhelper_AES;
florian55085f82012-11-21 00:36:55 +000017205 const HChar* nm = "amd64g_dirtyhelper_AES";
sewardj1407a362012-06-24 15:11:38 +000017206
17207 /* Round up the arguments. Note that this is a kludge -- the
17208 use of mkU64 rather than mkIRExpr_HWord implies the
17209 assumption that the host's word size is 64-bit. */
17210 UInt gstOffD = ymmGuestRegOffset(rG);
17211 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
17212 UInt gstOffR = ymmGuestRegOffset(regNoR);
17213 IRExpr* opc4 = mkU64(opc);
17214 IRExpr* gstOffDe = mkU64(gstOffD);
17215 IRExpr* gstOffLe = mkU64(gstOffL);
17216 IRExpr* gstOffRe = mkU64(gstOffR);
17217 IRExpr** args
florian90419562013-08-15 20:54:52 +000017218 = mkIRExprVec_5( IRExpr_BBPTR(), opc4, gstOffDe, gstOffLe, gstOffRe );
sewardj1407a362012-06-24 15:11:38 +000017219
17220 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
sewardj74142b82013-08-08 10:28:59 +000017221 /* It's not really a dirty call, but we can't use the clean helper
17222 mechanism here for the very lame reason that we can't pass 2 x
17223 V128s by value to a helper. Hence this roundabout scheme. */
sewardj1407a362012-06-24 15:11:38 +000017224 d->nFxState = 2;
17225 vex_bzero(&d->fxState, sizeof(d->fxState));
17226 /* AES{ENC,ENCLAST,DEC,DECLAST} read both registers, and writes
17227 the second for !isAvx or the third for isAvx.
17228 AESIMC (0xDB) reads the first register, and writes the second. */
17229 d->fxState[0].fx = Ifx_Read;
17230 d->fxState[0].offset = gstOffL;
17231 d->fxState[0].size = sizeof(U128);
17232 d->fxState[1].offset = gstOffR;
17233 d->fxState[1].size = sizeof(U128);
17234 if (opc == 0xDB)
17235 d->fxState[1].fx = Ifx_Write;
17236 else if (!isAvx || rG == regNoR)
17237 d->fxState[1].fx = Ifx_Modify;
17238 else {
17239 d->fxState[1].fx = Ifx_Read;
17240 d->nFxState++;
17241 d->fxState[2].fx = Ifx_Write;
17242 d->fxState[2].offset = gstOffD;
17243 d->fxState[2].size = sizeof(U128);
17244 }
17245
17246 stmt( IRStmt_Dirty(d) );
17247 {
florian55085f82012-11-21 00:36:55 +000017248 const HChar* opsuf;
sewardj1407a362012-06-24 15:11:38 +000017249 switch (opc) {
17250 case 0xDC: opsuf = "enc"; break;
17251 case 0XDD: opsuf = "enclast"; break;
17252 case 0xDE: opsuf = "dec"; break;
17253 case 0xDF: opsuf = "declast"; break;
17254 case 0xDB: opsuf = "imc"; break;
17255 default: vassert(0);
17256 }
17257 DIP("%saes%s %s,%s%s%s\n", isAvx ? "v" : "", opsuf,
17258 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
17259 nameXMMReg(regNoR),
17260 (isAvx && opc != 0xDB) ? "," : "",
17261 (isAvx && opc != 0xDB) ? nameXMMReg(rG) : "");
17262 }
17263 if (isAvx)
17264 putYMMRegLane128( rG, 1, mkV128(0) );
17265 return delta;
17266}
17267
floriancacba8e2014-12-15 18:58:07 +000017268static Long dis_AESKEYGENASSIST ( const VexAbiInfo* vbi, Prefix pfx,
sewardj1407a362012-06-24 15:11:38 +000017269 Long delta, Bool isAvx )
17270{
17271 IRTemp addr = IRTemp_INVALID;
17272 Int alen = 0;
17273 HChar dis_buf[50];
17274 UChar modrm = getUChar(delta);
17275 UInt regNoL = 0;
17276 UInt regNoR = gregOfRexRM(pfx, modrm);
17277 UChar imm = 0;
17278
17279 /* This is a nasty kludge. See AESENC et al. instructions. */
17280 modrm = getUChar(delta);
17281 if (epartIsReg(modrm)) {
17282 regNoL = eregOfRexRM(pfx, modrm);
17283 imm = getUChar(delta+1);
17284 delta += 1+1;
17285 } else {
17286 regNoL = 16; /* use XMM16 as an intermediary */
17287 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17288 /* alignment check ???? . */
17289 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
17290 imm = getUChar(delta+alen);
17291 delta += alen+1;
17292 }
17293
17294 /* Who ya gonna call? Presumably not Ghostbusters. */
17295 void* fn = &amd64g_dirtyhelper_AESKEYGENASSIST;
florian55085f82012-11-21 00:36:55 +000017296 const HChar* nm = "amd64g_dirtyhelper_AESKEYGENASSIST";
sewardj1407a362012-06-24 15:11:38 +000017297
17298 /* Round up the arguments. Note that this is a kludge -- the
17299 use of mkU64 rather than mkIRExpr_HWord implies the
17300 assumption that the host's word size is 64-bit. */
17301 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
17302 UInt gstOffR = ymmGuestRegOffset(regNoR);
17303
17304 IRExpr* imme = mkU64(imm & 0xFF);
17305 IRExpr* gstOffLe = mkU64(gstOffL);
17306 IRExpr* gstOffRe = mkU64(gstOffR);
17307 IRExpr** args
florian90419562013-08-15 20:54:52 +000017308 = mkIRExprVec_4( IRExpr_BBPTR(), imme, gstOffLe, gstOffRe );
sewardj1407a362012-06-24 15:11:38 +000017309
17310 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
sewardj74142b82013-08-08 10:28:59 +000017311 /* It's not really a dirty call, but we can't use the clean helper
17312 mechanism here for the very lame reason that we can't pass 2 x
17313 V128s by value to a helper. Hence this roundabout scheme. */
sewardj1407a362012-06-24 15:11:38 +000017314 d->nFxState = 2;
17315 vex_bzero(&d->fxState, sizeof(d->fxState));
17316 d->fxState[0].fx = Ifx_Read;
17317 d->fxState[0].offset = gstOffL;
17318 d->fxState[0].size = sizeof(U128);
17319 d->fxState[1].fx = Ifx_Write;
17320 d->fxState[1].offset = gstOffR;
17321 d->fxState[1].size = sizeof(U128);
17322 stmt( IRStmt_Dirty(d) );
17323
17324 DIP("%saeskeygenassist $%x,%s,%s\n", isAvx ? "v" : "", (UInt)imm,
17325 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
17326 nameXMMReg(regNoR));
17327 if (isAvx)
17328 putYMMRegLane128( regNoR, 1, mkV128(0) );
17329 return delta;
17330}
17331
17332
sewardj80611e32012-01-20 13:07:24 +000017333__attribute__((noinline))
17334static
17335Long dis_ESC_0F38__SSE4 ( Bool* decode_OK,
floriancacba8e2014-12-15 18:58:07 +000017336 const VexAbiInfo* vbi,
sewardj80611e32012-01-20 13:07:24 +000017337 Prefix pfx, Int sz, Long deltaIN )
17338{
17339 IRTemp addr = IRTemp_INVALID;
17340 UChar modrm = 0;
17341 Int alen = 0;
17342 HChar dis_buf[50];
17343
17344 *decode_OK = False;
17345
17346 Long delta = deltaIN;
17347 UChar opc = getUChar(delta);
17348 delta++;
17349 switch (opc) {
17350
17351 case 0x10:
17352 case 0x14:
17353 case 0x15:
17354 /* 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128 (byte gran)
17355 66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128 (float gran)
17356 66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128 (double gran)
17357 Blend at various granularities, with XMM0 (implicit operand)
17358 providing the controlling mask.
17359 */
17360 if (have66noF2noF3(pfx) && sz == 2) {
17361 modrm = getUChar(delta);
17362
florian55085f82012-11-21 00:36:55 +000017363 const HChar* nm = NULL;
sewardj80611e32012-01-20 13:07:24 +000017364 UInt gran = 0;
17365 IROp opSAR = Iop_INVALID;
17366 switch (opc) {
17367 case 0x10:
17368 nm = "pblendvb"; gran = 1; opSAR = Iop_SarN8x16;
17369 break;
17370 case 0x14:
17371 nm = "blendvps"; gran = 4; opSAR = Iop_SarN32x4;
17372 break;
17373 case 0x15:
17374 nm = "blendvpd"; gran = 8; opSAR = Iop_SarN64x2;
17375 break;
17376 }
17377 vassert(nm);
17378
17379 IRTemp vecE = newTemp(Ity_V128);
17380 IRTemp vecG = newTemp(Ity_V128);
17381 IRTemp vec0 = newTemp(Ity_V128);
17382
17383 if ( epartIsReg(modrm) ) {
17384 assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm)));
17385 delta += 1;
17386 DIP( "%s %s,%s\n", nm,
17387 nameXMMReg( eregOfRexRM(pfx, modrm) ),
17388 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17389 } else {
17390 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17391 gen_SEGV_if_not_16_aligned( addr );
17392 assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
17393 delta += alen;
17394 DIP( "%s %s,%s\n", nm,
17395 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17396 }
17397
17398 assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm)));
17399 assign(vec0, getXMMReg(0));
17400
sewardje8a7eb72012-06-12 14:59:17 +000017401 IRTemp res = math_PBLENDVB_128( vecE, vecG, vec0, gran, opSAR );
sewardjc4530ae2012-05-21 10:18:49 +000017402 putXMMReg(gregOfRexRM(pfx, modrm), mkexpr(res));
sewardj80611e32012-01-20 13:07:24 +000017403
17404 goto decode_success;
17405 }
17406 break;
17407
17408 case 0x17:
17409 /* 66 0F 38 17 /r = PTEST xmm1, xmm2/m128
17410 Logical compare (set ZF and CF from AND/ANDN of the operands) */
17411 if (have66noF2noF3(pfx)
17412 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
sewardjed1884d2012-06-21 08:53:48 +000017413 delta = dis_xTESTy_128( vbi, pfx, delta, False/*!isAvx*/, 0 );
sewardj80611e32012-01-20 13:07:24 +000017414 goto decode_success;
17415 }
17416 break;
17417
17418 case 0x20:
17419 /* 66 0F 38 20 /r = PMOVSXBW xmm1, xmm2/m64
17420 Packed Move with Sign Extend from Byte to Word (XMM) */
sewardj6fcd43e2012-06-14 08:51:35 +000017421 if (have66noF2noF3(pfx) && sz == 2) {
17422 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
17423 False/*!isAvx*/, False/*!xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000017424 goto decode_success;
17425 }
17426 break;
17427
17428 case 0x21:
17429 /* 66 0F 38 21 /r = PMOVSXBD xmm1, xmm2/m32
17430 Packed Move with Sign Extend from Byte to DWord (XMM) */
17431 if (have66noF2noF3(pfx) && sz == 2) {
sewardj4ed05e02012-06-18 15:01:30 +000017432 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
17433 False/*!isAvx*/, False/*!xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000017434 goto decode_success;
17435 }
17436 break;
17437
17438 case 0x22:
17439 /* 66 0F 38 22 /r = PMOVSXBQ xmm1, xmm2/m16
17440 Packed Move with Sign Extend from Byte to QWord (XMM) */
17441 if (have66noF2noF3(pfx) && sz == 2) {
sewardj8516a1f2012-06-24 14:26:30 +000017442 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000017443 goto decode_success;
17444 }
17445 break;
17446
17447 case 0x23:
17448 /* 66 0F 38 23 /r = PMOVSXWD xmm1, xmm2/m64
17449 Packed Move with Sign Extend from Word to DWord (XMM) */
17450 if (have66noF2noF3(pfx) && sz == 2) {
sewardj6fcd43e2012-06-14 08:51:35 +000017451 delta = dis_PMOVxXWD_128(vbi, pfx, delta,
17452 False/*!isAvx*/, False/*!xIsZ*/);
sewardj80611e32012-01-20 13:07:24 +000017453 goto decode_success;
17454 }
17455 break;
17456
17457 case 0x24:
17458 /* 66 0F 38 24 /r = PMOVSXWQ xmm1, xmm2/m32
17459 Packed Move with Sign Extend from Word to QWord (XMM) */
17460 if (have66noF2noF3(pfx) && sz == 2) {
sewardj8516a1f2012-06-24 14:26:30 +000017461 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000017462 goto decode_success;
17463 }
17464 break;
17465
17466 case 0x25:
17467 /* 66 0F 38 25 /r = PMOVSXDQ xmm1, xmm2/m64
17468 Packed Move with Sign Extend from Double Word to Quad Word (XMM) */
17469 if (have66noF2noF3(pfx) && sz == 2) {
sewardj6fcd43e2012-06-14 08:51:35 +000017470 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
17471 False/*!isAvx*/, False/*!xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000017472 goto decode_success;
17473 }
17474 break;
17475
17476 case 0x28:
sewardj89378162012-06-24 12:12:20 +000017477 /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-lanes
sewardj80611e32012-01-20 13:07:24 +000017478 0 x 0 to form lower 64-bit half and lanes 2 x 2 to form upper
17479 64-bit half */
17480 /* This is a really poor translation -- could be improved if
sewardj89378162012-06-24 12:12:20 +000017481 performance critical. It's a copy-paste of PMULUDQ, too. */
sewardj80611e32012-01-20 13:07:24 +000017482 if (have66noF2noF3(pfx) && sz == 2) {
sewardj89378162012-06-24 12:12:20 +000017483 IRTemp sV = newTemp(Ity_V128);
17484 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000017485 modrm = getUChar(delta);
sewardj89378162012-06-24 12:12:20 +000017486 UInt rG = gregOfRexRM(pfx,modrm);
17487 assign( dV, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000017488 if (epartIsReg(modrm)) {
sewardj89378162012-06-24 12:12:20 +000017489 UInt rE = eregOfRexRM(pfx,modrm);
17490 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000017491 delta += 1;
sewardj89378162012-06-24 12:12:20 +000017492 DIP("pmuldq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000017493 } else {
17494 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
17495 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
17496 delta += alen;
sewardj89378162012-06-24 12:12:20 +000017497 DIP("pmuldq %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000017498 }
17499
sewardj89378162012-06-24 12:12:20 +000017500 putXMMReg( rG, mkexpr(math_PMULDQ_128( dV, sV )) );
sewardj80611e32012-01-20 13:07:24 +000017501 goto decode_success;
17502 }
17503 break;
17504
17505 case 0x29:
17506 /* 66 0F 38 29 = PCMPEQQ
17507 64x2 equality comparison */
17508 if (have66noF2noF3(pfx) && sz == 2) {
17509 /* FIXME: this needs an alignment check */
17510 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
17511 "pcmpeqq", Iop_CmpEQ64x2, False );
17512 goto decode_success;
17513 }
17514 break;
17515
sewardj92eef382013-03-26 10:27:39 +000017516 case 0x2A:
17517 /* 66 0F 38 2A = MOVNTDQA
17518 "non-temporal" "streaming" load
17519 Handle like MOVDQA but only memory operand is allowed */
17520 if (have66noF2noF3(pfx) && sz == 2) {
17521 modrm = getUChar(delta);
17522 if (!epartIsReg(modrm)) {
17523 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
17524 gen_SEGV_if_not_16_aligned( addr );
17525 putXMMReg( gregOfRexRM(pfx,modrm),
17526 loadLE(Ity_V128, mkexpr(addr)) );
17527 DIP("movntdqa %s,%s\n", dis_buf,
17528 nameXMMReg(gregOfRexRM(pfx,modrm)));
17529 delta += alen;
17530 goto decode_success;
17531 }
17532 }
17533 break;
17534
sewardj80611e32012-01-20 13:07:24 +000017535 case 0x2B:
17536 /* 66 0f 38 2B /r = PACKUSDW xmm1, xmm2/m128
17537 2x 32x4 S->U saturating narrow from xmm2/m128 to xmm1 */
17538 if (have66noF2noF3(pfx) && sz == 2) {
17539
17540 modrm = getUChar(delta);
17541
17542 IRTemp argL = newTemp(Ity_V128);
17543 IRTemp argR = newTemp(Ity_V128);
17544
17545 if ( epartIsReg(modrm) ) {
17546 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
17547 delta += 1;
17548 DIP( "packusdw %s,%s\n",
17549 nameXMMReg( eregOfRexRM(pfx, modrm) ),
17550 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17551 } else {
17552 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17553 gen_SEGV_if_not_16_aligned( addr );
17554 assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
17555 delta += alen;
17556 DIP( "packusdw %s,%s\n",
17557 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17558 }
17559
17560 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
17561
17562 putXMMReg( gregOfRexRM(pfx, modrm),
17563 binop( Iop_QNarrowBin32Sto16Ux8,
17564 mkexpr(argL), mkexpr(argR)) );
17565
17566 goto decode_success;
17567 }
17568 break;
17569
17570 case 0x30:
17571 /* 66 0F 38 30 /r = PMOVZXBW xmm1, xmm2/m64
17572 Packed Move with Zero Extend from Byte to Word (XMM) */
17573 if (have66noF2noF3(pfx) && sz == 2) {
sewardj6fcd43e2012-06-14 08:51:35 +000017574 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
17575 False/*!isAvx*/, True/*xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000017576 goto decode_success;
17577 }
17578 break;
17579
17580 case 0x31:
17581 /* 66 0F 38 31 /r = PMOVZXBD xmm1, xmm2/m32
17582 Packed Move with Zero Extend from Byte to DWord (XMM) */
17583 if (have66noF2noF3(pfx) && sz == 2) {
sewardj4ed05e02012-06-18 15:01:30 +000017584 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
17585 False/*!isAvx*/, True/*xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000017586 goto decode_success;
17587 }
17588 break;
17589
17590 case 0x32:
17591 /* 66 0F 38 32 /r = PMOVZXBQ xmm1, xmm2/m16
17592 Packed Move with Zero Extend from Byte to QWord (XMM) */
17593 if (have66noF2noF3(pfx) && sz == 2) {
sewardj8516a1f2012-06-24 14:26:30 +000017594 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000017595 goto decode_success;
17596 }
17597 break;
17598
17599 case 0x33:
17600 /* 66 0F 38 33 /r = PMOVZXWD xmm1, xmm2/m64
17601 Packed Move with Zero Extend from Word to DWord (XMM) */
17602 if (have66noF2noF3(pfx) && sz == 2) {
sewardj6fcd43e2012-06-14 08:51:35 +000017603 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
17604 False/*!isAvx*/, True/*xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000017605 goto decode_success;
17606 }
17607 break;
17608
17609 case 0x34:
17610 /* 66 0F 38 34 /r = PMOVZXWQ xmm1, xmm2/m32
17611 Packed Move with Zero Extend from Word to QWord (XMM) */
17612 if (have66noF2noF3(pfx) && sz == 2) {
sewardj8516a1f2012-06-24 14:26:30 +000017613 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000017614 goto decode_success;
17615 }
17616 break;
17617
17618 case 0x35:
17619 /* 66 0F 38 35 /r = PMOVZXDQ xmm1, xmm2/m64
17620 Packed Move with Zero Extend from DWord to QWord (XMM) */
17621 if (have66noF2noF3(pfx) && sz == 2) {
sewardj6fcd43e2012-06-14 08:51:35 +000017622 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
17623 False/*!isAvx*/, True/*xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000017624 goto decode_success;
17625 }
17626 break;
17627
17628 case 0x37:
17629 /* 66 0F 38 37 = PCMPGTQ
17630 64x2 comparison (signed, presumably; the Intel docs don't say :-)
17631 */
17632 if (have66noF2noF3(pfx) && sz == 2) {
17633 /* FIXME: this needs an alignment check */
17634 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
17635 "pcmpgtq", Iop_CmpGT64Sx2, False );
17636 goto decode_success;
17637 }
17638 break;
17639
17640 case 0x38:
17641 case 0x3C:
17642 /* 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128 8Sx16 (signed) min
17643 66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128 8Sx16 (signed) max
17644 */
17645 if (have66noF2noF3(pfx) && sz == 2) {
17646 /* FIXME: this needs an alignment check */
17647 Bool isMAX = opc == 0x3C;
17648 delta = dis_SSEint_E_to_G(
17649 vbi, pfx, delta,
17650 isMAX ? "pmaxsb" : "pminsb",
17651 isMAX ? Iop_Max8Sx16 : Iop_Min8Sx16,
17652 False
17653 );
17654 goto decode_success;
17655 }
17656 break;
17657
17658 case 0x39:
17659 case 0x3D:
17660 /* 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128
17661 Minimum of Packed Signed Double Word Integers (XMM)
17662 66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128
17663 Maximum of Packed Signed Double Word Integers (XMM)
17664 */
17665 if (have66noF2noF3(pfx) && sz == 2) {
17666 /* FIXME: this needs an alignment check */
17667 Bool isMAX = opc == 0x3D;
17668 delta = dis_SSEint_E_to_G(
17669 vbi, pfx, delta,
17670 isMAX ? "pmaxsd" : "pminsd",
17671 isMAX ? Iop_Max32Sx4 : Iop_Min32Sx4,
17672 False
17673 );
17674 goto decode_success;
17675 }
17676 break;
17677
17678 case 0x3A:
17679 case 0x3E:
17680 /* 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128
17681 Minimum of Packed Unsigned Word Integers (XMM)
17682 66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128
17683 Maximum of Packed Unsigned Word Integers (XMM)
17684 */
17685 if (have66noF2noF3(pfx) && sz == 2) {
17686 /* FIXME: this needs an alignment check */
17687 Bool isMAX = opc == 0x3E;
17688 delta = dis_SSEint_E_to_G(
17689 vbi, pfx, delta,
17690 isMAX ? "pmaxuw" : "pminuw",
17691 isMAX ? Iop_Max16Ux8 : Iop_Min16Ux8,
17692 False
17693 );
17694 goto decode_success;
17695 }
17696 break;
17697
17698 case 0x3B:
17699 case 0x3F:
17700 /* 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128
17701 Minimum of Packed Unsigned Doubleword Integers (XMM)
17702 66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128
17703 Maximum of Packed Unsigned Doubleword Integers (XMM)
17704 */
17705 if (have66noF2noF3(pfx) && sz == 2) {
17706 /* FIXME: this needs an alignment check */
17707 Bool isMAX = opc == 0x3F;
17708 delta = dis_SSEint_E_to_G(
17709 vbi, pfx, delta,
17710 isMAX ? "pmaxud" : "pminud",
17711 isMAX ? Iop_Max32Ux4 : Iop_Min32Ux4,
17712 False
17713 );
17714 goto decode_success;
17715 }
17716 break;
17717
17718 case 0x40:
17719 /* 66 0F 38 40 /r = PMULLD xmm1, xmm2/m128
17720 32x4 integer multiply from xmm2/m128 to xmm1 */
17721 if (have66noF2noF3(pfx) && sz == 2) {
17722
17723 modrm = getUChar(delta);
17724
17725 IRTemp argL = newTemp(Ity_V128);
17726 IRTemp argR = newTemp(Ity_V128);
17727
17728 if ( epartIsReg(modrm) ) {
17729 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
17730 delta += 1;
17731 DIP( "pmulld %s,%s\n",
17732 nameXMMReg( eregOfRexRM(pfx, modrm) ),
17733 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17734 } else {
17735 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17736 gen_SEGV_if_not_16_aligned( addr );
17737 assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
17738 delta += alen;
17739 DIP( "pmulld %s,%s\n",
17740 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17741 }
17742
17743 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
17744
17745 putXMMReg( gregOfRexRM(pfx, modrm),
17746 binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) );
17747
17748 goto decode_success;
17749 }
17750 break;
17751
sewardj8cb931e2012-02-16 22:02:14 +000017752 case 0x41:
17753 /* 66 0F 38 41 /r = PHMINPOSUW xmm1, xmm2/m128
17754 Packed Horizontal Word Minimum from xmm2/m128 to xmm1 */
17755 if (have66noF2noF3(pfx) && sz == 2) {
sewardje8a7eb72012-06-12 14:59:17 +000017756 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj8cb931e2012-02-16 22:02:14 +000017757 goto decode_success;
17758 }
17759 break;
17760
philippeff4d6be2012-02-14 21:34:56 +000017761 case 0xDC:
17762 case 0xDD:
17763 case 0xDE:
17764 case 0xDF:
17765 case 0xDB:
17766 /* 66 0F 38 DC /r = AESENC xmm1, xmm2/m128
17767 DD /r = AESENCLAST xmm1, xmm2/m128
17768 DE /r = AESDEC xmm1, xmm2/m128
17769 DF /r = AESDECLAST xmm1, xmm2/m128
17770
17771 DB /r = AESIMC xmm1, xmm2/m128 */
17772 if (have66noF2noF3(pfx) && sz == 2) {
sewardj1407a362012-06-24 15:11:38 +000017773 delta = dis_AESx( vbi, pfx, delta, False/*!isAvx*/, opc );
philippeff4d6be2012-02-14 21:34:56 +000017774 goto decode_success;
17775 }
17776 break;
17777
sewardj80611e32012-01-20 13:07:24 +000017778 case 0xF0:
17779 case 0xF1:
17780 /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok)
17781 F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32
17782 The decoding on this is a bit unusual.
17783 */
17784 if (haveF2noF3(pfx)
17785 && (opc == 0xF1 || (opc == 0xF0 && !have66(pfx)))) {
17786 modrm = getUChar(delta);
17787
17788 if (opc == 0xF0)
17789 sz = 1;
17790 else
17791 vassert(sz == 2 || sz == 4 || sz == 8);
17792
17793 IRType tyE = szToITy(sz);
17794 IRTemp valE = newTemp(tyE);
17795
17796 if (epartIsReg(modrm)) {
17797 assign(valE, getIRegE(sz, pfx, modrm));
17798 delta += 1;
17799 DIP("crc32b %s,%s\n", nameIRegE(sz, pfx, modrm),
17800 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
17801 } else {
17802 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17803 assign(valE, loadLE(tyE, mkexpr(addr)));
17804 delta += alen;
17805 DIP("crc32b %s,%s\n", dis_buf,
17806 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
17807 }
17808
17809 /* Somewhat funny getting/putting of the crc32 value, in order
17810 to ensure that it turns into 64-bit gets and puts. However,
17811 mask off the upper 32 bits so as to not get memcheck false
17812 +ves around the helper call. */
17813 IRTemp valG0 = newTemp(Ity_I64);
17814 assign(valG0, binop(Iop_And64, getIRegG(8, pfx, modrm),
17815 mkU64(0xFFFFFFFF)));
17816
florian55085f82012-11-21 00:36:55 +000017817 const HChar* nm = NULL;
sewardj80611e32012-01-20 13:07:24 +000017818 void* fn = NULL;
17819 switch (sz) {
17820 case 1: nm = "amd64g_calc_crc32b";
17821 fn = &amd64g_calc_crc32b; break;
17822 case 2: nm = "amd64g_calc_crc32w";
17823 fn = &amd64g_calc_crc32w; break;
17824 case 4: nm = "amd64g_calc_crc32l";
17825 fn = &amd64g_calc_crc32l; break;
17826 case 8: nm = "amd64g_calc_crc32q";
17827 fn = &amd64g_calc_crc32q; break;
17828 }
17829 vassert(nm && fn);
17830 IRTemp valG1 = newTemp(Ity_I64);
17831 assign(valG1,
17832 mkIRExprCCall(Ity_I64, 0/*regparm*/, nm, fn,
17833 mkIRExprVec_2(mkexpr(valG0),
17834 widenUto64(mkexpr(valE)))));
17835
17836 putIRegG(4, pfx, modrm, unop(Iop_64to32, mkexpr(valG1)));
17837 goto decode_success;
17838 }
17839 break;
17840
17841 default:
17842 break;
17843
17844 }
17845
17846 //decode_failure:
17847 *decode_OK = False;
17848 return deltaIN;
17849
17850 decode_success:
17851 *decode_OK = True;
17852 return delta;
17853}
17854
17855
17856/*------------------------------------------------------------*/
17857/*--- ---*/
17858/*--- Top-level SSE4: dis_ESC_0F3A__SSE4 ---*/
17859/*--- ---*/
17860/*------------------------------------------------------------*/
17861
floriancacba8e2014-12-15 18:58:07 +000017862static Long dis_PEXTRW ( const VexAbiInfo* vbi, Prefix pfx,
sewardj82096922012-06-24 14:57:59 +000017863 Long delta, Bool isAvx )
17864{
17865 IRTemp addr = IRTemp_INVALID;
17866 IRTemp t0 = IRTemp_INVALID;
17867 IRTemp t1 = IRTemp_INVALID;
17868 IRTemp t2 = IRTemp_INVALID;
17869 IRTemp t3 = IRTemp_INVALID;
17870 UChar modrm = getUChar(delta);
17871 Int alen = 0;
17872 HChar dis_buf[50];
17873 UInt rG = gregOfRexRM(pfx,modrm);
17874 Int imm8_20;
17875 IRTemp xmm_vec = newTemp(Ity_V128);
17876 IRTemp d16 = newTemp(Ity_I16);
florian55085f82012-11-21 00:36:55 +000017877 const HChar* mbV = isAvx ? "v" : "";
sewardj82096922012-06-24 14:57:59 +000017878
17879 vassert(0==getRexW(pfx)); /* ensured by caller */
17880 assign( xmm_vec, getXMMReg(rG) );
17881 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
17882
17883 if ( epartIsReg( modrm ) ) {
17884 imm8_20 = (Int)(getUChar(delta+1) & 7);
17885 } else {
17886 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17887 imm8_20 = (Int)(getUChar(delta+alen) & 7);
17888 }
17889
17890 switch (imm8_20) {
17891 case 0: assign(d16, unop(Iop_32to16, mkexpr(t0))); break;
17892 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(t0))); break;
17893 case 2: assign(d16, unop(Iop_32to16, mkexpr(t1))); break;
17894 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(t1))); break;
17895 case 4: assign(d16, unop(Iop_32to16, mkexpr(t2))); break;
17896 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(t2))); break;
17897 case 6: assign(d16, unop(Iop_32to16, mkexpr(t3))); break;
17898 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(t3))); break;
17899 default: vassert(0);
17900 }
17901
17902 if ( epartIsReg( modrm ) ) {
17903 UInt rE = eregOfRexRM(pfx,modrm);
17904 putIReg32( rE, unop(Iop_16Uto32, mkexpr(d16)) );
17905 delta += 1+1;
17906 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20,
17907 nameXMMReg( rG ), nameIReg32( rE ) );
17908 } else {
17909 storeLE( mkexpr(addr), mkexpr(d16) );
17910 delta += alen+1;
17911 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20, nameXMMReg( rG ), dis_buf );
17912 }
17913 return delta;
17914}
17915
17916
floriancacba8e2014-12-15 18:58:07 +000017917static Long dis_PEXTRD ( const VexAbiInfo* vbi, Prefix pfx,
sewardjc4530ae2012-05-21 10:18:49 +000017918 Long delta, Bool isAvx )
17919{
17920 IRTemp addr = IRTemp_INVALID;
17921 IRTemp t0 = IRTemp_INVALID;
17922 IRTemp t1 = IRTemp_INVALID;
17923 IRTemp t2 = IRTemp_INVALID;
17924 IRTemp t3 = IRTemp_INVALID;
17925 UChar modrm = 0;
17926 Int alen = 0;
17927 HChar dis_buf[50];
17928
17929 Int imm8_10;
17930 IRTemp xmm_vec = newTemp(Ity_V128);
17931 IRTemp src_dword = newTemp(Ity_I32);
florian55085f82012-11-21 00:36:55 +000017932 const HChar* mbV = isAvx ? "v" : "";
sewardjc4530ae2012-05-21 10:18:49 +000017933
17934 vassert(0==getRexW(pfx)); /* ensured by caller */
17935 modrm = getUChar(delta);
17936 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
sewardj4b1cc832012-06-13 11:10:20 +000017937 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
sewardjc4530ae2012-05-21 10:18:49 +000017938
17939 if ( epartIsReg( modrm ) ) {
17940 imm8_10 = (Int)(getUChar(delta+1) & 3);
17941 } else {
17942 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17943 imm8_10 = (Int)(getUChar(delta+alen) & 3);
17944 }
17945
17946 switch ( imm8_10 ) {
17947 case 0: assign( src_dword, mkexpr(t0) ); break;
17948 case 1: assign( src_dword, mkexpr(t1) ); break;
17949 case 2: assign( src_dword, mkexpr(t2) ); break;
17950 case 3: assign( src_dword, mkexpr(t3) ); break;
17951 default: vassert(0);
17952 }
17953
17954 if ( epartIsReg( modrm ) ) {
17955 putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) );
17956 delta += 1+1;
17957 DIP( "%spextrd $%d, %s,%s\n", mbV, imm8_10,
17958 nameXMMReg( gregOfRexRM(pfx, modrm) ),
17959 nameIReg32( eregOfRexRM(pfx, modrm) ) );
17960 } else {
17961 storeLE( mkexpr(addr), mkexpr(src_dword) );
17962 delta += alen+1;
17963 DIP( "%spextrd $%d, %s,%s\n", mbV,
17964 imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
17965 }
17966 return delta;
17967}
17968
17969
floriancacba8e2014-12-15 18:58:07 +000017970static Long dis_PEXTRQ ( const VexAbiInfo* vbi, Prefix pfx,
sewardj56c30312012-06-12 08:45:39 +000017971 Long delta, Bool isAvx )
17972{
17973 IRTemp addr = IRTemp_INVALID;
17974 UChar modrm = 0;
17975 Int alen = 0;
17976 HChar dis_buf[50];
17977
17978 Int imm8_0;
17979 IRTemp xmm_vec = newTemp(Ity_V128);
17980 IRTemp src_qword = newTemp(Ity_I64);
florian55085f82012-11-21 00:36:55 +000017981 const HChar* mbV = isAvx ? "v" : "";
sewardj56c30312012-06-12 08:45:39 +000017982
17983 vassert(1==getRexW(pfx)); /* ensured by caller */
17984 modrm = getUChar(delta);
17985 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
17986
17987 if ( epartIsReg( modrm ) ) {
17988 imm8_0 = (Int)(getUChar(delta+1) & 1);
17989 } else {
17990 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17991 imm8_0 = (Int)(getUChar(delta+alen) & 1);
17992 }
17993
17994 switch ( imm8_0 ) {
17995 case 0: assign( src_qword, unop(Iop_V128to64, mkexpr(xmm_vec)) );
17996 break;
17997 case 1: assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) );
17998 break;
17999 default: vassert(0);
18000 }
18001
18002 if ( epartIsReg( modrm ) ) {
18003 putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) );
18004 delta += 1+1;
18005 DIP( "%spextrq $%d, %s,%s\n", mbV, imm8_0,
18006 nameXMMReg( gregOfRexRM(pfx, modrm) ),
18007 nameIReg64( eregOfRexRM(pfx, modrm) ) );
18008 } else {
18009 storeLE( mkexpr(addr), mkexpr(src_qword) );
18010 delta += alen+1;
18011 DIP( "%spextrq $%d, %s,%s\n", mbV,
18012 imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
18013 }
18014 return delta;
18015}
18016
sewardj009230b2013-01-26 11:47:55 +000018017static IRExpr* math_CTZ32(IRExpr *exp)
sewardjf833ed42012-11-20 15:24:24 +000018018{
sewardj009230b2013-01-26 11:47:55 +000018019 /* Iop_Ctz32 isn't implemented by the amd64 back end, so use Iop_Ctz64. */
sewardjf833ed42012-11-20 15:24:24 +000018020 return unop(Iop_64to32, unop(Iop_Ctz64, unop(Iop_32Uto64, exp)));
18021}
18022
sewardjf833ed42012-11-20 15:24:24 +000018023static Long dis_PCMPISTRI_3A ( UChar modrm, UInt regNoL, UInt regNoR,
18024 Long delta, UChar opc, UChar imm,
18025 HChar dis_buf[])
18026{
18027 /* We only handle PCMPISTRI for now */
18028 vassert((opc & 0x03) == 0x03);
18029 /* And only an immediate byte of 0x38 or 0x3A */
18030 vassert((imm & ~0x02) == 0x38);
18031
18032 /* FIXME: Is this correct when RegNoL == 16 ? */
18033 IRTemp argL = newTemp(Ity_V128);
18034 assign(argL, getXMMReg(regNoL));
18035 IRTemp argR = newTemp(Ity_V128);
18036 assign(argR, getXMMReg(regNoR));
18037
18038 IRTemp zmaskL = newTemp(Ity_I32);
sewardj009230b2013-01-26 11:47:55 +000018039 assign(zmaskL, unop(Iop_16Uto32,
18040 unop(Iop_GetMSBs8x16,
18041 binop(Iop_CmpEQ8x16, mkexpr(argL), mkV128(0)))));
sewardjf833ed42012-11-20 15:24:24 +000018042 IRTemp zmaskR = newTemp(Ity_I32);
sewardj009230b2013-01-26 11:47:55 +000018043 assign(zmaskR, unop(Iop_16Uto32,
18044 unop(Iop_GetMSBs8x16,
18045 binop(Iop_CmpEQ8x16, mkexpr(argR), mkV128(0)))));
sewardjf833ed42012-11-20 15:24:24 +000018046
18047 /* We want validL = ~(zmaskL | -zmaskL)
18048
18049 But this formulation kills memcheck's validity tracking when any
18050 bits above the first "1" are invalid. So reformulate as:
18051
18052 validL = (zmaskL ? (1 << ctz(zmaskL)) : 0) - 1
18053 */
18054
sewardj009230b2013-01-26 11:47:55 +000018055 IRExpr *ctzL = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskL)));
sewardjf833ed42012-11-20 15:24:24 +000018056
sewardj009230b2013-01-26 11:47:55 +000018057 /* Generate a bool expression which is zero iff the original is
sewardjf833ed42012-11-20 15:24:24 +000018058 zero. Do this carefully so memcheck can propagate validity bits
18059 correctly.
18060 */
sewardj009230b2013-01-26 11:47:55 +000018061 IRTemp zmaskL_zero = newTemp(Ity_I1);
18062 assign(zmaskL_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskL), mkU32(0)));
sewardjf833ed42012-11-20 15:24:24 +000018063
18064 IRTemp validL = newTemp(Ity_I32);
18065 assign(validL, binop(Iop_Sub32,
florian99dd03e2013-01-29 03:56:06 +000018066 IRExpr_ITE(mkexpr(zmaskL_zero),
18067 binop(Iop_Shl32, mkU32(1), ctzL),
18068 mkU32(0)),
sewardjf833ed42012-11-20 15:24:24 +000018069 mkU32(1)));
18070
18071 /* And similarly for validR. */
sewardj009230b2013-01-26 11:47:55 +000018072 IRExpr *ctzR = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskR)));
18073 IRTemp zmaskR_zero = newTemp(Ity_I1);
18074 assign(zmaskR_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskR), mkU32(0)));
sewardjf833ed42012-11-20 15:24:24 +000018075 IRTemp validR = newTemp(Ity_I32);
18076 assign(validR, binop(Iop_Sub32,
florian99dd03e2013-01-29 03:56:06 +000018077 IRExpr_ITE(mkexpr(zmaskR_zero),
18078 binop(Iop_Shl32, mkU32(1), ctzR),
18079 mkU32(0)),
sewardjf833ed42012-11-20 15:24:24 +000018080 mkU32(1)));
18081
18082 /* Do the actual comparison. */
sewardj009230b2013-01-26 11:47:55 +000018083 IRExpr *boolResII = unop(Iop_16Uto32,
18084 unop(Iop_GetMSBs8x16,
18085 binop(Iop_CmpEQ8x16, mkexpr(argL),
18086 mkexpr(argR))));
sewardjf833ed42012-11-20 15:24:24 +000018087
18088 /* Compute boolresII & validL & validR (i.e., if both valid, use
18089 comparison result) */
18090 IRExpr *intRes1_a = binop(Iop_And32, boolResII,
18091 binop(Iop_And32,
18092 mkexpr(validL), mkexpr(validR)));
18093
18094 /* Compute ~(validL | validR); i.e., if both invalid, force 1. */
18095 IRExpr *intRes1_b = unop(Iop_Not32, binop(Iop_Or32,
18096 mkexpr(validL), mkexpr(validR)));
18097 /* Otherwise, zero. */
18098 IRExpr *intRes1 = binop(Iop_And32, mkU32(0xFFFF),
18099 binop(Iop_Or32, intRes1_a, intRes1_b));
18100
18101 /* The "0x30" in imm=0x3A means "polarity=3" means XOR validL with
18102 result. */
18103 IRTemp intRes2 = newTemp(Ity_I32);
18104 assign(intRes2, binop(Iop_And32, mkU32(0xFFFF),
18105 binop(Iop_Xor32, intRes1, mkexpr(validL))));
18106
18107 /* If the 0x40 bit were set in imm=0x3A, we would return the index
18108 of the msb. Since it is clear, we return the index of the
18109 lsb. */
sewardj009230b2013-01-26 11:47:55 +000018110 IRExpr *newECX = math_CTZ32(binop(Iop_Or32,
18111 mkexpr(intRes2), mkU32(0x10000)));
sewardjf833ed42012-11-20 15:24:24 +000018112
18113 /* And thats our rcx. */
18114 putIReg32(R_RCX, newECX);
18115
18116 /* Now for the condition codes... */
18117
18118 /* C == 0 iff intRes2 == 0 */
florian99dd03e2013-01-29 03:56:06 +000018119 IRExpr *c_bit = IRExpr_ITE( binop(Iop_ExpCmpNE32, mkexpr(intRes2),
18120 mkU32(0)),
18121 mkU32(1 << AMD64G_CC_SHIFT_C),
18122 mkU32(0));
sewardjf833ed42012-11-20 15:24:24 +000018123 /* Z == 1 iff any in argL is 0 */
florian99dd03e2013-01-29 03:56:06 +000018124 IRExpr *z_bit = IRExpr_ITE( mkexpr(zmaskL_zero),
18125 mkU32(1 << AMD64G_CC_SHIFT_Z),
18126 mkU32(0));
sewardjf833ed42012-11-20 15:24:24 +000018127 /* S == 1 iff any in argR is 0 */
florian99dd03e2013-01-29 03:56:06 +000018128 IRExpr *s_bit = IRExpr_ITE( mkexpr(zmaskR_zero),
18129 mkU32(1 << AMD64G_CC_SHIFT_S),
18130 mkU32(0));
sewardjf833ed42012-11-20 15:24:24 +000018131 /* O == IntRes2[0] */
18132 IRExpr *o_bit = binop(Iop_Shl32, binop(Iop_And32, mkexpr(intRes2),
18133 mkU32(0x01)),
18134 mkU8(AMD64G_CC_SHIFT_O));
18135
18136 /* Put them all together */
18137 IRTemp cc = newTemp(Ity_I64);
18138 assign(cc, widenUto64(binop(Iop_Or32,
18139 binop(Iop_Or32, c_bit, z_bit),
18140 binop(Iop_Or32, s_bit, o_bit))));
18141 stmt(IRStmt_Put(OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY)));
18142 stmt(IRStmt_Put(OFFB_CC_DEP1, mkexpr(cc)));
18143 stmt(IRStmt_Put(OFFB_CC_DEP2, mkU64(0)));
18144 stmt(IRStmt_Put(OFFB_CC_NDEP, mkU64(0)));
18145
18146 return delta;
18147}
sewardj56c30312012-06-12 08:45:39 +000018148
sewardjac75d7b2012-05-23 12:42:39 +000018149/* This can fail, in which case it returns the original (unchanged)
18150 delta. */
floriancacba8e2014-12-15 18:58:07 +000018151static Long dis_PCMPxSTRx ( const VexAbiInfo* vbi, Prefix pfx,
sewardjac75d7b2012-05-23 12:42:39 +000018152 Long delta, Bool isAvx, UChar opc )
18153{
18154 Long delta0 = delta;
18155 UInt isISTRx = opc & 2;
18156 UInt isxSTRM = (opc & 1) ^ 1;
18157 UInt regNoL = 0;
18158 UInt regNoR = 0;
18159 UChar imm = 0;
18160 IRTemp addr = IRTemp_INVALID;
18161 Int alen = 0;
18162 HChar dis_buf[50];
18163
18164 /* This is a nasty kludge. We need to pass 2 x V128 to the helper
18165 (which is clean). Since we can't do that, use a dirty helper to
18166 compute the results directly from the XMM regs in the guest
18167 state. That means for the memory case, we need to move the left
18168 operand into a pseudo-register (XMM16, let's call it). */
18169 UChar modrm = getUChar(delta);
18170 if (epartIsReg(modrm)) {
18171 regNoL = eregOfRexRM(pfx, modrm);
18172 regNoR = gregOfRexRM(pfx, modrm);
18173 imm = getUChar(delta+1);
18174 delta += 1+1;
18175 } else {
18176 regNoL = 16; /* use XMM16 as an intermediary */
18177 regNoR = gregOfRexRM(pfx, modrm);
sewardjd343e622012-05-24 06:17:14 +000018178 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardjac75d7b2012-05-23 12:42:39 +000018179 /* No alignment check; I guess that makes sense, given that
18180 these insns are for dealing with C style strings. */
18181 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
18182 imm = getUChar(delta+alen);
18183 delta += alen+1;
18184 }
18185
sewardj009230b2013-01-26 11:47:55 +000018186 /* Print the insn here, since dis_PCMPISTRI_3A doesn't do so
18187 itself. */
18188 if (regNoL == 16) {
18189 DIP("%spcmp%cstr%c $%x,%s,%s\n",
18190 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
18191 (UInt)imm, dis_buf, nameXMMReg(regNoR));
18192 } else {
18193 DIP("%spcmp%cstr%c $%x,%s,%s\n",
18194 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
18195 (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR));
18196 }
18197
18198 /* Handle special case(s). */
sewardjf833ed42012-11-20 15:24:24 +000018199 if (imm == 0x3A && isISTRx && !isxSTRM) {
18200 return dis_PCMPISTRI_3A ( modrm, regNoL, regNoR, delta,
18201 opc, imm, dis_buf);
18202 }
18203
sewardjac75d7b2012-05-23 12:42:39 +000018204 /* Now we know the XMM reg numbers for the operands, and the
18205 immediate byte. Is it one we can actually handle? Throw out any
18206 cases for which the helper function has not been verified. */
18207 switch (imm) {
sewardjeead3192014-05-21 14:42:04 +000018208 case 0x00: case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x0E:
18209 case 0x12: case 0x14: case 0x1A:
18210 case 0x30: case 0x34: case 0x38: case 0x3A:
18211 case 0x40: case 0x44: case 0x46: case 0x4A:
sewardjac75d7b2012-05-23 12:42:39 +000018212 break;
sewardjeead3192014-05-21 14:42:04 +000018213 // the 16-bit character versions of the above
18214 case 0x01: case 0x03: case 0x09: case 0x0B: case 0x0D:
18215 case 0x13: case 0x1B:
18216 case 0x39: case 0x3B:
18217 case 0x45: case 0x4B:
sewardjac75d7b2012-05-23 12:42:39 +000018218 break;
18219 default:
18220 return delta0; /*FAIL*/
18221 }
18222
18223 /* Who ya gonna call? Presumably not Ghostbusters. */
18224 void* fn = &amd64g_dirtyhelper_PCMPxSTRx;
florian55085f82012-11-21 00:36:55 +000018225 const HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx";
sewardjac75d7b2012-05-23 12:42:39 +000018226
18227 /* Round up the arguments. Note that this is a kludge -- the use
18228 of mkU64 rather than mkIRExpr_HWord implies the assumption that
18229 the host's word size is 64-bit. */
18230 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
18231 UInt gstOffR = ymmGuestRegOffset(regNoR);
18232
18233 IRExpr* opc4_and_imm = mkU64((opc << 8) | (imm & 0xFF));
18234 IRExpr* gstOffLe = mkU64(gstOffL);
18235 IRExpr* gstOffRe = mkU64(gstOffR);
18236 IRExpr* edxIN = isISTRx ? mkU64(0) : getIRegRDX(8);
18237 IRExpr* eaxIN = isISTRx ? mkU64(0) : getIRegRAX(8);
18238 IRExpr** args
florian90419562013-08-15 20:54:52 +000018239 = mkIRExprVec_6( IRExpr_BBPTR(),
sewardj74142b82013-08-08 10:28:59 +000018240 opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN );
sewardjac75d7b2012-05-23 12:42:39 +000018241
18242 IRTemp resT = newTemp(Ity_I64);
18243 IRDirty* d = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args );
18244 /* It's not really a dirty call, but we can't use the clean helper
18245 mechanism here for the very lame reason that we can't pass 2 x
sewardj74142b82013-08-08 10:28:59 +000018246 V128s by value to a helper. Hence this roundabout scheme. */
sewardjac75d7b2012-05-23 12:42:39 +000018247 d->nFxState = 2;
sewardjc9069f22012-06-01 16:09:50 +000018248 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardjac75d7b2012-05-23 12:42:39 +000018249 d->fxState[0].fx = Ifx_Read;
18250 d->fxState[0].offset = gstOffL;
18251 d->fxState[0].size = sizeof(U128);
18252 d->fxState[1].fx = Ifx_Read;
18253 d->fxState[1].offset = gstOffR;
18254 d->fxState[1].size = sizeof(U128);
18255 if (isxSTRM) {
18256 /* Declare that the helper writes XMM0. */
18257 d->nFxState = 3;
18258 d->fxState[2].fx = Ifx_Write;
18259 d->fxState[2].offset = ymmGuestRegOffset(0);
18260 d->fxState[2].size = sizeof(U128);
18261 }
18262
18263 stmt( IRStmt_Dirty(d) );
18264
18265 /* Now resT[15:0] holds the new OSZACP values, so the condition
18266 codes must be updated. And for a xSTRI case, resT[31:16] holds
18267 the new ECX value, so stash that too. */
18268 if (!isxSTRM) {
18269 putIReg64(R_RCX, binop(Iop_And64,
18270 binop(Iop_Shr64, mkexpr(resT), mkU8(16)),
18271 mkU64(0xFFFF)));
18272 }
18273
18274 /* Zap the upper half of the dest reg as per AVX conventions. */
18275 if (isxSTRM && isAvx)
18276 putYMMRegLane128(/*YMM*/0, 1, mkV128(0));
18277
18278 stmt( IRStmt_Put(
18279 OFFB_CC_DEP1,
18280 binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF))
18281 ));
18282 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
18283 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
18284 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
18285
sewardjac75d7b2012-05-23 12:42:39 +000018286 return delta;
18287}
18288
18289
sewardj21459cb2012-06-18 14:05:52 +000018290static IRTemp math_PINSRB_128 ( IRTemp v128, IRTemp u8, UInt imm8 )
18291{
sewardj4ed05e02012-06-18 15:01:30 +000018292 vassert(imm8 >= 0 && imm8 <= 15);
18293
sewardj21459cb2012-06-18 14:05:52 +000018294 // Create a V128 value which has the selected byte in the
18295 // specified lane, and zeroes everywhere else.
sewardj4ed05e02012-06-18 15:01:30 +000018296 IRTemp tmp128 = newTemp(Ity_V128);
sewardj21459cb2012-06-18 14:05:52 +000018297 IRTemp halfshift = newTemp(Ity_I64);
18298 assign(halfshift, binop(Iop_Shl64,
sewardj4ed05e02012-06-18 15:01:30 +000018299 unop(Iop_8Uto64, mkexpr(u8)),
18300 mkU8(8 * (imm8 & 7))));
sewardj21459cb2012-06-18 14:05:52 +000018301 if (imm8 < 8) {
18302 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
18303 } else {
18304 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
18305 }
18306
18307 UShort mask = ~(1 << imm8);
sewardj4ed05e02012-06-18 15:01:30 +000018308 IRTemp res = newTemp(Ity_V128);
18309 assign( res, binop(Iop_OrV128,
18310 mkexpr(tmp128),
18311 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
sewardj21459cb2012-06-18 14:05:52 +000018312 return res;
18313}
18314
18315
sewardj6faf7cc2012-05-25 15:53:01 +000018316static IRTemp math_PINSRD_128 ( IRTemp v128, IRTemp u32, UInt imm8 )
18317{
18318 IRTemp z32 = newTemp(Ity_I32);
18319 assign(z32, mkU32(0));
18320
18321 /* Surround u32 with zeroes as per imm, giving us something we can
18322 OR into a suitably masked-out v128.*/
18323 IRTemp withZs = newTemp(Ity_V128);
18324 UShort mask = 0;
18325 switch (imm8) {
18326 case 3: mask = 0x0FFF;
sewardj4b1cc832012-06-13 11:10:20 +000018327 assign(withZs, mkV128from32s(u32, z32, z32, z32));
sewardj6faf7cc2012-05-25 15:53:01 +000018328 break;
18329 case 2: mask = 0xF0FF;
sewardj4b1cc832012-06-13 11:10:20 +000018330 assign(withZs, mkV128from32s(z32, u32, z32, z32));
sewardj6faf7cc2012-05-25 15:53:01 +000018331 break;
18332 case 1: mask = 0xFF0F;
sewardj4b1cc832012-06-13 11:10:20 +000018333 assign(withZs, mkV128from32s(z32, z32, u32, z32));
sewardj6faf7cc2012-05-25 15:53:01 +000018334 break;
18335 case 0: mask = 0xFFF0;
sewardj4b1cc832012-06-13 11:10:20 +000018336 assign(withZs, mkV128from32s(z32, z32, z32, u32));
sewardj6faf7cc2012-05-25 15:53:01 +000018337 break;
18338 default: vassert(0);
18339 }
18340
18341 IRTemp res = newTemp(Ity_V128);
18342 assign(res, binop( Iop_OrV128,
18343 mkexpr(withZs),
18344 binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
18345 return res;
18346}
18347
18348
sewardj98d02cc2012-06-02 11:55:25 +000018349static IRTemp math_PINSRQ_128 ( IRTemp v128, IRTemp u64, UInt imm8 )
18350{
18351 /* Surround u64 with zeroes as per imm, giving us something we can
18352 OR into a suitably masked-out v128.*/
18353 IRTemp withZs = newTemp(Ity_V128);
18354 UShort mask = 0;
18355 if (imm8 == 0) {
18356 mask = 0xFF00;
18357 assign(withZs, binop(Iop_64HLtoV128, mkU64(0), mkexpr(u64)));
18358 } else {
18359 vassert(imm8 == 1);
18360 mask = 0x00FF;
18361 assign( withZs, binop(Iop_64HLtoV128, mkexpr(u64), mkU64(0)));
18362 }
18363
18364 IRTemp res = newTemp(Ity_V128);
18365 assign( res, binop( Iop_OrV128,
18366 mkexpr(withZs),
18367 binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
18368 return res;
18369}
18370
18371
sewardjcfca8cd2012-05-27 08:25:42 +000018372static IRTemp math_INSERTPS ( IRTemp dstV, IRTemp toInsertD, UInt imm8 )
18373{
18374 const IRTemp inval = IRTemp_INVALID;
18375 IRTemp dstDs[4] = { inval, inval, inval, inval };
sewardj4b1cc832012-06-13 11:10:20 +000018376 breakupV128to32s( dstV, &dstDs[3], &dstDs[2], &dstDs[1], &dstDs[0] );
sewardjcfca8cd2012-05-27 08:25:42 +000018377
18378 vassert(imm8 <= 255);
18379 dstDs[(imm8 >> 4) & 3] = toInsertD; /* "imm8_count_d" */
18380
18381 UInt imm8_zmask = (imm8 & 15);
18382 IRTemp zero_32 = newTemp(Ity_I32);
18383 assign( zero_32, mkU32(0) );
18384 IRTemp resV = newTemp(Ity_V128);
sewardj4b1cc832012-06-13 11:10:20 +000018385 assign( resV, mkV128from32s(
sewardjcfca8cd2012-05-27 08:25:42 +000018386 ((imm8_zmask & 8) == 8) ? zero_32 : dstDs[3],
18387 ((imm8_zmask & 4) == 4) ? zero_32 : dstDs[2],
18388 ((imm8_zmask & 2) == 2) ? zero_32 : dstDs[1],
18389 ((imm8_zmask & 1) == 1) ? zero_32 : dstDs[0]) );
18390 return resV;
18391}
18392
18393
floriancacba8e2014-12-15 18:58:07 +000018394static Long dis_PEXTRB_128_GtoE ( const VexAbiInfo* vbi, Prefix pfx,
sewardje8a7eb72012-06-12 14:59:17 +000018395 Long delta, Bool isAvx )
18396{
18397 IRTemp addr = IRTemp_INVALID;
18398 Int alen = 0;
18399 HChar dis_buf[50];
18400 IRTemp xmm_vec = newTemp(Ity_V128);
18401 IRTemp sel_lane = newTemp(Ity_I32);
18402 IRTemp shr_lane = newTemp(Ity_I32);
florian55085f82012-11-21 00:36:55 +000018403 const HChar* mbV = isAvx ? "v" : "";
sewardje8a7eb72012-06-12 14:59:17 +000018404 UChar modrm = getUChar(delta);
18405 IRTemp t3, t2, t1, t0;
18406 Int imm8;
18407 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
18408 t3 = t2 = t1 = t0 = IRTemp_INVALID;
sewardj4b1cc832012-06-13 11:10:20 +000018409 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
sewardje8a7eb72012-06-12 14:59:17 +000018410
18411 if ( epartIsReg( modrm ) ) {
18412 imm8 = (Int)getUChar(delta+1);
18413 } else {
18414 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18415 imm8 = (Int)getUChar(delta+alen);
18416 }
18417 switch ( (imm8 >> 2) & 3 ) {
18418 case 0: assign( sel_lane, mkexpr(t0) ); break;
18419 case 1: assign( sel_lane, mkexpr(t1) ); break;
18420 case 2: assign( sel_lane, mkexpr(t2) ); break;
18421 case 3: assign( sel_lane, mkexpr(t3) ); break;
18422 default: vassert(0);
18423 }
18424 assign( shr_lane,
18425 binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) );
18426
18427 if ( epartIsReg( modrm ) ) {
18428 putIReg64( eregOfRexRM(pfx,modrm),
18429 unop( Iop_32Uto64,
18430 binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) );
18431 delta += 1+1;
18432 DIP( "%spextrb $%d, %s,%s\n", mbV, imm8,
18433 nameXMMReg( gregOfRexRM(pfx, modrm) ),
18434 nameIReg64( eregOfRexRM(pfx, modrm) ) );
18435 } else {
18436 storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) );
18437 delta += alen+1;
18438 DIP( "%spextrb $%d,%s,%s\n", mbV,
18439 imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
18440 }
18441
18442 return delta;
18443}
18444
18445
sewardj4ed05e02012-06-18 15:01:30 +000018446static IRTemp math_DPPD_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
18447{
18448 vassert(imm8 < 256);
18449 UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF };
18450 IRTemp and_vec = newTemp(Ity_V128);
18451 IRTemp sum_vec = newTemp(Ity_V128);
sewardj9571dc02014-01-26 18:34:23 +000018452 IRTemp rm = newTemp(Ity_I32);
18453 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
sewardj4ed05e02012-06-18 15:01:30 +000018454 assign( and_vec, binop( Iop_AndV128,
sewardj9571dc02014-01-26 18:34:23 +000018455 triop( Iop_Mul64Fx2,
18456 mkexpr(rm),
sewardj4ed05e02012-06-18 15:01:30 +000018457 mkexpr(dst_vec), mkexpr(src_vec) ),
18458 mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) );
18459
18460 assign( sum_vec, binop( Iop_Add64F0x2,
18461 binop( Iop_InterleaveHI64x2,
18462 mkexpr(and_vec), mkexpr(and_vec) ),
18463 binop( Iop_InterleaveLO64x2,
18464 mkexpr(and_vec), mkexpr(and_vec) ) ) );
18465 IRTemp res = newTemp(Ity_V128);
18466 assign(res, binop( Iop_AndV128,
18467 binop( Iop_InterleaveLO64x2,
18468 mkexpr(sum_vec), mkexpr(sum_vec) ),
18469 mkV128( imm8_perms[ (imm8 & 3) ] ) ) );
18470 return res;
18471}
18472
18473
sewardjadf357c2012-06-24 13:44:17 +000018474static IRTemp math_DPPS_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
18475{
18476 vassert(imm8 < 256);
18477 IRTemp tmp_prod_vec = newTemp(Ity_V128);
18478 IRTemp prod_vec = newTemp(Ity_V128);
18479 IRTemp sum_vec = newTemp(Ity_V128);
sewardj9571dc02014-01-26 18:34:23 +000018480 IRTemp rm = newTemp(Ity_I32);
sewardjadf357c2012-06-24 13:44:17 +000018481 IRTemp v3, v2, v1, v0;
18482 v3 = v2 = v1 = v0 = IRTemp_INVALID;
18483 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
18484 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
18485 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
18486 0xFFFF };
18487
sewardj9571dc02014-01-26 18:34:23 +000018488 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
sewardjadf357c2012-06-24 13:44:17 +000018489 assign( tmp_prod_vec,
18490 binop( Iop_AndV128,
sewardj9571dc02014-01-26 18:34:23 +000018491 triop( Iop_Mul32Fx4,
18492 mkexpr(rm), mkexpr(dst_vec), mkexpr(src_vec) ),
sewardjadf357c2012-06-24 13:44:17 +000018493 mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) );
18494 breakupV128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 );
18495 assign( prod_vec, mkV128from32s( v3, v1, v2, v0 ) );
18496
sewardj9571dc02014-01-26 18:34:23 +000018497 assign( sum_vec, triop( Iop_Add32Fx4,
18498 mkexpr(rm),
sewardjadf357c2012-06-24 13:44:17 +000018499 binop( Iop_InterleaveHI32x4,
18500 mkexpr(prod_vec), mkexpr(prod_vec) ),
18501 binop( Iop_InterleaveLO32x4,
18502 mkexpr(prod_vec), mkexpr(prod_vec) ) ) );
18503
18504 IRTemp res = newTemp(Ity_V128);
18505 assign( res, binop( Iop_AndV128,
sewardj9571dc02014-01-26 18:34:23 +000018506 triop( Iop_Add32Fx4,
18507 mkexpr(rm),
sewardjadf357c2012-06-24 13:44:17 +000018508 binop( Iop_InterleaveHI32x4,
18509 mkexpr(sum_vec), mkexpr(sum_vec) ),
18510 binop( Iop_InterleaveLO32x4,
18511 mkexpr(sum_vec), mkexpr(sum_vec) ) ),
18512 mkV128( imm8_perms[ (imm8 & 15) ] ) ) );
18513 return res;
18514}
18515
18516
sewardj8516a1f2012-06-24 14:26:30 +000018517static IRTemp math_MPSADBW_128 ( IRTemp dst_vec, IRTemp src_vec, UInt imm8 )
18518{
18519 /* Mask out bits of the operands we don't need. This isn't
18520 strictly necessary, but it does ensure Memcheck doesn't
18521 give us any false uninitialised value errors as a
18522 result. */
18523 UShort src_mask[4] = { 0x000F, 0x00F0, 0x0F00, 0xF000 };
18524 UShort dst_mask[2] = { 0x07FF, 0x7FF0 };
18525
18526 IRTemp src_maskV = newTemp(Ity_V128);
18527 IRTemp dst_maskV = newTemp(Ity_V128);
18528 assign(src_maskV, mkV128( src_mask[ imm8 & 3 ] ));
18529 assign(dst_maskV, mkV128( dst_mask[ (imm8 >> 2) & 1 ] ));
18530
18531 IRTemp src_masked = newTemp(Ity_V128);
18532 IRTemp dst_masked = newTemp(Ity_V128);
18533 assign(src_masked, binop(Iop_AndV128, mkexpr(src_vec), mkexpr(src_maskV)));
18534 assign(dst_masked, binop(Iop_AndV128, mkexpr(dst_vec), mkexpr(dst_maskV)));
18535
18536 /* Generate 4 64 bit values that we can hand to a clean helper */
18537 IRTemp sHi = newTemp(Ity_I64);
18538 IRTemp sLo = newTemp(Ity_I64);
18539 assign( sHi, unop(Iop_V128HIto64, mkexpr(src_masked)) );
18540 assign( sLo, unop(Iop_V128to64, mkexpr(src_masked)) );
18541
18542 IRTemp dHi = newTemp(Ity_I64);
18543 IRTemp dLo = newTemp(Ity_I64);
18544 assign( dHi, unop(Iop_V128HIto64, mkexpr(dst_masked)) );
18545 assign( dLo, unop(Iop_V128to64, mkexpr(dst_masked)) );
18546
18547 /* Compute halves of the result separately */
18548 IRTemp resHi = newTemp(Ity_I64);
18549 IRTemp resLo = newTemp(Ity_I64);
18550
18551 IRExpr** argsHi
18552 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
18553 mkU64( 0x80 | (imm8 & 7) ));
18554 IRExpr** argsLo
18555 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
18556 mkU64( 0x00 | (imm8 & 7) ));
18557
18558 assign(resHi, mkIRExprCCall( Ity_I64, 0/*regparm*/,
18559 "amd64g_calc_mpsadbw",
18560 &amd64g_calc_mpsadbw, argsHi ));
18561 assign(resLo, mkIRExprCCall( Ity_I64, 0/*regparm*/,
18562 "amd64g_calc_mpsadbw",
18563 &amd64g_calc_mpsadbw, argsLo ));
18564
18565 IRTemp res = newTemp(Ity_V128);
18566 assign(res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo)));
18567 return res;
18568}
18569
floriancacba8e2014-12-15 18:58:07 +000018570static Long dis_EXTRACTPS ( const VexAbiInfo* vbi, Prefix pfx,
sewardjadf357c2012-06-24 13:44:17 +000018571 Long delta, Bool isAvx )
18572{
18573 IRTemp addr = IRTemp_INVALID;
18574 Int alen = 0;
18575 HChar dis_buf[50];
18576 UChar modrm = getUChar(delta);
18577 Int imm8_10;
18578 IRTemp xmm_vec = newTemp(Ity_V128);
18579 IRTemp src_dword = newTemp(Ity_I32);
18580 UInt rG = gregOfRexRM(pfx,modrm);
18581 IRTemp t3, t2, t1, t0;
18582 t3 = t2 = t1 = t0 = IRTemp_INVALID;
18583
18584 assign( xmm_vec, getXMMReg( rG ) );
18585 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
18586
18587 if ( epartIsReg( modrm ) ) {
18588 imm8_10 = (Int)(getUChar(delta+1) & 3);
18589 } else {
18590 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18591 imm8_10 = (Int)(getUChar(delta+alen) & 3);
18592 }
18593
18594 switch ( imm8_10 ) {
18595 case 0: assign( src_dword, mkexpr(t0) ); break;
18596 case 1: assign( src_dword, mkexpr(t1) ); break;
18597 case 2: assign( src_dword, mkexpr(t2) ); break;
18598 case 3: assign( src_dword, mkexpr(t3) ); break;
18599 default: vassert(0);
18600 }
18601
18602 if ( epartIsReg( modrm ) ) {
18603 UInt rE = eregOfRexRM(pfx,modrm);
18604 putIReg32( rE, mkexpr(src_dword) );
18605 delta += 1+1;
18606 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
18607 nameXMMReg( rG ), nameIReg32( rE ) );
18608 } else {
18609 storeLE( mkexpr(addr), mkexpr(src_dword) );
18610 delta += alen+1;
18611 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
18612 nameXMMReg( rG ), dis_buf );
18613 }
18614
18615 return delta;
18616}
18617
18618
sewardj1407a362012-06-24 15:11:38 +000018619static IRTemp math_PCLMULQDQ( IRTemp dV, IRTemp sV, UInt imm8 )
18620{
18621 IRTemp t0 = newTemp(Ity_I64);
18622 IRTemp t1 = newTemp(Ity_I64);
18623 assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64,
18624 mkexpr(dV)));
18625 assign(t1, unop((imm8&16) ? Iop_V128HIto64 : Iop_V128to64,
18626 mkexpr(sV)));
18627
18628 IRTemp t2 = newTemp(Ity_I64);
18629 IRTemp t3 = newTemp(Ity_I64);
18630
18631 IRExpr** args;
18632
18633 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0));
18634 assign(t2, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
18635 &amd64g_calculate_pclmul, args));
18636 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1));
18637 assign(t3, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
18638 &amd64g_calculate_pclmul, args));
18639
18640 IRTemp res = newTemp(Ity_V128);
18641 assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)));
18642 return res;
18643}
18644
18645
sewardj80611e32012-01-20 13:07:24 +000018646__attribute__((noinline))
18647static
18648Long dis_ESC_0F3A__SSE4 ( Bool* decode_OK,
floriancacba8e2014-12-15 18:58:07 +000018649 const VexAbiInfo* vbi,
sewardj80611e32012-01-20 13:07:24 +000018650 Prefix pfx, Int sz, Long deltaIN )
18651{
18652 IRTemp addr = IRTemp_INVALID;
sewardj80611e32012-01-20 13:07:24 +000018653 UChar modrm = 0;
18654 Int alen = 0;
18655 HChar dis_buf[50];
18656
18657 *decode_OK = False;
18658
18659 Long delta = deltaIN;
18660 UChar opc = getUChar(delta);
18661 delta++;
18662 switch (opc) {
18663
18664 case 0x08:
18665 /* 66 0F 3A 08 /r ib = ROUNDPS imm8, xmm2/m128, xmm1 */
18666 if (have66noF2noF3(pfx) && sz == 2) {
18667
18668 IRTemp src0 = newTemp(Ity_F32);
18669 IRTemp src1 = newTemp(Ity_F32);
18670 IRTemp src2 = newTemp(Ity_F32);
18671 IRTemp src3 = newTemp(Ity_F32);
18672 IRTemp res0 = newTemp(Ity_F32);
18673 IRTemp res1 = newTemp(Ity_F32);
18674 IRTemp res2 = newTemp(Ity_F32);
18675 IRTemp res3 = newTemp(Ity_F32);
18676 IRTemp rm = newTemp(Ity_I32);
18677 Int imm = 0;
18678
18679 modrm = getUChar(delta);
18680
18681 if (epartIsReg(modrm)) {
18682 assign( src0,
18683 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
18684 assign( src1,
18685 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 1 ) );
18686 assign( src2,
18687 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 2 ) );
18688 assign( src3,
18689 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 3 ) );
18690 imm = getUChar(delta+1);
18691 if (imm & ~15) goto decode_failure;
18692 delta += 1+1;
18693 DIP( "roundps $%d,%s,%s\n",
18694 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
18695 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18696 } else {
sewardjd343e622012-05-24 06:17:14 +000018697 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj80611e32012-01-20 13:07:24 +000018698 gen_SEGV_if_not_16_aligned(addr);
18699 assign( src0, loadLE(Ity_F32,
18700 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
18701 assign( src1, loadLE(Ity_F32,
18702 binop(Iop_Add64, mkexpr(addr), mkU64(4) )));
18703 assign( src2, loadLE(Ity_F32,
18704 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
18705 assign( src3, loadLE(Ity_F32,
18706 binop(Iop_Add64, mkexpr(addr), mkU64(12) )));
18707 imm = getUChar(delta+alen);
18708 if (imm & ~15) goto decode_failure;
18709 delta += alen+1;
18710 DIP( "roundps $%d,%s,%s\n",
18711 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18712 }
18713
18714 /* (imm & 3) contains an Intel-encoded rounding mode. Because
18715 that encoding is the same as the encoding for IRRoundingMode,
18716 we can use that value directly in the IR as a rounding
18717 mode. */
18718 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
18719
18720 assign(res0, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src0)) );
18721 assign(res1, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src1)) );
18722 assign(res2, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src2)) );
18723 assign(res3, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src3)) );
18724
18725 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
18726 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
18727 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 2, mkexpr(res2) );
18728 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 3, mkexpr(res3) );
18729
18730 goto decode_success;
18731 }
18732 break;
18733
18734 case 0x09:
18735 /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */
18736 if (have66noF2noF3(pfx) && sz == 2) {
18737
18738 IRTemp src0 = newTemp(Ity_F64);
18739 IRTemp src1 = newTemp(Ity_F64);
18740 IRTemp res0 = newTemp(Ity_F64);
18741 IRTemp res1 = newTemp(Ity_F64);
18742 IRTemp rm = newTemp(Ity_I32);
18743 Int imm = 0;
18744
18745 modrm = getUChar(delta);
18746
18747 if (epartIsReg(modrm)) {
18748 assign( src0,
18749 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) );
18750 assign( src1,
18751 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 1 ) );
18752 imm = getUChar(delta+1);
18753 if (imm & ~15) goto decode_failure;
18754 delta += 1+1;
18755 DIP( "roundpd $%d,%s,%s\n",
18756 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
18757 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18758 } else {
sewardjd343e622012-05-24 06:17:14 +000018759 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj80611e32012-01-20 13:07:24 +000018760 gen_SEGV_if_not_16_aligned(addr);
18761 assign( src0, loadLE(Ity_F64,
18762 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
18763 assign( src1, loadLE(Ity_F64,
18764 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
18765 imm = getUChar(delta+alen);
18766 if (imm & ~15) goto decode_failure;
18767 delta += alen+1;
18768 DIP( "roundpd $%d,%s,%s\n",
18769 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18770 }
18771
18772 /* (imm & 3) contains an Intel-encoded rounding mode. Because
18773 that encoding is the same as the encoding for IRRoundingMode,
18774 we can use that value directly in the IR as a rounding
18775 mode. */
18776 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
18777
18778 assign(res0, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src0)) );
18779 assign(res1, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src1)) );
18780
18781 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
18782 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
18783
18784 goto decode_success;
18785 }
18786 break;
18787
18788 case 0x0A:
18789 case 0x0B:
18790 /* 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
18791 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
18792 */
18793 if (have66noF2noF3(pfx) && sz == 2) {
18794
18795 Bool isD = opc == 0x0B;
18796 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
18797 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
18798 Int imm = 0;
18799
18800 modrm = getUChar(delta);
18801
18802 if (epartIsReg(modrm)) {
18803 assign( src,
18804 isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 )
18805 : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
18806 imm = getUChar(delta+1);
18807 if (imm & ~15) goto decode_failure;
18808 delta += 1+1;
18809 DIP( "rounds%c $%d,%s,%s\n",
18810 isD ? 'd' : 's',
18811 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
18812 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18813 } else {
sewardjd343e622012-05-24 06:17:14 +000018814 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj80611e32012-01-20 13:07:24 +000018815 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
18816 imm = getUChar(delta+alen);
18817 if (imm & ~15) goto decode_failure;
18818 delta += alen+1;
18819 DIP( "rounds%c $%d,%s,%s\n",
18820 isD ? 'd' : 's',
18821 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18822 }
18823
18824 /* (imm & 3) contains an Intel-encoded rounding mode. Because
18825 that encoding is the same as the encoding for IRRoundingMode,
18826 we can use that value directly in the IR as a rounding
18827 mode. */
18828 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
18829 (imm & 4) ? get_sse_roundingmode()
18830 : mkU32(imm & 3),
18831 mkexpr(src)) );
18832
18833 if (isD)
18834 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
18835 else
18836 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
18837
18838 goto decode_success;
18839 }
18840 break;
18841
18842 case 0x0C:
18843 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8
18844 Blend Packed Single Precision Floating-Point Values (XMM) */
18845 if (have66noF2noF3(pfx) && sz == 2) {
18846
18847 Int imm8;
18848 IRTemp dst_vec = newTemp(Ity_V128);
18849 IRTemp src_vec = newTemp(Ity_V128);
18850
18851 modrm = getUChar(delta);
18852
18853 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
18854
18855 if ( epartIsReg( modrm ) ) {
18856 imm8 = (Int)getUChar(delta+1);
18857 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
18858 delta += 1+1;
18859 DIP( "blendps $%d, %s,%s\n", imm8,
18860 nameXMMReg( eregOfRexRM(pfx, modrm) ),
18861 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18862 } else {
18863 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
18864 1/* imm8 is 1 byte after the amode */ );
18865 gen_SEGV_if_not_16_aligned( addr );
18866 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
18867 imm8 = (Int)getUChar(delta+alen);
18868 delta += alen+1;
18869 DIP( "blendpd $%d, %s,%s\n",
18870 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18871 }
18872
sewardj80611e32012-01-20 13:07:24 +000018873 putXMMReg( gregOfRexRM(pfx, modrm),
sewardj21459cb2012-06-18 14:05:52 +000018874 mkexpr( math_BLENDPS_128( src_vec, dst_vec, imm8) ) );
sewardj80611e32012-01-20 13:07:24 +000018875 goto decode_success;
18876 }
18877 break;
18878
18879 case 0x0D:
18880 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8
18881 Blend Packed Double Precision Floating-Point Values (XMM) */
18882 if (have66noF2noF3(pfx) && sz == 2) {
18883
18884 Int imm8;
sewardj80611e32012-01-20 13:07:24 +000018885 IRTemp dst_vec = newTemp(Ity_V128);
18886 IRTemp src_vec = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000018887
18888 modrm = getUChar(delta);
18889 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
18890
18891 if ( epartIsReg( modrm ) ) {
18892 imm8 = (Int)getUChar(delta+1);
18893 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
18894 delta += 1+1;
18895 DIP( "blendpd $%d, %s,%s\n", imm8,
18896 nameXMMReg( eregOfRexRM(pfx, modrm) ),
18897 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18898 } else {
18899 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
18900 1/* imm8 is 1 byte after the amode */ );
18901 gen_SEGV_if_not_16_aligned( addr );
18902 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
18903 imm8 = (Int)getUChar(delta+alen);
18904 delta += alen+1;
18905 DIP( "blendpd $%d, %s,%s\n",
18906 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18907 }
18908
sewardj80611e32012-01-20 13:07:24 +000018909 putXMMReg( gregOfRexRM(pfx, modrm),
sewardj21459cb2012-06-18 14:05:52 +000018910 mkexpr( math_BLENDPD_128( src_vec, dst_vec, imm8) ) );
sewardj80611e32012-01-20 13:07:24 +000018911 goto decode_success;
18912 }
18913 break;
18914
18915 case 0x0E:
18916 /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8
18917 Blend Packed Words (XMM) */
18918 if (have66noF2noF3(pfx) && sz == 2) {
18919
18920 Int imm8;
18921 IRTemp dst_vec = newTemp(Ity_V128);
18922 IRTemp src_vec = newTemp(Ity_V128);
18923
18924 modrm = getUChar(delta);
18925
18926 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
18927
18928 if ( epartIsReg( modrm ) ) {
18929 imm8 = (Int)getUChar(delta+1);
18930 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
18931 delta += 1+1;
18932 DIP( "pblendw $%d, %s,%s\n", imm8,
18933 nameXMMReg( eregOfRexRM(pfx, modrm) ),
18934 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18935 } else {
18936 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
18937 1/* imm8 is 1 byte after the amode */ );
18938 gen_SEGV_if_not_16_aligned( addr );
18939 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
18940 imm8 = (Int)getUChar(delta+alen);
18941 delta += alen+1;
18942 DIP( "pblendw $%d, %s,%s\n",
18943 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18944 }
18945
sewardj80611e32012-01-20 13:07:24 +000018946 putXMMReg( gregOfRexRM(pfx, modrm),
sewardj21459cb2012-06-18 14:05:52 +000018947 mkexpr( math_PBLENDW_128( src_vec, dst_vec, imm8) ) );
sewardj80611e32012-01-20 13:07:24 +000018948 goto decode_success;
18949 }
18950 break;
18951
18952 case 0x14:
18953 /* 66 0F 3A 14 /r ib = PEXTRB r/m16, xmm, imm8
18954 Extract Byte from xmm, store in mem or zero-extend + store in gen.reg.
18955 (XMM) */
18956 if (have66noF2noF3(pfx) && sz == 2) {
sewardje8a7eb72012-06-12 14:59:17 +000018957 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000018958 goto decode_success;
18959 }
18960 break;
18961
18962 case 0x15:
18963 /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8
18964 Extract Word from xmm, store in mem or zero-extend + store in gen.reg.
18965 (XMM) */
18966 if (have66noF2noF3(pfx) && sz == 2) {
sewardj82096922012-06-24 14:57:59 +000018967 delta = dis_PEXTRW( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000018968 goto decode_success;
18969 }
18970 break;
18971
18972 case 0x16:
18973 /* 66 no-REX.W 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8
18974 Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM)
18975 Note that this insn has the same opcodes as PEXTRQ, but
18976 here the REX.W bit is _not_ present */
18977 if (have66noF2noF3(pfx)
18978 && sz == 2 /* REX.W is _not_ present */) {
sewardjc4530ae2012-05-21 10:18:49 +000018979 delta = dis_PEXTRD( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000018980 goto decode_success;
18981 }
18982 /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8
18983 Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM)
18984 Note that this insn has the same opcodes as PEXTRD, but
18985 here the REX.W bit is present */
18986 if (have66noF2noF3(pfx)
18987 && sz == 8 /* REX.W is present */) {
sewardj56c30312012-06-12 08:45:39 +000018988 delta = dis_PEXTRQ( vbi, pfx, delta, False/*!isAvx*/);
sewardj80611e32012-01-20 13:07:24 +000018989 goto decode_success;
18990 }
18991 break;
18992
18993 case 0x17:
18994 /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract
18995 float from xmm reg and store in gen.reg or mem. This is
18996 identical to PEXTRD, except that REX.W appears to be ignored.
18997 */
18998 if (have66noF2noF3(pfx)
18999 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
sewardjadf357c2012-06-24 13:44:17 +000019000 delta = dis_EXTRACTPS( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000019001 goto decode_success;
19002 }
19003 break;
19004
19005 case 0x20:
19006 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8
19007 Extract byte from r32/m8 and insert into xmm1 */
19008 if (have66noF2noF3(pfx) && sz == 2) {
sewardj80611e32012-01-20 13:07:24 +000019009 Int imm8;
sewardj4ed05e02012-06-18 15:01:30 +000019010 IRTemp new8 = newTemp(Ity_I8);
sewardj80611e32012-01-20 13:07:24 +000019011 modrm = getUChar(delta);
sewardj21459cb2012-06-18 14:05:52 +000019012 UInt rG = gregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000019013 if ( epartIsReg( modrm ) ) {
sewardj4ed05e02012-06-18 15:01:30 +000019014 UInt rE = eregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000019015 imm8 = (Int)(getUChar(delta+1) & 0xF);
sewardj4ed05e02012-06-18 15:01:30 +000019016 assign( new8, unop(Iop_32to8, getIReg32(rE)) );
sewardj80611e32012-01-20 13:07:24 +000019017 delta += 1+1;
19018 DIP( "pinsrb $%d,%s,%s\n", imm8,
sewardj4ed05e02012-06-18 15:01:30 +000019019 nameIReg32(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019020 } else {
19021 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19022 imm8 = (Int)(getUChar(delta+alen) & 0xF);
sewardj4ed05e02012-06-18 15:01:30 +000019023 assign( new8, loadLE( Ity_I8, mkexpr(addr) ) );
sewardj80611e32012-01-20 13:07:24 +000019024 delta += alen+1;
19025 DIP( "pinsrb $%d,%s,%s\n",
sewardj4ed05e02012-06-18 15:01:30 +000019026 imm8, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019027 }
sewardj21459cb2012-06-18 14:05:52 +000019028 IRTemp src_vec = newTemp(Ity_V128);
sewardj4ed05e02012-06-18 15:01:30 +000019029 assign(src_vec, getXMMReg( gregOfRexRM(pfx, modrm) ));
19030 IRTemp res = math_PINSRB_128( src_vec, new8, imm8 );
19031 putXMMReg( rG, mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000019032 goto decode_success;
19033 }
19034 break;
19035
19036 case 0x21:
sewardjcfca8cd2012-05-27 08:25:42 +000019037 /* 66 0F 3A 21 /r ib = INSERTPS imm8, xmm2/m32, xmm1
sewardj80611e32012-01-20 13:07:24 +000019038 Insert Packed Single Precision Floating-Point Value (XMM) */
19039 if (have66noF2noF3(pfx) && sz == 2) {
sewardjcfca8cd2012-05-27 08:25:42 +000019040 UInt imm8;
19041 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */
19042 const IRTemp inval = IRTemp_INVALID;
sewardj80611e32012-01-20 13:07:24 +000019043
19044 modrm = getUChar(delta);
sewardjcfca8cd2012-05-27 08:25:42 +000019045 UInt rG = gregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000019046
19047 if ( epartIsReg( modrm ) ) {
sewardjcfca8cd2012-05-27 08:25:42 +000019048 UInt rE = eregOfRexRM(pfx, modrm);
19049 IRTemp vE = newTemp(Ity_V128);
19050 assign( vE, getXMMReg(rE) );
19051 IRTemp dsE[4] = { inval, inval, inval, inval };
sewardj4b1cc832012-06-13 11:10:20 +000019052 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] );
sewardjcfca8cd2012-05-27 08:25:42 +000019053 imm8 = getUChar(delta+1);
19054 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */
sewardj80611e32012-01-20 13:07:24 +000019055 delta += 1+1;
sewardjcfca8cd2012-05-27 08:25:42 +000019056 DIP( "insertps $%u, %s,%s\n",
19057 imm8, nameXMMReg(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019058 } else {
sewardjcfca8cd2012-05-27 08:25:42 +000019059 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19060 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) );
19061 imm8 = getUChar(delta+alen);
sewardj80611e32012-01-20 13:07:24 +000019062 delta += alen+1;
sewardjcfca8cd2012-05-27 08:25:42 +000019063 DIP( "insertps $%u, %s,%s\n",
19064 imm8, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019065 }
19066
sewardjcfca8cd2012-05-27 08:25:42 +000019067 IRTemp vG = newTemp(Ity_V128);
19068 assign( vG, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019069
sewardjcfca8cd2012-05-27 08:25:42 +000019070 putXMMReg( rG, mkexpr(math_INSERTPS( vG, d2ins, imm8 )) );
sewardj80611e32012-01-20 13:07:24 +000019071 goto decode_success;
19072 }
19073 break;
19074
19075 case 0x22:
19076 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8
19077 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */
19078 if (have66noF2noF3(pfx)
19079 && sz == 2 /* REX.W is NOT present */) {
sewardj6faf7cc2012-05-25 15:53:01 +000019080 Int imm8_10;
19081 IRTemp src_u32 = newTemp(Ity_I32);
sewardj80611e32012-01-20 13:07:24 +000019082 modrm = getUChar(delta);
sewardj6faf7cc2012-05-25 15:53:01 +000019083 UInt rG = gregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000019084
19085 if ( epartIsReg( modrm ) ) {
sewardj6faf7cc2012-05-25 15:53:01 +000019086 UInt rE = eregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000019087 imm8_10 = (Int)(getUChar(delta+1) & 3);
sewardj6faf7cc2012-05-25 15:53:01 +000019088 assign( src_u32, getIReg32( rE ) );
sewardj80611e32012-01-20 13:07:24 +000019089 delta += 1+1;
sewardj6faf7cc2012-05-25 15:53:01 +000019090 DIP( "pinsrd $%d, %s,%s\n",
19091 imm8_10, nameIReg32(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019092 } else {
19093 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19094 imm8_10 = (Int)(getUChar(delta+alen) & 3);
sewardj6faf7cc2012-05-25 15:53:01 +000019095 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) );
sewardj80611e32012-01-20 13:07:24 +000019096 delta += alen+1;
19097 DIP( "pinsrd $%d, %s,%s\n",
sewardj6faf7cc2012-05-25 15:53:01 +000019098 imm8_10, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019099 }
19100
sewardj6faf7cc2012-05-25 15:53:01 +000019101 IRTemp src_vec = newTemp(Ity_V128);
19102 assign(src_vec, getXMMReg( rG ));
19103 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 );
19104 putXMMReg( rG, mkexpr(res_vec) );
sewardj80611e32012-01-20 13:07:24 +000019105 goto decode_success;
19106 }
19107 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8
19108 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */
19109 if (have66noF2noF3(pfx)
19110 && sz == 8 /* REX.W is present */) {
sewardj80611e32012-01-20 13:07:24 +000019111 Int imm8_0;
sewardj98d02cc2012-06-02 11:55:25 +000019112 IRTemp src_u64 = newTemp(Ity_I64);
sewardj80611e32012-01-20 13:07:24 +000019113 modrm = getUChar(delta);
sewardj98d02cc2012-06-02 11:55:25 +000019114 UInt rG = gregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000019115
19116 if ( epartIsReg( modrm ) ) {
sewardj98d02cc2012-06-02 11:55:25 +000019117 UInt rE = eregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000019118 imm8_0 = (Int)(getUChar(delta+1) & 1);
sewardj98d02cc2012-06-02 11:55:25 +000019119 assign( src_u64, getIReg64( rE ) );
sewardj80611e32012-01-20 13:07:24 +000019120 delta += 1+1;
sewardj98d02cc2012-06-02 11:55:25 +000019121 DIP( "pinsrq $%d, %s,%s\n",
19122 imm8_0, nameIReg64(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019123 } else {
19124 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19125 imm8_0 = (Int)(getUChar(delta+alen) & 1);
sewardj98d02cc2012-06-02 11:55:25 +000019126 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) );
sewardj80611e32012-01-20 13:07:24 +000019127 delta += alen+1;
19128 DIP( "pinsrq $%d, %s,%s\n",
sewardj98d02cc2012-06-02 11:55:25 +000019129 imm8_0, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019130 }
19131
sewardj98d02cc2012-06-02 11:55:25 +000019132 IRTemp src_vec = newTemp(Ity_V128);
19133 assign(src_vec, getXMMReg( rG ));
19134 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 );
19135 putXMMReg( rG, mkexpr(res_vec) );
sewardj80611e32012-01-20 13:07:24 +000019136 goto decode_success;
19137 }
19138 break;
19139
19140 case 0x40:
19141 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8
19142 Dot Product of Packed Single Precision Floating-Point Values (XMM) */
19143 if (have66noF2noF3(pfx) && sz == 2) {
sewardj80611e32012-01-20 13:07:24 +000019144 modrm = getUChar(delta);
sewardjadf357c2012-06-24 13:44:17 +000019145 Int imm8;
19146 IRTemp src_vec = newTemp(Ity_V128);
19147 IRTemp dst_vec = newTemp(Ity_V128);
19148 UInt rG = gregOfRexRM(pfx, modrm);
19149 assign( dst_vec, getXMMReg( rG ) );
sewardj80611e32012-01-20 13:07:24 +000019150 if ( epartIsReg( modrm ) ) {
sewardjadf357c2012-06-24 13:44:17 +000019151 UInt rE = eregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000019152 imm8 = (Int)getUChar(delta+1);
sewardjadf357c2012-06-24 13:44:17 +000019153 assign( src_vec, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000019154 delta += 1+1;
sewardjadf357c2012-06-24 13:44:17 +000019155 DIP( "dpps $%d, %s,%s\n",
19156 imm8, nameXMMReg(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019157 } else {
19158 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19159 1/* imm8 is 1 byte after the amode */ );
19160 gen_SEGV_if_not_16_aligned( addr );
sewardjadf357c2012-06-24 13:44:17 +000019161 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
sewardj80611e32012-01-20 13:07:24 +000019162 imm8 = (Int)getUChar(delta+alen);
19163 delta += alen+1;
19164 DIP( "dpps $%d, %s,%s\n",
sewardjadf357c2012-06-24 13:44:17 +000019165 imm8, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019166 }
sewardjadf357c2012-06-24 13:44:17 +000019167 IRTemp res = math_DPPS_128( src_vec, dst_vec, imm8 );
19168 putXMMReg( rG, mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000019169 goto decode_success;
19170 }
19171 break;
19172
19173 case 0x41:
19174 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8
19175 Dot Product of Packed Double Precision Floating-Point Values (XMM) */
19176 if (have66noF2noF3(pfx) && sz == 2) {
sewardj4ed05e02012-06-18 15:01:30 +000019177 modrm = getUChar(delta);
19178 Int imm8;
sewardj80611e32012-01-20 13:07:24 +000019179 IRTemp src_vec = newTemp(Ity_V128);
19180 IRTemp dst_vec = newTemp(Ity_V128);
sewardj4ed05e02012-06-18 15:01:30 +000019181 UInt rG = gregOfRexRM(pfx, modrm);
19182 assign( dst_vec, getXMMReg( rG ) );
sewardj80611e32012-01-20 13:07:24 +000019183 if ( epartIsReg( modrm ) ) {
sewardj4ed05e02012-06-18 15:01:30 +000019184 UInt rE = eregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000019185 imm8 = (Int)getUChar(delta+1);
sewardj4ed05e02012-06-18 15:01:30 +000019186 assign( src_vec, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000019187 delta += 1+1;
sewardj4ed05e02012-06-18 15:01:30 +000019188 DIP( "dppd $%d, %s,%s\n",
19189 imm8, nameXMMReg(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019190 } else {
19191 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19192 1/* imm8 is 1 byte after the amode */ );
19193 gen_SEGV_if_not_16_aligned( addr );
19194 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19195 imm8 = (Int)getUChar(delta+alen);
19196 delta += alen+1;
19197 DIP( "dppd $%d, %s,%s\n",
sewardj4ed05e02012-06-18 15:01:30 +000019198 imm8, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019199 }
sewardj4ed05e02012-06-18 15:01:30 +000019200 IRTemp res = math_DPPD_128( src_vec, dst_vec, imm8 );
19201 putXMMReg( rG, mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000019202 goto decode_success;
19203 }
19204 break;
19205
sewardj4d5bce22012-02-21 11:02:44 +000019206 case 0x42:
19207 /* 66 0F 3A 42 /r ib = MPSADBW xmm1, xmm2/m128, imm8
19208 Multiple Packed Sums of Absolute Difference (XMM) */
19209 if (have66noF2noF3(pfx) && sz == 2) {
sewardj4d5bce22012-02-21 11:02:44 +000019210 Int imm8;
19211 IRTemp src_vec = newTemp(Ity_V128);
19212 IRTemp dst_vec = newTemp(Ity_V128);
sewardj8516a1f2012-06-24 14:26:30 +000019213 modrm = getUChar(delta);
19214 UInt rG = gregOfRexRM(pfx, modrm);
sewardj4d5bce22012-02-21 11:02:44 +000019215
sewardj8516a1f2012-06-24 14:26:30 +000019216 assign( dst_vec, getXMMReg(rG) );
sewardj4d5bce22012-02-21 11:02:44 +000019217
19218 if ( epartIsReg( modrm ) ) {
sewardj8516a1f2012-06-24 14:26:30 +000019219 UInt rE = eregOfRexRM(pfx, modrm);
19220
sewardj4d5bce22012-02-21 11:02:44 +000019221 imm8 = (Int)getUChar(delta+1);
sewardj8516a1f2012-06-24 14:26:30 +000019222 assign( src_vec, getXMMReg(rE) );
sewardj4d5bce22012-02-21 11:02:44 +000019223 delta += 1+1;
19224 DIP( "mpsadbw $%d, %s,%s\n", imm8,
sewardj8516a1f2012-06-24 14:26:30 +000019225 nameXMMReg(rE), nameXMMReg(rG) );
sewardj4d5bce22012-02-21 11:02:44 +000019226 } else {
19227 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19228 1/* imm8 is 1 byte after the amode */ );
19229 gen_SEGV_if_not_16_aligned( addr );
19230 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19231 imm8 = (Int)getUChar(delta+alen);
19232 delta += alen+1;
sewardj8516a1f2012-06-24 14:26:30 +000019233 DIP( "mpsadbw $%d, %s,%s\n", imm8, dis_buf, nameXMMReg(rG) );
sewardj4d5bce22012-02-21 11:02:44 +000019234 }
19235
sewardj8516a1f2012-06-24 14:26:30 +000019236 putXMMReg( rG, mkexpr( math_MPSADBW_128(dst_vec, src_vec, imm8) ) );
sewardj4d5bce22012-02-21 11:02:44 +000019237 goto decode_success;
19238 }
19239 break;
19240
sewardj80611e32012-01-20 13:07:24 +000019241 case 0x44:
19242 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
19243 * Carry-less multiplication of selected XMM quadwords into XMM
19244 * registers (a.k.a multiplication of polynomials over GF(2))
19245 */
19246 if (have66noF2noF3(pfx) && sz == 2) {
19247
19248 Int imm8;
19249 IRTemp svec = newTemp(Ity_V128);
19250 IRTemp dvec = newTemp(Ity_V128);
sewardj1407a362012-06-24 15:11:38 +000019251 modrm = getUChar(delta);
19252 UInt rG = gregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000019253
sewardj1407a362012-06-24 15:11:38 +000019254 assign( dvec, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019255
19256 if ( epartIsReg( modrm ) ) {
sewardj1407a362012-06-24 15:11:38 +000019257 UInt rE = eregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000019258 imm8 = (Int)getUChar(delta+1);
sewardj1407a362012-06-24 15:11:38 +000019259 assign( svec, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000019260 delta += 1+1;
19261 DIP( "pclmulqdq $%d, %s,%s\n", imm8,
sewardj1407a362012-06-24 15:11:38 +000019262 nameXMMReg(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019263 } else {
19264 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19265 1/* imm8 is 1 byte after the amode */ );
19266 gen_SEGV_if_not_16_aligned( addr );
19267 assign( svec, loadLE( Ity_V128, mkexpr(addr) ) );
19268 imm8 = (Int)getUChar(delta+alen);
19269 delta += alen+1;
19270 DIP( "pclmulqdq $%d, %s,%s\n",
sewardj1407a362012-06-24 15:11:38 +000019271 imm8, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019272 }
19273
sewardj1407a362012-06-24 15:11:38 +000019274 putXMMReg( rG, mkexpr( math_PCLMULQDQ(dvec, svec, imm8) ) );
sewardj80611e32012-01-20 13:07:24 +000019275 goto decode_success;
19276 }
19277 break;
19278
19279 case 0x60:
19280 case 0x61:
19281 case 0x62:
19282 case 0x63:
19283 /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1
19284 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1
19285 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1
19286 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1
19287 (selected special cases that actually occur in glibc,
19288 not by any means a complete implementation.)
19289 */
19290 if (have66noF2noF3(pfx) && sz == 2) {
sewardjac75d7b2012-05-23 12:42:39 +000019291 Long delta0 = delta;
19292 delta = dis_PCMPxSTRx( vbi, pfx, delta, False/*!isAvx*/, opc );
19293 if (delta > delta0) goto decode_success;
19294 /* else fall through; dis_PCMPxSTRx failed to decode it */
sewardj80611e32012-01-20 13:07:24 +000019295 }
19296 break;
19297
sewardjac75d7b2012-05-23 12:42:39 +000019298 case 0xDF:
philippeff4d6be2012-02-14 21:34:56 +000019299 /* 66 0F 3A DF /r ib = AESKEYGENASSIST imm8, xmm2/m128, xmm1 */
19300 if (have66noF2noF3(pfx) && sz == 2) {
sewardj1407a362012-06-24 15:11:38 +000019301 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, False/*!isAvx*/ );
philippeff4d6be2012-02-14 21:34:56 +000019302 goto decode_success;
19303 }
19304 break;
19305
sewardj80611e32012-01-20 13:07:24 +000019306 default:
19307 break;
19308
19309 }
19310
19311 decode_failure:
19312 *decode_OK = False;
19313 return deltaIN;
19314
19315 decode_success:
19316 *decode_OK = True;
19317 return delta;
19318}
19319
19320
19321/*------------------------------------------------------------*/
19322/*--- ---*/
19323/*--- Top-level post-escape decoders: dis_ESC_NONE ---*/
19324/*--- ---*/
19325/*------------------------------------------------------------*/
19326
19327__attribute__((noinline))
19328static
19329Long dis_ESC_NONE (
19330 /*MB_OUT*/DisResult* dres,
19331 /*MB_OUT*/Bool* expect_CAS,
florianbeac5302014-12-31 12:09:38 +000019332 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
sewardj80611e32012-01-20 13:07:24 +000019333 Bool resteerCisOk,
19334 void* callback_opaque,
floriancacba8e2014-12-15 18:58:07 +000019335 const VexArchInfo* archinfo,
19336 const VexAbiInfo* vbi,
sewardj80611e32012-01-20 13:07:24 +000019337 Prefix pfx, Int sz, Long deltaIN
19338 )
19339{
19340 Long d64 = 0;
19341 UChar abyte = 0;
19342 IRTemp addr = IRTemp_INVALID;
19343 IRTemp t1 = IRTemp_INVALID;
19344 IRTemp t2 = IRTemp_INVALID;
19345 IRTemp t3 = IRTemp_INVALID;
19346 IRTemp t4 = IRTemp_INVALID;
19347 IRTemp t5 = IRTemp_INVALID;
19348 IRType ty = Ity_INVALID;
19349 UChar modrm = 0;
19350 Int am_sz = 0;
19351 Int d_sz = 0;
19352 Int alen = 0;
19353 HChar dis_buf[50];
19354
19355 Long delta = deltaIN;
sewardj38b1d692013-10-15 17:21:42 +000019356 UChar opc = getUChar(delta); delta++;
19357
19358 /* delta now points at the modrm byte. In most of the cases that
19359 follow, neither the F2 nor F3 prefixes are allowed. However,
19360 for some basic arithmetic operations we have to allow F2/XACQ or
19361 F3/XREL in the case where the destination is memory and the LOCK
19362 prefix is also present. Do this check by looking at the modrm
19363 byte but not advancing delta over it. */
19364 /* By default, F2 and F3 are not allowed, so let's start off with
19365 that setting. */
19366 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
19367 { UChar tmp_modrm = getUChar(delta);
19368 switch (opc) {
19369 case 0x00: /* ADD Gb,Eb */ case 0x01: /* ADD Gv,Ev */
19370 case 0x08: /* OR Gb,Eb */ case 0x09: /* OR Gv,Ev */
19371 case 0x10: /* ADC Gb,Eb */ case 0x11: /* ADC Gv,Ev */
19372 case 0x18: /* SBB Gb,Eb */ case 0x19: /* SBB Gv,Ev */
19373 case 0x20: /* AND Gb,Eb */ case 0x21: /* AND Gv,Ev */
19374 case 0x28: /* SUB Gb,Eb */ case 0x29: /* SUB Gv,Ev */
19375 case 0x30: /* XOR Gb,Eb */ case 0x31: /* XOR Gv,Ev */
19376 if (!epartIsReg(tmp_modrm)
19377 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
19378 /* dst is mem, and we have F2 or F3 but not both */
19379 validF2orF3 = True;
19380 }
19381 break;
19382 default:
19383 break;
19384 }
19385 }
19386
19387 /* Now, in the switch below, for the opc values examined by the
19388 switch above, use validF2orF3 rather than looking at pfx
19389 directly. */
sewardj80611e32012-01-20 13:07:24 +000019390 switch (opc) {
19391
19392 case 0x00: /* ADD Gb,Eb */
sewardj38b1d692013-10-15 17:21:42 +000019393 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019394 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" );
19395 return delta;
19396 case 0x01: /* ADD Gv,Ev */
sewardj38b1d692013-10-15 17:21:42 +000019397 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019398 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" );
19399 return delta;
19400
19401 case 0x02: /* ADD Eb,Gb */
19402 if (haveF2orF3(pfx)) goto decode_failure;
19403 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" );
19404 return delta;
19405 case 0x03: /* ADD Ev,Gv */
19406 if (haveF2orF3(pfx)) goto decode_failure;
19407 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" );
19408 return delta;
19409
19410 case 0x04: /* ADD Ib, AL */
19411 if (haveF2orF3(pfx)) goto decode_failure;
19412 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
19413 return delta;
19414 case 0x05: /* ADD Iv, eAX */
19415 if (haveF2orF3(pfx)) goto decode_failure;
19416 delta = dis_op_imm_A(sz, False, Iop_Add8, True, delta, "add" );
19417 return delta;
19418
19419 case 0x08: /* OR Gb,Eb */
sewardj38b1d692013-10-15 17:21:42 +000019420 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019421 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" );
19422 return delta;
19423 case 0x09: /* OR Gv,Ev */
sewardj38b1d692013-10-15 17:21:42 +000019424 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019425 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" );
19426 return delta;
19427
19428 case 0x0A: /* OR Eb,Gb */
19429 if (haveF2orF3(pfx)) goto decode_failure;
19430 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" );
19431 return delta;
19432 case 0x0B: /* OR Ev,Gv */
19433 if (haveF2orF3(pfx)) goto decode_failure;
19434 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" );
19435 return delta;
19436
19437 case 0x0C: /* OR Ib, AL */
19438 if (haveF2orF3(pfx)) goto decode_failure;
19439 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
19440 return delta;
19441 case 0x0D: /* OR Iv, eAX */
19442 if (haveF2orF3(pfx)) goto decode_failure;
19443 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
19444 return delta;
19445
19446 case 0x10: /* ADC Gb,Eb */
sewardj38b1d692013-10-15 17:21:42 +000019447 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019448 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" );
19449 return delta;
19450 case 0x11: /* ADC Gv,Ev */
sewardj38b1d692013-10-15 17:21:42 +000019451 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019452 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" );
19453 return delta;
19454
19455 case 0x12: /* ADC Eb,Gb */
19456 if (haveF2orF3(pfx)) goto decode_failure;
19457 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" );
19458 return delta;
19459 case 0x13: /* ADC Ev,Gv */
19460 if (haveF2orF3(pfx)) goto decode_failure;
19461 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" );
19462 return delta;
19463
19464 case 0x14: /* ADC Ib, AL */
19465 if (haveF2orF3(pfx)) goto decode_failure;
19466 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
19467 return delta;
19468 case 0x15: /* ADC Iv, eAX */
19469 if (haveF2orF3(pfx)) goto decode_failure;
19470 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
19471 return delta;
19472
19473 case 0x18: /* SBB Gb,Eb */
sewardj38b1d692013-10-15 17:21:42 +000019474 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019475 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" );
19476 return delta;
19477 case 0x19: /* SBB Gv,Ev */
sewardj38b1d692013-10-15 17:21:42 +000019478 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019479 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" );
19480 return delta;
19481
19482 case 0x1A: /* SBB Eb,Gb */
19483 if (haveF2orF3(pfx)) goto decode_failure;
19484 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" );
19485 return delta;
19486 case 0x1B: /* SBB Ev,Gv */
19487 if (haveF2orF3(pfx)) goto decode_failure;
19488 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" );
19489 return delta;
19490
19491 case 0x1C: /* SBB Ib, AL */
19492 if (haveF2orF3(pfx)) goto decode_failure;
19493 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
19494 return delta;
19495 case 0x1D: /* SBB Iv, eAX */
19496 if (haveF2orF3(pfx)) goto decode_failure;
19497 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
19498 return delta;
19499
19500 case 0x20: /* AND Gb,Eb */
sewardj38b1d692013-10-15 17:21:42 +000019501 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019502 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" );
19503 return delta;
19504 case 0x21: /* AND Gv,Ev */
sewardj38b1d692013-10-15 17:21:42 +000019505 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019506 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" );
19507 return delta;
19508
19509 case 0x22: /* AND Eb,Gb */
19510 if (haveF2orF3(pfx)) goto decode_failure;
19511 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" );
19512 return delta;
19513 case 0x23: /* AND Ev,Gv */
19514 if (haveF2orF3(pfx)) goto decode_failure;
19515 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" );
19516 return delta;
19517
19518 case 0x24: /* AND Ib, AL */
19519 if (haveF2orF3(pfx)) goto decode_failure;
19520 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
19521 return delta;
19522 case 0x25: /* AND Iv, eAX */
19523 if (haveF2orF3(pfx)) goto decode_failure;
19524 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
19525 return delta;
19526
19527 case 0x28: /* SUB Gb,Eb */
sewardj38b1d692013-10-15 17:21:42 +000019528 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019529 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" );
19530 return delta;
19531 case 0x29: /* SUB Gv,Ev */
sewardj38b1d692013-10-15 17:21:42 +000019532 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019533 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" );
19534 return delta;
19535
19536 case 0x2A: /* SUB Eb,Gb */
19537 if (haveF2orF3(pfx)) goto decode_failure;
19538 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" );
19539 return delta;
19540 case 0x2B: /* SUB Ev,Gv */
19541 if (haveF2orF3(pfx)) goto decode_failure;
19542 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" );
19543 return delta;
19544
19545 case 0x2C: /* SUB Ib, AL */
19546 if (haveF2orF3(pfx)) goto decode_failure;
19547 delta = dis_op_imm_A(1, False, Iop_Sub8, True, delta, "sub" );
19548 return delta;
sewardj80611e32012-01-20 13:07:24 +000019549 case 0x2D: /* SUB Iv, eAX */
19550 if (haveF2orF3(pfx)) goto decode_failure;
19551 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
19552 return delta;
19553
19554 case 0x30: /* XOR Gb,Eb */
sewardj38b1d692013-10-15 17:21:42 +000019555 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019556 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" );
19557 return delta;
19558 case 0x31: /* XOR Gv,Ev */
sewardj38b1d692013-10-15 17:21:42 +000019559 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019560 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" );
19561 return delta;
19562
19563 case 0x32: /* XOR Eb,Gb */
19564 if (haveF2orF3(pfx)) goto decode_failure;
19565 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" );
19566 return delta;
19567 case 0x33: /* XOR Ev,Gv */
19568 if (haveF2orF3(pfx)) goto decode_failure;
19569 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" );
19570 return delta;
19571
19572 case 0x34: /* XOR Ib, AL */
19573 if (haveF2orF3(pfx)) goto decode_failure;
19574 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
19575 return delta;
19576 case 0x35: /* XOR Iv, eAX */
19577 if (haveF2orF3(pfx)) goto decode_failure;
19578 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
19579 return delta;
19580
19581 case 0x38: /* CMP Gb,Eb */
19582 if (haveF2orF3(pfx)) goto decode_failure;
19583 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" );
19584 return delta;
19585 case 0x39: /* CMP Gv,Ev */
19586 if (haveF2orF3(pfx)) goto decode_failure;
19587 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" );
19588 return delta;
19589
19590 case 0x3A: /* CMP Eb,Gb */
19591 if (haveF2orF3(pfx)) goto decode_failure;
19592 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" );
19593 return delta;
19594 case 0x3B: /* CMP Ev,Gv */
19595 if (haveF2orF3(pfx)) goto decode_failure;
19596 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" );
19597 return delta;
19598
19599 case 0x3C: /* CMP Ib, AL */
19600 if (haveF2orF3(pfx)) goto decode_failure;
19601 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
19602 return delta;
19603 case 0x3D: /* CMP Iv, eAX */
19604 if (haveF2orF3(pfx)) goto decode_failure;
19605 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
19606 return delta;
19607
19608 case 0x50: /* PUSH eAX */
19609 case 0x51: /* PUSH eCX */
19610 case 0x52: /* PUSH eDX */
19611 case 0x53: /* PUSH eBX */
19612 case 0x55: /* PUSH eBP */
19613 case 0x56: /* PUSH eSI */
19614 case 0x57: /* PUSH eDI */
19615 case 0x54: /* PUSH eSP */
19616 /* This is the Right Way, in that the value to be pushed is
19617 established before %rsp is changed, so that pushq %rsp
19618 correctly pushes the old value. */
19619 if (haveF2orF3(pfx)) goto decode_failure;
19620 vassert(sz == 2 || sz == 4 || sz == 8);
19621 if (sz == 4)
19622 sz = 8; /* there is no encoding for 32-bit push in 64-bit mode */
19623 ty = sz==2 ? Ity_I16 : Ity_I64;
19624 t1 = newTemp(ty);
19625 t2 = newTemp(Ity_I64);
19626 assign(t1, getIRegRexB(sz, pfx, opc-0x50));
19627 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz)));
19628 putIReg64(R_RSP, mkexpr(t2) );
19629 storeLE(mkexpr(t2),mkexpr(t1));
19630 DIP("push%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x50));
19631 return delta;
19632
19633 case 0x58: /* POP eAX */
19634 case 0x59: /* POP eCX */
19635 case 0x5A: /* POP eDX */
19636 case 0x5B: /* POP eBX */
19637 case 0x5D: /* POP eBP */
19638 case 0x5E: /* POP eSI */
19639 case 0x5F: /* POP eDI */
19640 case 0x5C: /* POP eSP */
19641 if (haveF2orF3(pfx)) goto decode_failure;
19642 vassert(sz == 2 || sz == 4 || sz == 8);
19643 if (sz == 4)
19644 sz = 8; /* there is no encoding for 32-bit pop in 64-bit mode */
19645 t1 = newTemp(szToITy(sz));
19646 t2 = newTemp(Ity_I64);
19647 assign(t2, getIReg64(R_RSP));
19648 assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
19649 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
19650 putIRegRexB(sz, pfx, opc-0x58, mkexpr(t1));
19651 DIP("pop%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x58));
19652 return delta;
19653
19654 case 0x63: /* MOVSX */
19655 if (haveF2orF3(pfx)) goto decode_failure;
19656 if (haveREX(pfx) && 1==getRexW(pfx)) {
19657 vassert(sz == 8);
19658 /* movsx r/m32 to r64 */
19659 modrm = getUChar(delta);
19660 if (epartIsReg(modrm)) {
19661 delta++;
19662 putIRegG(8, pfx, modrm,
19663 unop(Iop_32Sto64,
19664 getIRegE(4, pfx, modrm)));
19665 DIP("movslq %s,%s\n",
19666 nameIRegE(4, pfx, modrm),
19667 nameIRegG(8, pfx, modrm));
19668 return delta;
19669 } else {
19670 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
19671 delta += alen;
19672 putIRegG(8, pfx, modrm,
19673 unop(Iop_32Sto64,
19674 loadLE(Ity_I32, mkexpr(addr))));
19675 DIP("movslq %s,%s\n", dis_buf,
19676 nameIRegG(8, pfx, modrm));
19677 return delta;
19678 }
19679 } else {
19680 goto decode_failure;
19681 }
19682
19683 case 0x68: /* PUSH Iv */
19684 if (haveF2orF3(pfx)) goto decode_failure;
19685 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
19686 if (sz == 4) sz = 8;
19687 d64 = getSDisp(imin(4,sz),delta);
19688 delta += imin(4,sz);
19689 goto do_push_I;
19690
19691 case 0x69: /* IMUL Iv, Ev, Gv */
19692 if (haveF2orF3(pfx)) goto decode_failure;
19693 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, sz );
19694 return delta;
19695
19696 case 0x6A: /* PUSH Ib, sign-extended to sz */
19697 if (haveF2orF3(pfx)) goto decode_failure;
19698 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
19699 if (sz == 4) sz = 8;
19700 d64 = getSDisp8(delta); delta += 1;
19701 goto do_push_I;
19702 do_push_I:
19703 ty = szToITy(sz);
19704 t1 = newTemp(Ity_I64);
19705 t2 = newTemp(ty);
19706 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
19707 putIReg64(R_RSP, mkexpr(t1) );
19708 /* stop mkU16 asserting if d64 is a negative 16-bit number
19709 (bug #132813) */
19710 if (ty == Ity_I16)
19711 d64 &= 0xFFFF;
19712 storeLE( mkexpr(t1), mkU(ty,d64) );
19713 DIP("push%c $%lld\n", nameISize(sz), (Long)d64);
19714 return delta;
19715
19716 case 0x6B: /* IMUL Ib, Ev, Gv */
19717 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, 1 );
19718 return delta;
19719
19720 case 0x70:
19721 case 0x71:
19722 case 0x72: /* JBb/JNAEb (jump below) */
19723 case 0x73: /* JNBb/JAEb (jump not below) */
19724 case 0x74: /* JZb/JEb (jump zero) */
19725 case 0x75: /* JNZb/JNEb (jump not zero) */
19726 case 0x76: /* JBEb/JNAb (jump below or equal) */
19727 case 0x77: /* JNBEb/JAb (jump not below or equal) */
19728 case 0x78: /* JSb (jump negative) */
19729 case 0x79: /* JNSb (jump not negative) */
19730 case 0x7A: /* JP (jump parity even) */
19731 case 0x7B: /* JNP/JPO (jump parity odd) */
19732 case 0x7C: /* JLb/JNGEb (jump less) */
19733 case 0x7D: /* JGEb/JNLb (jump greater or equal) */
19734 case 0x7E: /* JLEb/JNGb (jump less or equal) */
19735 case 0x7F: { /* JGb/JNLEb (jump greater) */
19736 Long jmpDelta;
florian55085f82012-11-21 00:36:55 +000019737 const HChar* comment = "";
mjw67ac3fd2014-05-09 11:41:06 +000019738 if (haveF3(pfx)) goto decode_failure;
19739 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardj80611e32012-01-20 13:07:24 +000019740 jmpDelta = getSDisp8(delta);
19741 vassert(-128 <= jmpDelta && jmpDelta < 128);
19742 d64 = (guest_RIP_bbstart+delta+1) + jmpDelta;
19743 delta++;
19744 if (resteerCisOk
19745 && vex_control.guest_chase_cond
19746 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
19747 && jmpDelta < 0
florianbeac5302014-12-31 12:09:38 +000019748 && resteerOkFn( callback_opaque, (Addr64)d64) ) {
sewardj80611e32012-01-20 13:07:24 +000019749 /* Speculation: assume this backward branch is taken. So we
19750 need to emit a side-exit to the insn following this one,
19751 on the negation of the condition, and continue at the
19752 branch target address (d64). If we wind up back at the
19753 first instruction of the trace, just stop; it's better to
19754 let the IR loop unroller handle that case. */
19755 stmt( IRStmt_Exit(
19756 mk_amd64g_calculate_condition(
19757 (AMD64Condcode)(1 ^ (opc - 0x70))),
19758 Ijk_Boring,
sewardjc6f970f2012-04-02 21:54:49 +000019759 IRConst_U64(guest_RIP_bbstart+delta),
19760 OFFB_RIP ) );
sewardj80611e32012-01-20 13:07:24 +000019761 dres->whatNext = Dis_ResteerC;
19762 dres->continueAt = d64;
19763 comment = "(assumed taken)";
19764 }
19765 else
19766 if (resteerCisOk
19767 && vex_control.guest_chase_cond
19768 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
19769 && jmpDelta >= 0
19770 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) {
19771 /* Speculation: assume this forward branch is not taken. So
19772 we need to emit a side-exit to d64 (the dest) and continue
19773 disassembling at the insn immediately following this
19774 one. */
19775 stmt( IRStmt_Exit(
19776 mk_amd64g_calculate_condition((AMD64Condcode)(opc - 0x70)),
19777 Ijk_Boring,
sewardjc6f970f2012-04-02 21:54:49 +000019778 IRConst_U64(d64),
19779 OFFB_RIP ) );
sewardj80611e32012-01-20 13:07:24 +000019780 dres->whatNext = Dis_ResteerC;
19781 dres->continueAt = guest_RIP_bbstart+delta;
19782 comment = "(assumed not taken)";
19783 }
19784 else {
19785 /* Conservative default translation - end the block at this
19786 point. */
sewardjc6f970f2012-04-02 21:54:49 +000019787 jcc_01( dres, (AMD64Condcode)(opc - 0x70),
19788 guest_RIP_bbstart+delta, d64 );
19789 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000019790 }
19791 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), d64, comment);
19792 return delta;
19793 }
19794
19795 case 0x80: /* Grp1 Ib,Eb */
sewardj80611e32012-01-20 13:07:24 +000019796 modrm = getUChar(delta);
sewardj38b1d692013-10-15 17:21:42 +000019797 /* Disallow F2/XACQ and F3/XREL for the non-mem case. Allow
19798 just one for the mem case and also require LOCK in this case.
19799 Note that this erroneously allows XACQ/XREL on CMP since we
19800 don't check the subopcode here. No big deal. */
19801 if (epartIsReg(modrm) && haveF2orF3(pfx))
19802 goto decode_failure;
19803 if (!epartIsReg(modrm) && haveF2andF3(pfx))
19804 goto decode_failure;
19805 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx))
19806 goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019807 am_sz = lengthAMode(pfx,delta);
19808 sz = 1;
19809 d_sz = 1;
19810 d64 = getSDisp8(delta + am_sz);
19811 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
19812 return delta;
19813
19814 case 0x81: /* Grp1 Iv,Ev */
sewardj80611e32012-01-20 13:07:24 +000019815 modrm = getUChar(delta);
sewardj38b1d692013-10-15 17:21:42 +000019816 /* Same comment as for case 0x80 just above. */
19817 if (epartIsReg(modrm) && haveF2orF3(pfx))
19818 goto decode_failure;
19819 if (!epartIsReg(modrm) && haveF2andF3(pfx))
19820 goto decode_failure;
19821 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx))
19822 goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019823 am_sz = lengthAMode(pfx,delta);
19824 d_sz = imin(sz,4);
19825 d64 = getSDisp(d_sz, delta + am_sz);
19826 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
19827 return delta;
19828
19829 case 0x83: /* Grp1 Ib,Ev */
19830 if (haveF2orF3(pfx)) goto decode_failure;
19831 modrm = getUChar(delta);
19832 am_sz = lengthAMode(pfx,delta);
19833 d_sz = 1;
19834 d64 = getSDisp8(delta + am_sz);
19835 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
19836 return delta;
19837
19838 case 0x84: /* TEST Eb,Gb */
19839 if (haveF2orF3(pfx)) goto decode_failure;
19840 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, 1, delta, "test" );
19841 return delta;
19842
19843 case 0x85: /* TEST Ev,Gv */
19844 if (haveF2orF3(pfx)) goto decode_failure;
19845 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, sz, delta, "test" );
19846 return delta;
19847
19848 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
sewardj38b1d692013-10-15 17:21:42 +000019849 prefix. Therefore, generate CAS regardless of the presence or
19850 otherwise of a LOCK prefix. */
sewardj80611e32012-01-20 13:07:24 +000019851 case 0x86: /* XCHG Gb,Eb */
19852 sz = 1;
19853 /* Fall through ... */
19854 case 0x87: /* XCHG Gv,Ev */
sewardj80611e32012-01-20 13:07:24 +000019855 modrm = getUChar(delta);
sewardj38b1d692013-10-15 17:21:42 +000019856 /* Check whether F2 or F3 are allowable. For the mem case, one
19857 or the other but not both are. We don't care about the
19858 presence of LOCK in this case -- XCHG is unusual in this
19859 respect. */
19860 if (haveF2orF3(pfx)) {
19861 if (epartIsReg(modrm)) {
19862 goto decode_failure;
19863 } else {
19864 if (haveF2andF3(pfx))
19865 goto decode_failure;
19866 }
19867 }
sewardj80611e32012-01-20 13:07:24 +000019868 ty = szToITy(sz);
19869 t1 = newTemp(ty); t2 = newTemp(ty);
19870 if (epartIsReg(modrm)) {
19871 assign(t1, getIRegE(sz, pfx, modrm));
19872 assign(t2, getIRegG(sz, pfx, modrm));
19873 putIRegG(sz, pfx, modrm, mkexpr(t1));
19874 putIRegE(sz, pfx, modrm, mkexpr(t2));
19875 delta++;
19876 DIP("xchg%c %s, %s\n",
19877 nameISize(sz), nameIRegG(sz, pfx, modrm),
19878 nameIRegE(sz, pfx, modrm));
19879 } else {
19880 *expect_CAS = True;
19881 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
19882 assign( t1, loadLE(ty, mkexpr(addr)) );
19883 assign( t2, getIRegG(sz, pfx, modrm) );
19884 casLE( mkexpr(addr),
19885 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
19886 putIRegG( sz, pfx, modrm, mkexpr(t1) );
19887 delta += alen;
19888 DIP("xchg%c %s, %s\n", nameISize(sz),
19889 nameIRegG(sz, pfx, modrm), dis_buf);
19890 }
19891 return delta;
19892
sewardj38b1d692013-10-15 17:21:42 +000019893 case 0x88: { /* MOV Gb,Eb */
19894 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
19895 Bool ok = True;
19896 delta = dis_mov_G_E(vbi, pfx, 1, delta, &ok);
19897 if (!ok) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019898 return delta;
sewardj38b1d692013-10-15 17:21:42 +000019899 }
sewardj80611e32012-01-20 13:07:24 +000019900
sewardj38b1d692013-10-15 17:21:42 +000019901 case 0x89: { /* MOV Gv,Ev */
19902 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
19903 Bool ok = True;
19904 delta = dis_mov_G_E(vbi, pfx, sz, delta, &ok);
19905 if (!ok) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019906 return delta;
sewardj38b1d692013-10-15 17:21:42 +000019907 }
sewardj80611e32012-01-20 13:07:24 +000019908
19909 case 0x8A: /* MOV Eb,Gb */
19910 if (haveF2orF3(pfx)) goto decode_failure;
19911 delta = dis_mov_E_G(vbi, pfx, 1, delta);
19912 return delta;
19913
19914 case 0x8B: /* MOV Ev,Gv */
19915 if (haveF2orF3(pfx)) goto decode_failure;
19916 delta = dis_mov_E_G(vbi, pfx, sz, delta);
19917 return delta;
19918
19919 case 0x8D: /* LEA M,Gv */
19920 if (haveF2orF3(pfx)) goto decode_failure;
19921 if (sz != 4 && sz != 8)
19922 goto decode_failure;
19923 modrm = getUChar(delta);
19924 if (epartIsReg(modrm))
19925 goto decode_failure;
19926 /* NOTE! this is the one place where a segment override prefix
19927 has no effect on the address calculation. Therefore we clear
19928 any segment override bits in pfx. */
19929 addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 );
19930 delta += alen;
19931 /* This is a hack. But it isn't clear that really doing the
19932 calculation at 32 bits is really worth it. Hence for leal,
19933 do the full 64-bit calculation and then truncate it. */
19934 putIRegG( sz, pfx, modrm,
19935 sz == 4
19936 ? unop(Iop_64to32, mkexpr(addr))
19937 : mkexpr(addr)
19938 );
19939 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
19940 nameIRegG(sz,pfx,modrm));
19941 return delta;
19942
19943 case 0x8F: { /* POPQ m64 / POPW m16 */
19944 Int len;
19945 UChar rm;
19946 /* There is no encoding for 32-bit pop in 64-bit mode.
19947 So sz==4 actually means sz==8. */
19948 if (haveF2orF3(pfx)) goto decode_failure;
19949 vassert(sz == 2 || sz == 4
19950 || /* tolerate redundant REX.W, see #210481 */ sz == 8);
19951 if (sz == 4) sz = 8;
19952 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
19953
19954 rm = getUChar(delta);
19955
19956 /* make sure this instruction is correct POP */
19957 if (epartIsReg(rm) || gregLO3ofRM(rm) != 0)
19958 goto decode_failure;
19959 /* and has correct size */
19960 vassert(sz == 8);
19961
19962 t1 = newTemp(Ity_I64);
19963 t3 = newTemp(Ity_I64);
19964 assign( t1, getIReg64(R_RSP) );
19965 assign( t3, loadLE(Ity_I64, mkexpr(t1)) );
19966
19967 /* Increase RSP; must be done before the STORE. Intel manual
19968 says: If the RSP register is used as a base register for
19969 addressing a destination operand in memory, the POP
19970 instruction computes the effective address of the operand
19971 after it increments the RSP register. */
19972 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) );
19973
19974 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
19975 storeLE( mkexpr(addr), mkexpr(t3) );
19976
19977 DIP("popl %s\n", dis_buf);
19978
19979 delta += len;
19980 return delta;
19981 }
19982
19983 case 0x90: /* XCHG eAX,eAX */
19984 /* detect and handle F3 90 (rep nop) specially */
19985 if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) {
19986 DIP("rep nop (P4 pause)\n");
19987 /* "observe" the hint. The Vex client needs to be careful not
19988 to cause very long delays as a result, though. */
sewardjc6f970f2012-04-02 21:54:49 +000019989 jmp_lit(dres, Ijk_Yield, guest_RIP_bbstart+delta);
19990 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000019991 return delta;
19992 }
19993 /* detect and handle NOPs specially */
19994 if (/* F2/F3 probably change meaning completely */
19995 !haveF2orF3(pfx)
19996 /* If REX.B is 1, we're not exchanging rAX with itself */
19997 && getRexB(pfx)==0 ) {
19998 DIP("nop\n");
19999 return delta;
20000 }
20001 /* else fall through to normal case. */
20002 case 0x91: /* XCHG rAX,rCX */
20003 case 0x92: /* XCHG rAX,rDX */
20004 case 0x93: /* XCHG rAX,rBX */
20005 case 0x94: /* XCHG rAX,rSP */
20006 case 0x95: /* XCHG rAX,rBP */
20007 case 0x96: /* XCHG rAX,rSI */
20008 case 0x97: /* XCHG rAX,rDI */
20009 /* guard against mutancy */
20010 if (haveF2orF3(pfx)) goto decode_failure;
20011 codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 );
20012 return delta;
20013
20014 case 0x98: /* CBW */
20015 if (haveF2orF3(pfx)) goto decode_failure;
20016 if (sz == 8) {
20017 putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) );
20018 DIP(/*"cdqe\n"*/"cltq");
20019 return delta;
20020 }
20021 if (sz == 4) {
20022 putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) );
20023 DIP("cwtl\n");
20024 return delta;
20025 }
20026 if (sz == 2) {
20027 putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) );
20028 DIP("cbw\n");
20029 return delta;
20030 }
20031 goto decode_failure;
20032
20033 case 0x99: /* CWD/CDQ/CQO */
20034 if (haveF2orF3(pfx)) goto decode_failure;
20035 vassert(sz == 2 || sz == 4 || sz == 8);
20036 ty = szToITy(sz);
20037 putIRegRDX( sz,
20038 binop(mkSizedOp(ty,Iop_Sar8),
20039 getIRegRAX(sz),
20040 mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) );
20041 DIP(sz == 2 ? "cwd\n"
20042 : (sz == 4 ? /*"cdq\n"*/ "cltd\n"
20043 : "cqo\n"));
20044 return delta;
20045
20046 case 0x9B: /* FWAIT (X87 insn) */
20047 /* ignore? */
20048 DIP("fwait\n");
20049 return delta;
20050
20051 case 0x9C: /* PUSHF */ {
20052 /* Note. There is no encoding for a 32-bit pushf in 64-bit
20053 mode. So sz==4 actually means sz==8. */
20054 /* 24 July 06: has also been seen with a redundant REX prefix,
20055 so must also allow sz==8. */
20056 if (haveF2orF3(pfx)) goto decode_failure;
20057 vassert(sz == 2 || sz == 4 || sz == 8);
20058 if (sz == 4) sz = 8;
20059 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20060
20061 t1 = newTemp(Ity_I64);
20062 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
20063 putIReg64(R_RSP, mkexpr(t1) );
20064
20065 t2 = newTemp(Ity_I64);
20066 assign( t2, mk_amd64g_calculate_rflags_all() );
20067
20068 /* Patch in the D flag. This can simply be a copy of bit 10 of
20069 baseBlock[OFFB_DFLAG]. */
20070 t3 = newTemp(Ity_I64);
20071 assign( t3, binop(Iop_Or64,
20072 mkexpr(t2),
20073 binop(Iop_And64,
20074 IRExpr_Get(OFFB_DFLAG,Ity_I64),
20075 mkU64(1<<10)))
20076 );
20077
20078 /* And patch in the ID flag. */
20079 t4 = newTemp(Ity_I64);
20080 assign( t4, binop(Iop_Or64,
20081 mkexpr(t3),
20082 binop(Iop_And64,
20083 binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64),
20084 mkU8(21)),
20085 mkU64(1<<21)))
20086 );
20087
20088 /* And patch in the AC flag too. */
20089 t5 = newTemp(Ity_I64);
20090 assign( t5, binop(Iop_Or64,
20091 mkexpr(t4),
20092 binop(Iop_And64,
20093 binop(Iop_Shl64, IRExpr_Get(OFFB_ACFLAG,Ity_I64),
20094 mkU8(18)),
20095 mkU64(1<<18)))
20096 );
20097
20098 /* if sz==2, the stored value needs to be narrowed. */
20099 if (sz == 2)
20100 storeLE( mkexpr(t1), unop(Iop_32to16,
20101 unop(Iop_64to32,mkexpr(t5))) );
20102 else
20103 storeLE( mkexpr(t1), mkexpr(t5) );
20104
20105 DIP("pushf%c\n", nameISize(sz));
20106 return delta;
20107 }
20108
20109 case 0x9D: /* POPF */
20110 /* Note. There is no encoding for a 32-bit popf in 64-bit mode.
20111 So sz==4 actually means sz==8. */
20112 if (haveF2orF3(pfx)) goto decode_failure;
20113 vassert(sz == 2 || sz == 4);
20114 if (sz == 4) sz = 8;
20115 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20116 t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64);
20117 assign(t2, getIReg64(R_RSP));
20118 assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2))));
20119 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
20120 /* t1 is the flag word. Mask out everything except OSZACP and
20121 set the flags thunk to AMD64G_CC_OP_COPY. */
20122 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
20123 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
20124 stmt( IRStmt_Put( OFFB_CC_DEP1,
20125 binop(Iop_And64,
20126 mkexpr(t1),
20127 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P
20128 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z
20129 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O )
20130 )
20131 )
20132 );
20133
20134 /* Also need to set the D flag, which is held in bit 10 of t1.
20135 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
20136 stmt( IRStmt_Put(
20137 OFFB_DFLAG,
florian99dd03e2013-01-29 03:56:06 +000020138 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +000020139 unop(Iop_64to1,
sewardj80611e32012-01-20 13:07:24 +000020140 binop(Iop_And64,
20141 binop(Iop_Shr64, mkexpr(t1), mkU8(10)),
sewardj009230b2013-01-26 11:47:55 +000020142 mkU64(1))),
florian99dd03e2013-01-29 03:56:06 +000020143 mkU64(0xFFFFFFFFFFFFFFFFULL),
20144 mkU64(1)))
sewardj80611e32012-01-20 13:07:24 +000020145 );
20146
20147 /* And set the ID flag */
20148 stmt( IRStmt_Put(
20149 OFFB_IDFLAG,
florian99dd03e2013-01-29 03:56:06 +000020150 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +000020151 unop(Iop_64to1,
sewardj80611e32012-01-20 13:07:24 +000020152 binop(Iop_And64,
20153 binop(Iop_Shr64, mkexpr(t1), mkU8(21)),
sewardj009230b2013-01-26 11:47:55 +000020154 mkU64(1))),
florian99dd03e2013-01-29 03:56:06 +000020155 mkU64(1),
20156 mkU64(0)))
sewardj80611e32012-01-20 13:07:24 +000020157 );
20158
20159 /* And set the AC flag too */
20160 stmt( IRStmt_Put(
20161 OFFB_ACFLAG,
florian99dd03e2013-01-29 03:56:06 +000020162 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +000020163 unop(Iop_64to1,
sewardj80611e32012-01-20 13:07:24 +000020164 binop(Iop_And64,
20165 binop(Iop_Shr64, mkexpr(t1), mkU8(18)),
sewardj009230b2013-01-26 11:47:55 +000020166 mkU64(1))),
florian99dd03e2013-01-29 03:56:06 +000020167 mkU64(1),
20168 mkU64(0)))
sewardj80611e32012-01-20 13:07:24 +000020169 );
20170
20171 DIP("popf%c\n", nameISize(sz));
20172 return delta;
20173
20174 case 0x9E: /* SAHF */
20175 codegen_SAHF();
20176 DIP("sahf\n");
20177 return delta;
20178
20179 case 0x9F: /* LAHF */
20180 codegen_LAHF();
20181 DIP("lahf\n");
20182 return delta;
20183
20184 case 0xA0: /* MOV Ob,AL */
20185 if (have66orF2orF3(pfx)) goto decode_failure;
20186 sz = 1;
20187 /* Fall through ... */
20188 case 0xA1: /* MOV Ov,eAX */
20189 if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
20190 goto decode_failure;
20191 d64 = getDisp64(delta);
20192 delta += 8;
20193 ty = szToITy(sz);
20194 addr = newTemp(Ity_I64);
20195 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
20196 putIRegRAX(sz, loadLE( ty, mkexpr(addr) ));
20197 DIP("mov%c %s0x%llx, %s\n", nameISize(sz),
20198 segRegTxt(pfx), d64,
20199 nameIRegRAX(sz));
20200 return delta;
20201
20202 case 0xA2: /* MOV AL,Ob */
20203 if (have66orF2orF3(pfx)) goto decode_failure;
20204 sz = 1;
20205 /* Fall through ... */
20206 case 0xA3: /* MOV eAX,Ov */
20207 if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
20208 goto decode_failure;
20209 d64 = getDisp64(delta);
20210 delta += 8;
20211 ty = szToITy(sz);
20212 addr = newTemp(Ity_I64);
20213 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
20214 storeLE( mkexpr(addr), getIRegRAX(sz) );
20215 DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz),
20216 segRegTxt(pfx), d64);
20217 return delta;
20218
20219 case 0xA4:
20220 case 0xA5:
20221 /* F3 A4: rep movsb */
20222 if (haveF3(pfx) && !haveF2(pfx)) {
20223 if (opc == 0xA4)
20224 sz = 1;
sewardjc6f970f2012-04-02 21:54:49 +000020225 dis_REP_op ( dres, AMD64CondAlways, dis_MOVS, sz,
sewardj80611e32012-01-20 13:07:24 +000020226 guest_RIP_curr_instr,
20227 guest_RIP_bbstart+delta, "rep movs", pfx );
20228 dres->whatNext = Dis_StopHere;
20229 return delta;
20230 }
20231 /* A4: movsb */
20232 if (!haveF3(pfx) && !haveF2(pfx)) {
20233 if (opc == 0xA4)
20234 sz = 1;
20235 dis_string_op( dis_MOVS, sz, "movs", pfx );
20236 return delta;
20237 }
20238 goto decode_failure;
20239
20240 case 0xA6:
20241 case 0xA7:
20242 /* F3 A6/A7: repe cmps/rep cmps{w,l,q} */
20243 if (haveF3(pfx) && !haveF2(pfx)) {
20244 if (opc == 0xA6)
20245 sz = 1;
sewardjc6f970f2012-04-02 21:54:49 +000020246 dis_REP_op ( dres, AMD64CondZ, dis_CMPS, sz,
sewardj80611e32012-01-20 13:07:24 +000020247 guest_RIP_curr_instr,
20248 guest_RIP_bbstart+delta, "repe cmps", pfx );
20249 dres->whatNext = Dis_StopHere;
20250 return delta;
20251 }
20252 goto decode_failure;
20253
20254 case 0xAA:
20255 case 0xAB:
20256 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */
20257 if (haveF3(pfx) && !haveF2(pfx)) {
20258 if (opc == 0xAA)
20259 sz = 1;
sewardjc6f970f2012-04-02 21:54:49 +000020260 dis_REP_op ( dres, AMD64CondAlways, dis_STOS, sz,
sewardj80611e32012-01-20 13:07:24 +000020261 guest_RIP_curr_instr,
20262 guest_RIP_bbstart+delta, "rep stos", pfx );
sewardjc6f970f2012-04-02 21:54:49 +000020263 vassert(dres->whatNext == Dis_StopHere);
20264 return delta;
sewardj80611e32012-01-20 13:07:24 +000020265 }
20266 /* AA/AB: stosb/stos{w,l,q} */
20267 if (!haveF3(pfx) && !haveF2(pfx)) {
20268 if (opc == 0xAA)
20269 sz = 1;
20270 dis_string_op( dis_STOS, sz, "stos", pfx );
20271 return delta;
20272 }
20273 goto decode_failure;
20274
20275 case 0xA8: /* TEST Ib, AL */
20276 if (haveF2orF3(pfx)) goto decode_failure;
20277 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
20278 return delta;
20279 case 0xA9: /* TEST Iv, eAX */
20280 if (haveF2orF3(pfx)) goto decode_failure;
20281 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
20282 return delta;
20283
20284 case 0xAC: /* LODS, no REP prefix */
20285 case 0xAD:
20286 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", pfx );
20287 return delta;
20288
20289 case 0xAE:
20290 case 0xAF:
20291 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */
20292 if (haveF2(pfx) && !haveF3(pfx)) {
20293 if (opc == 0xAE)
20294 sz = 1;
sewardjc6f970f2012-04-02 21:54:49 +000020295 dis_REP_op ( dres, AMD64CondNZ, dis_SCAS, sz,
sewardj80611e32012-01-20 13:07:24 +000020296 guest_RIP_curr_instr,
20297 guest_RIP_bbstart+delta, "repne scas", pfx );
sewardjc6f970f2012-04-02 21:54:49 +000020298 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000020299 return delta;
20300 }
20301 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */
20302 if (!haveF2(pfx) && haveF3(pfx)) {
20303 if (opc == 0xAE)
20304 sz = 1;
sewardjc6f970f2012-04-02 21:54:49 +000020305 dis_REP_op ( dres, AMD64CondZ, dis_SCAS, sz,
sewardj80611e32012-01-20 13:07:24 +000020306 guest_RIP_curr_instr,
20307 guest_RIP_bbstart+delta, "repe scas", pfx );
sewardjc6f970f2012-04-02 21:54:49 +000020308 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000020309 return delta;
20310 }
20311 /* AE/AF: scasb/scas{w,l,q} */
20312 if (!haveF2(pfx) && !haveF3(pfx)) {
20313 if (opc == 0xAE)
20314 sz = 1;
20315 dis_string_op( dis_SCAS, sz, "scas", pfx );
20316 return delta;
20317 }
20318 goto decode_failure;
20319
20320 /* XXXX be careful here with moves to AH/BH/CH/DH */
20321 case 0xB0: /* MOV imm,AL */
20322 case 0xB1: /* MOV imm,CL */
20323 case 0xB2: /* MOV imm,DL */
20324 case 0xB3: /* MOV imm,BL */
20325 case 0xB4: /* MOV imm,AH */
20326 case 0xB5: /* MOV imm,CH */
20327 case 0xB6: /* MOV imm,DH */
20328 case 0xB7: /* MOV imm,BH */
20329 if (haveF2orF3(pfx)) goto decode_failure;
20330 d64 = getUChar(delta);
20331 delta += 1;
20332 putIRegRexB(1, pfx, opc-0xB0, mkU8(d64));
20333 DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0));
20334 return delta;
20335
20336 case 0xB8: /* MOV imm,eAX */
20337 case 0xB9: /* MOV imm,eCX */
20338 case 0xBA: /* MOV imm,eDX */
20339 case 0xBB: /* MOV imm,eBX */
20340 case 0xBC: /* MOV imm,eSP */
20341 case 0xBD: /* MOV imm,eBP */
20342 case 0xBE: /* MOV imm,eSI */
20343 case 0xBF: /* MOV imm,eDI */
20344 /* This is the one-and-only place where 64-bit literals are
20345 allowed in the instruction stream. */
20346 if (haveF2orF3(pfx)) goto decode_failure;
20347 if (sz == 8) {
20348 d64 = getDisp64(delta);
20349 delta += 8;
20350 putIRegRexB(8, pfx, opc-0xB8, mkU64(d64));
20351 DIP("movabsq $%lld,%s\n", (Long)d64,
20352 nameIRegRexB(8,pfx,opc-0xB8));
20353 } else {
20354 d64 = getSDisp(imin(4,sz),delta);
20355 delta += imin(4,sz);
20356 putIRegRexB(sz, pfx, opc-0xB8,
20357 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
20358 DIP("mov%c $%lld,%s\n", nameISize(sz),
20359 (Long)d64,
20360 nameIRegRexB(sz,pfx,opc-0xB8));
20361 }
20362 return delta;
20363
20364 case 0xC0: { /* Grp2 Ib,Eb */
20365 Bool decode_OK = True;
20366 if (haveF2orF3(pfx)) goto decode_failure;
20367 modrm = getUChar(delta);
20368 am_sz = lengthAMode(pfx,delta);
20369 d_sz = 1;
20370 d64 = getUChar(delta + am_sz);
20371 sz = 1;
20372 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20373 mkU8(d64 & 0xFF), NULL, &decode_OK );
20374 if (!decode_OK) goto decode_failure;
20375 return delta;
20376 }
20377
20378 case 0xC1: { /* Grp2 Ib,Ev */
20379 Bool decode_OK = True;
20380 if (haveF2orF3(pfx)) goto decode_failure;
20381 modrm = getUChar(delta);
20382 am_sz = lengthAMode(pfx,delta);
20383 d_sz = 1;
20384 d64 = getUChar(delta + am_sz);
20385 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20386 mkU8(d64 & 0xFF), NULL, &decode_OK );
20387 if (!decode_OK) goto decode_failure;
20388 return delta;
20389 }
20390
sewardjdb546602012-02-16 19:09:43 +000020391 case 0xC2: /* RET imm16 */
mjw67ac3fd2014-05-09 11:41:06 +000020392 if (have66orF3(pfx)) goto decode_failure;
20393 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardjdb546602012-02-16 19:09:43 +000020394 d64 = getUDisp16(delta);
20395 delta += 2;
sewardjc6f970f2012-04-02 21:54:49 +000020396 dis_ret(dres, vbi, d64);
sewardjdb546602012-02-16 19:09:43 +000020397 DIP("ret $%lld\n", d64);
20398 return delta;
20399
sewardj80611e32012-01-20 13:07:24 +000020400 case 0xC3: /* RET */
mjw67ac3fd2014-05-09 11:41:06 +000020401 if (have66(pfx)) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000020402 /* F3 is acceptable on AMD. */
mjw67ac3fd2014-05-09 11:41:06 +000020403 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardjc6f970f2012-04-02 21:54:49 +000020404 dis_ret(dres, vbi, 0);
sewardj80611e32012-01-20 13:07:24 +000020405 DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n");
20406 return delta;
20407
sewardj1bf44e32013-09-18 18:27:55 +000020408 case 0xC6: /* C6 /0 = MOV Ib,Eb */
sewardj80611e32012-01-20 13:07:24 +000020409 sz = 1;
sewardj1bf44e32013-09-18 18:27:55 +000020410 goto maybe_do_Mov_I_E;
20411 case 0xC7: /* C7 /0 = MOV Iv,Ev */
20412 goto maybe_do_Mov_I_E;
20413 maybe_do_Mov_I_E:
sewardj80611e32012-01-20 13:07:24 +000020414 modrm = getUChar(delta);
sewardj1bf44e32013-09-18 18:27:55 +000020415 if (gregLO3ofRM(modrm) == 0) {
sewardj1bf44e32013-09-18 18:27:55 +000020416 if (epartIsReg(modrm)) {
sewardj38b1d692013-10-15 17:21:42 +000020417 /* Neither F2 nor F3 are allowable. */
20418 if (haveF2orF3(pfx)) goto decode_failure;
sewardj1bf44e32013-09-18 18:27:55 +000020419 delta++; /* mod/rm byte */
20420 d64 = getSDisp(imin(4,sz),delta);
20421 delta += imin(4,sz);
20422 putIRegE(sz, pfx, modrm,
20423 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
20424 DIP("mov%c $%lld, %s\n", nameISize(sz),
20425 (Long)d64,
20426 nameIRegE(sz,pfx,modrm));
20427 } else {
sewardj38b1d692013-10-15 17:21:42 +000020428 if (haveF2(pfx)) goto decode_failure;
20429 /* F3(XRELEASE) is allowable here */
sewardj1bf44e32013-09-18 18:27:55 +000020430 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
20431 /*xtra*/imin(4,sz) );
20432 delta += alen;
20433 d64 = getSDisp(imin(4,sz),delta);
20434 delta += imin(4,sz);
20435 storeLE(mkexpr(addr),
20436 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
20437 DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf);
20438 }
20439 return delta;
sewardj80611e32012-01-20 13:07:24 +000020440 }
sewardja56f3692013-09-18 19:41:43 +000020441 /* BEGIN HACKY SUPPORT FOR xbegin */
mjwd5453bf2013-12-09 12:54:06 +000020442 if (opc == 0xC7 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 4
mjw9d690c62013-10-04 22:31:48 +000020443 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
sewardja56f3692013-09-18 19:41:43 +000020444 delta++; /* mod/rm byte */
20445 d64 = getSDisp(4,delta);
20446 delta += 4;
20447 guest_RIP_next_mustcheck = True;
20448 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
20449 Addr64 failAddr = guest_RIP_bbstart + delta + d64;
20450 /* EAX contains the failure status code. Bit 3 is "Set if an
20451 internal buffer overflowed", which seems like the
20452 least-bogus choice we can make here. */
20453 putIRegRAX(4, mkU32(1<<3));
20454 /* And jump to the fail address. */
20455 jmp_lit(dres, Ijk_Boring, failAddr);
20456 vassert(dres->whatNext == Dis_StopHere);
20457 DIP("xbeginq 0x%llx\n", failAddr);
20458 return delta;
20459 }
20460 /* END HACKY SUPPORT FOR xbegin */
mjwd5453bf2013-12-09 12:54:06 +000020461 /* BEGIN HACKY SUPPORT FOR xabort */
20462 if (opc == 0xC6 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 1
20463 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
20464 delta++; /* mod/rm byte */
20465 abyte = getUChar(delta); delta++;
20466 /* There is never a real transaction in progress, so do nothing. */
20467 DIP("xabort $%d", (Int)abyte);
20468 return delta;
20469 }
20470 /* END HACKY SUPPORT FOR xabort */
sewardj1bf44e32013-09-18 18:27:55 +000020471 goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000020472
20473 case 0xC8: /* ENTER */
20474 /* Same comments re operand size as for LEAVE below apply.
20475 Also, only handles the case "enter $imm16, $0"; other cases
20476 for the second operand (nesting depth) are not handled. */
20477 if (sz != 4)
20478 goto decode_failure;
20479 d64 = getUDisp16(delta);
20480 delta += 2;
20481 vassert(d64 >= 0 && d64 <= 0xFFFF);
20482 if (getUChar(delta) != 0)
20483 goto decode_failure;
20484 delta++;
20485 /* Intel docs seem to suggest:
20486 push rbp
20487 temp = rsp
20488 rbp = temp
20489 rsp = rsp - imm16
20490 */
20491 t1 = newTemp(Ity_I64);
20492 assign(t1, getIReg64(R_RBP));
20493 t2 = newTemp(Ity_I64);
20494 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
20495 putIReg64(R_RSP, mkexpr(t2));
20496 storeLE(mkexpr(t2), mkexpr(t1));
20497 putIReg64(R_RBP, mkexpr(t2));
20498 if (d64 > 0) {
20499 putIReg64(R_RSP, binop(Iop_Sub64, mkexpr(t2), mkU64(d64)));
20500 }
20501 DIP("enter $%u, $0\n", (UInt)d64);
20502 return delta;
20503
20504 case 0xC9: /* LEAVE */
20505 /* In 64-bit mode this defaults to a 64-bit operand size. There
20506 is no way to encode a 32-bit variant. Hence sz==4 but we do
20507 it as if sz=8. */
20508 if (sz != 4)
20509 goto decode_failure;
20510 t1 = newTemp(Ity_I64);
20511 t2 = newTemp(Ity_I64);
20512 assign(t1, getIReg64(R_RBP));
20513 /* First PUT RSP looks redundant, but need it because RSP must
20514 always be up-to-date for Memcheck to work... */
20515 putIReg64(R_RSP, mkexpr(t1));
20516 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
20517 putIReg64(R_RBP, mkexpr(t2));
20518 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) );
20519 DIP("leave\n");
20520 return delta;
20521
20522 case 0xCC: /* INT 3 */
sewardjc6f970f2012-04-02 21:54:49 +000020523 jmp_lit(dres, Ijk_SigTRAP, guest_RIP_bbstart + delta);
20524 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000020525 DIP("int $0x3\n");
20526 return delta;
20527
20528 case 0xD0: { /* Grp2 1,Eb */
20529 Bool decode_OK = True;
20530 if (haveF2orF3(pfx)) goto decode_failure;
20531 modrm = getUChar(delta);
20532 am_sz = lengthAMode(pfx,delta);
20533 d_sz = 0;
20534 d64 = 1;
20535 sz = 1;
20536 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20537 mkU8(d64), NULL, &decode_OK );
20538 if (!decode_OK) goto decode_failure;
20539 return delta;
20540 }
20541
20542 case 0xD1: { /* Grp2 1,Ev */
20543 Bool decode_OK = True;
20544 if (haveF2orF3(pfx)) goto decode_failure;
20545 modrm = getUChar(delta);
20546 am_sz = lengthAMode(pfx,delta);
20547 d_sz = 0;
20548 d64 = 1;
20549 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20550 mkU8(d64), NULL, &decode_OK );
20551 if (!decode_OK) goto decode_failure;
20552 return delta;
20553 }
20554
20555 case 0xD2: { /* Grp2 CL,Eb */
20556 Bool decode_OK = True;
20557 if (haveF2orF3(pfx)) goto decode_failure;
20558 modrm = getUChar(delta);
20559 am_sz = lengthAMode(pfx,delta);
20560 d_sz = 0;
20561 sz = 1;
20562 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20563 getIRegCL(), "%cl", &decode_OK );
20564 if (!decode_OK) goto decode_failure;
20565 return delta;
20566 }
20567
20568 case 0xD3: { /* Grp2 CL,Ev */
20569 Bool decode_OK = True;
20570 if (haveF2orF3(pfx)) goto decode_failure;
20571 modrm = getUChar(delta);
20572 am_sz = lengthAMode(pfx,delta);
20573 d_sz = 0;
20574 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20575 getIRegCL(), "%cl", &decode_OK );
20576 if (!decode_OK) goto decode_failure;
20577 return delta;
20578 }
20579
20580 case 0xD8: /* X87 instructions */
20581 case 0xD9:
20582 case 0xDA:
20583 case 0xDB:
20584 case 0xDC:
20585 case 0xDD:
20586 case 0xDE:
20587 case 0xDF: {
20588 Bool redundantREXWok = False;
20589
20590 if (haveF2orF3(pfx))
20591 goto decode_failure;
20592
20593 /* kludge to tolerate redundant rex.w prefixes (should do this
20594 properly one day) */
20595 /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */
20596 if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ )
20597 redundantREXWok = True;
20598
sewardj9ae42a72012-02-16 14:18:56 +000020599 Bool size_OK = False;
20600 if ( sz == 4 )
20601 size_OK = True;
20602 else if ( sz == 8 )
20603 size_OK = redundantREXWok;
20604 else if ( sz == 2 ) {
20605 int mod_rm = getUChar(delta+0);
20606 int reg = gregLO3ofRM(mod_rm);
20607 /* The HotSpot JVM uses these */
20608 if ( (opc == 0xDD) && (reg == 0 /* FLDL */ ||
20609 reg == 4 /* FNSAVE */ ||
20610 reg == 6 /* FRSTOR */ ) )
20611 size_OK = True;
sewardj80611e32012-01-20 13:07:24 +000020612 }
sewardj9ae42a72012-02-16 14:18:56 +000020613 /* AMD manual says 0x66 size override is ignored, except where
20614 it is meaningful */
20615 if (!size_OK)
20616 goto decode_failure;
20617
20618 Bool decode_OK = False;
20619 delta = dis_FPU ( &decode_OK, vbi, pfx, delta );
20620 if (!decode_OK)
20621 goto decode_failure;
20622
sewardj80611e32012-01-20 13:07:24 +000020623 return delta;
20624 }
20625
20626 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
20627 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
20628 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
20629 { /* The docs say this uses rCX as a count depending on the
20630 address size override, not the operand one. */
20631 IRExpr* zbit = NULL;
20632 IRExpr* count = NULL;
20633 IRExpr* cond = NULL;
florian55085f82012-11-21 00:36:55 +000020634 const HChar* xtra = NULL;
sewardj80611e32012-01-20 13:07:24 +000020635
20636 if (have66orF2orF3(pfx) || 1==getRexW(pfx)) goto decode_failure;
20637 /* So at this point we've rejected any variants which appear to
20638 be governed by the usual operand-size modifiers. Hence only
20639 the address size prefix can have an effect. It changes the
20640 size from 64 (default) to 32. */
20641 d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta);
20642 delta++;
20643 if (haveASO(pfx)) {
20644 /* 64to32 of 64-bit get is merely a get-put improvement
20645 trick. */
20646 putIReg32(R_RCX, binop(Iop_Sub32,
20647 unop(Iop_64to32, getIReg64(R_RCX)),
20648 mkU32(1)));
20649 } else {
20650 putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1)));
20651 }
20652
20653 /* This is correct, both for 32- and 64-bit versions. If we're
20654 doing a 32-bit dec and the result is zero then the default
20655 zero extension rule will cause the upper 32 bits to be zero
20656 too. Hence a 64-bit check against zero is OK. */
20657 count = getIReg64(R_RCX);
20658 cond = binop(Iop_CmpNE64, count, mkU64(0));
20659 switch (opc) {
20660 case 0xE2:
20661 xtra = "";
20662 break;
20663 case 0xE1:
20664 xtra = "e";
20665 zbit = mk_amd64g_calculate_condition( AMD64CondZ );
20666 cond = mkAnd1(cond, zbit);
20667 break;
20668 case 0xE0:
20669 xtra = "ne";
20670 zbit = mk_amd64g_calculate_condition( AMD64CondNZ );
20671 cond = mkAnd1(cond, zbit);
20672 break;
20673 default:
sewardjcc3d2192013-03-27 11:37:33 +000020674 vassert(0);
sewardj80611e32012-01-20 13:07:24 +000020675 }
sewardjc6f970f2012-04-02 21:54:49 +000020676 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64), OFFB_RIP) );
sewardj80611e32012-01-20 13:07:24 +000020677
20678 DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? "l" : "", d64);
20679 return delta;
20680 }
20681
20682 case 0xE3:
20683 /* JRCXZ or JECXZ, depending on the address size override. */
20684 if (have66orF2orF3(pfx)) goto decode_failure;
20685 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
20686 delta++;
20687 if (haveASO(pfx)) {
20688 /* 32-bit */
20689 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
sewardjc6f970f2012-04-02 21:54:49 +000020690 unop(Iop_32Uto64, getIReg32(R_RCX)),
20691 mkU64(0)),
20692 Ijk_Boring,
20693 IRConst_U64(d64),
20694 OFFB_RIP
20695 ));
sewardj80611e32012-01-20 13:07:24 +000020696 DIP("jecxz 0x%llx\n", d64);
20697 } else {
20698 /* 64-bit */
20699 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
20700 getIReg64(R_RCX),
20701 mkU64(0)),
sewardjc6f970f2012-04-02 21:54:49 +000020702 Ijk_Boring,
20703 IRConst_U64(d64),
20704 OFFB_RIP
20705 ));
sewardj80611e32012-01-20 13:07:24 +000020706 DIP("jrcxz 0x%llx\n", d64);
20707 }
20708 return delta;
20709
20710 case 0xE4: /* IN imm8, AL */
20711 sz = 1;
20712 t1 = newTemp(Ity_I64);
20713 abyte = getUChar(delta); delta++;
20714 assign(t1, mkU64( abyte & 0xFF ));
20715 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
20716 goto do_IN;
20717 case 0xE5: /* IN imm8, eAX */
20718 if (!(sz == 2 || sz == 4)) goto decode_failure;
20719 t1 = newTemp(Ity_I64);
20720 abyte = getUChar(delta); delta++;
20721 assign(t1, mkU64( abyte & 0xFF ));
20722 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
20723 goto do_IN;
20724 case 0xEC: /* IN %DX, AL */
20725 sz = 1;
20726 t1 = newTemp(Ity_I64);
20727 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
20728 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
20729 nameIRegRAX(sz));
20730 goto do_IN;
20731 case 0xED: /* IN %DX, eAX */
20732 if (!(sz == 2 || sz == 4)) goto decode_failure;
20733 t1 = newTemp(Ity_I64);
20734 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
20735 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
20736 nameIRegRAX(sz));
20737 goto do_IN;
20738 do_IN: {
20739 /* At this point, sz indicates the width, and t1 is a 64-bit
20740 value giving port number. */
20741 IRDirty* d;
20742 if (haveF2orF3(pfx)) goto decode_failure;
20743 vassert(sz == 1 || sz == 2 || sz == 4);
20744 ty = szToITy(sz);
20745 t2 = newTemp(Ity_I64);
20746 d = unsafeIRDirty_1_N(
20747 t2,
20748 0/*regparms*/,
20749 "amd64g_dirtyhelper_IN",
20750 &amd64g_dirtyhelper_IN,
20751 mkIRExprVec_2( mkexpr(t1), mkU64(sz) )
20752 );
20753 /* do the call, dumping the result in t2. */
20754 stmt( IRStmt_Dirty(d) );
20755 putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) );
20756 return delta;
20757 }
20758
20759 case 0xE6: /* OUT AL, imm8 */
20760 sz = 1;
20761 t1 = newTemp(Ity_I64);
20762 abyte = getUChar(delta); delta++;
20763 assign( t1, mkU64( abyte & 0xFF ) );
20764 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
20765 goto do_OUT;
20766 case 0xE7: /* OUT eAX, imm8 */
20767 if (!(sz == 2 || sz == 4)) goto decode_failure;
20768 t1 = newTemp(Ity_I64);
20769 abyte = getUChar(delta); delta++;
20770 assign( t1, mkU64( abyte & 0xFF ) );
20771 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
20772 goto do_OUT;
20773 case 0xEE: /* OUT AL, %DX */
20774 sz = 1;
20775 t1 = newTemp(Ity_I64);
20776 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
20777 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
20778 nameIRegRDX(2));
20779 goto do_OUT;
20780 case 0xEF: /* OUT eAX, %DX */
20781 if (!(sz == 2 || sz == 4)) goto decode_failure;
20782 t1 = newTemp(Ity_I64);
20783 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
20784 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
20785 nameIRegRDX(2));
20786 goto do_OUT;
20787 do_OUT: {
20788 /* At this point, sz indicates the width, and t1 is a 64-bit
20789 value giving port number. */
20790 IRDirty* d;
20791 if (haveF2orF3(pfx)) goto decode_failure;
20792 vassert(sz == 1 || sz == 2 || sz == 4);
20793 ty = szToITy(sz);
20794 d = unsafeIRDirty_0_N(
20795 0/*regparms*/,
20796 "amd64g_dirtyhelper_OUT",
20797 &amd64g_dirtyhelper_OUT,
20798 mkIRExprVec_3( mkexpr(t1),
20799 widenUto64( getIRegRAX(sz) ),
20800 mkU64(sz) )
20801 );
20802 stmt( IRStmt_Dirty(d) );
20803 return delta;
20804 }
20805
20806 case 0xE8: /* CALL J4 */
mjw67ac3fd2014-05-09 11:41:06 +000020807 if (haveF3(pfx)) goto decode_failure;
20808 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardj80611e32012-01-20 13:07:24 +000020809 d64 = getSDisp32(delta); delta += 4;
20810 d64 += (guest_RIP_bbstart+delta);
20811 /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */
20812 t1 = newTemp(Ity_I64);
20813 assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
20814 putIReg64(R_RSP, mkexpr(t1));
20815 storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta));
20816 t2 = newTemp(Ity_I64);
20817 assign(t2, mkU64((Addr64)d64));
20818 make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32");
20819 if (resteerOkFn( callback_opaque, (Addr64)d64) ) {
20820 /* follow into the call target. */
20821 dres->whatNext = Dis_ResteerU;
20822 dres->continueAt = d64;
20823 } else {
sewardjc6f970f2012-04-02 21:54:49 +000020824 jmp_lit(dres, Ijk_Call, d64);
20825 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000020826 }
20827 DIP("call 0x%llx\n",d64);
20828 return delta;
20829
20830 case 0xE9: /* Jv (jump, 16/32 offset) */
mjw67ac3fd2014-05-09 11:41:06 +000020831 if (haveF3(pfx)) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000020832 if (sz != 4)
20833 goto decode_failure; /* JRS added 2004 July 11 */
mjw67ac3fd2014-05-09 11:41:06 +000020834 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardj80611e32012-01-20 13:07:24 +000020835 d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta);
20836 delta += sz;
florianbeac5302014-12-31 12:09:38 +000020837 if (resteerOkFn(callback_opaque, (Addr64)d64)) {
sewardj80611e32012-01-20 13:07:24 +000020838 dres->whatNext = Dis_ResteerU;
20839 dres->continueAt = d64;
20840 } else {
sewardjc6f970f2012-04-02 21:54:49 +000020841 jmp_lit(dres, Ijk_Boring, d64);
20842 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000020843 }
20844 DIP("jmp 0x%llx\n", d64);
20845 return delta;
20846
20847 case 0xEB: /* Jb (jump, byte offset) */
mjw67ac3fd2014-05-09 11:41:06 +000020848 if (haveF3(pfx)) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000020849 if (sz != 4)
20850 goto decode_failure; /* JRS added 2004 July 11 */
mjw67ac3fd2014-05-09 11:41:06 +000020851 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardj80611e32012-01-20 13:07:24 +000020852 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
20853 delta++;
florianbeac5302014-12-31 12:09:38 +000020854 if (resteerOkFn(callback_opaque, (Addr64)d64)) {
sewardj80611e32012-01-20 13:07:24 +000020855 dres->whatNext = Dis_ResteerU;
20856 dres->continueAt = d64;
20857 } else {
sewardjc6f970f2012-04-02 21:54:49 +000020858 jmp_lit(dres, Ijk_Boring, d64);
20859 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000020860 }
20861 DIP("jmp-8 0x%llx\n", d64);
20862 return delta;
20863
20864 case 0xF5: /* CMC */
20865 case 0xF8: /* CLC */
20866 case 0xF9: /* STC */
20867 t1 = newTemp(Ity_I64);
20868 t2 = newTemp(Ity_I64);
20869 assign( t1, mk_amd64g_calculate_rflags_all() );
20870 switch (opc) {
20871 case 0xF5:
20872 assign( t2, binop(Iop_Xor64, mkexpr(t1),
20873 mkU64(AMD64G_CC_MASK_C)));
20874 DIP("cmc\n");
20875 break;
20876 case 0xF8:
20877 assign( t2, binop(Iop_And64, mkexpr(t1),
20878 mkU64(~AMD64G_CC_MASK_C)));
20879 DIP("clc\n");
20880 break;
20881 case 0xF9:
20882 assign( t2, binop(Iop_Or64, mkexpr(t1),
20883 mkU64(AMD64G_CC_MASK_C)));
20884 DIP("stc\n");
20885 break;
20886 default:
20887 vpanic("disInstr(x64)(cmc/clc/stc)");
20888 }
20889 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
20890 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
20891 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t2) ));
20892 /* Set NDEP even though it isn't used. This makes redundant-PUT
20893 elimination of previous stores to this field work better. */
20894 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
20895 return delta;
20896
20897 case 0xF6: { /* Grp3 Eb */
20898 Bool decode_OK = True;
sewardj38b1d692013-10-15 17:21:42 +000020899 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
20900 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
sewardj80611e32012-01-20 13:07:24 +000020901 delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK );
20902 if (!decode_OK) goto decode_failure;
20903 return delta;
20904 }
20905
20906 case 0xF7: { /* Grp3 Ev */
20907 Bool decode_OK = True;
sewardj38b1d692013-10-15 17:21:42 +000020908 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
20909 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
sewardj80611e32012-01-20 13:07:24 +000020910 delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK );
20911 if (!decode_OK) goto decode_failure;
20912 return delta;
20913 }
20914
20915 case 0xFC: /* CLD */
20916 if (haveF2orF3(pfx)) goto decode_failure;
20917 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) );
20918 DIP("cld\n");
20919 return delta;
20920
20921 case 0xFD: /* STD */
20922 if (haveF2orF3(pfx)) goto decode_failure;
20923 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) );
20924 DIP("std\n");
20925 return delta;
20926
20927 case 0xFE: { /* Grp4 Eb */
20928 Bool decode_OK = True;
sewardj38b1d692013-10-15 17:21:42 +000020929 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
20930 /* We now let dis_Grp4 itself decide if F2 and/or F3 are valid */
sewardj80611e32012-01-20 13:07:24 +000020931 delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK );
20932 if (!decode_OK) goto decode_failure;
20933 return delta;
20934 }
20935
20936 case 0xFF: { /* Grp5 Ev */
20937 Bool decode_OK = True;
sewardj38b1d692013-10-15 17:21:42 +000020938 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
20939 /* We now let dis_Grp5 itself decide if F2 and/or F3 are valid */
sewardj80611e32012-01-20 13:07:24 +000020940 delta = dis_Grp5 ( vbi, pfx, sz, delta, dres, &decode_OK );
20941 if (!decode_OK) goto decode_failure;
20942 return delta;
20943 }
20944
20945 default:
20946 break;
20947
20948 }
20949
20950 decode_failure:
20951 return deltaIN; /* fail */
20952}
20953
20954
20955/*------------------------------------------------------------*/
20956/*--- ---*/
20957/*--- Top-level post-escape decoders: dis_ESC_0F ---*/
20958/*--- ---*/
20959/*------------------------------------------------------------*/
20960
sewardjf85e1772012-07-15 10:11:10 +000020961static IRTemp math_BSWAP ( IRTemp t1, IRType ty )
20962{
20963 IRTemp t2 = newTemp(ty);
20964 if (ty == Ity_I64) {
20965 IRTemp m8 = newTemp(Ity_I64);
20966 IRTemp s8 = newTemp(Ity_I64);
20967 IRTemp m16 = newTemp(Ity_I64);
20968 IRTemp s16 = newTemp(Ity_I64);
20969 IRTemp m32 = newTemp(Ity_I64);
20970 assign( m8, mkU64(0xFF00FF00FF00FF00ULL) );
20971 assign( s8,
20972 binop(Iop_Or64,
20973 binop(Iop_Shr64,
20974 binop(Iop_And64,mkexpr(t1),mkexpr(m8)),
20975 mkU8(8)),
20976 binop(Iop_And64,
20977 binop(Iop_Shl64,mkexpr(t1),mkU8(8)),
20978 mkexpr(m8))
20979 )
20980 );
20981
20982 assign( m16, mkU64(0xFFFF0000FFFF0000ULL) );
20983 assign( s16,
20984 binop(Iop_Or64,
20985 binop(Iop_Shr64,
20986 binop(Iop_And64,mkexpr(s8),mkexpr(m16)),
20987 mkU8(16)),
20988 binop(Iop_And64,
20989 binop(Iop_Shl64,mkexpr(s8),mkU8(16)),
20990 mkexpr(m16))
20991 )
20992 );
20993
20994 assign( m32, mkU64(0xFFFFFFFF00000000ULL) );
20995 assign( t2,
20996 binop(Iop_Or64,
20997 binop(Iop_Shr64,
20998 binop(Iop_And64,mkexpr(s16),mkexpr(m32)),
20999 mkU8(32)),
21000 binop(Iop_And64,
21001 binop(Iop_Shl64,mkexpr(s16),mkU8(32)),
21002 mkexpr(m32))
21003 )
21004 );
21005 return t2;
21006 }
21007 if (ty == Ity_I32) {
21008 assign( t2,
21009 binop(
21010 Iop_Or32,
21011 binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
21012 binop(
21013 Iop_Or32,
21014 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
21015 mkU32(0x00FF0000)),
21016 binop(Iop_Or32,
21017 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
21018 mkU32(0x0000FF00)),
21019 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
21020 mkU32(0x000000FF) )
21021 )))
21022 );
21023 return t2;
21024 }
21025 if (ty == Ity_I16) {
21026 assign(t2,
21027 binop(Iop_Or16,
21028 binop(Iop_Shl16, mkexpr(t1), mkU8(8)),
21029 binop(Iop_Shr16, mkexpr(t1), mkU8(8)) ));
21030 return t2;
21031 }
21032 vassert(0);
21033 /*NOTREACHED*/
21034 return IRTemp_INVALID;
21035}
21036
21037
sewardj80611e32012-01-20 13:07:24 +000021038__attribute__((noinline))
21039static
21040Long dis_ESC_0F (
21041 /*MB_OUT*/DisResult* dres,
21042 /*MB_OUT*/Bool* expect_CAS,
florianbeac5302014-12-31 12:09:38 +000021043 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
sewardj80611e32012-01-20 13:07:24 +000021044 Bool resteerCisOk,
21045 void* callback_opaque,
floriancacba8e2014-12-15 18:58:07 +000021046 const VexArchInfo* archinfo,
21047 const VexAbiInfo* vbi,
sewardj80611e32012-01-20 13:07:24 +000021048 Prefix pfx, Int sz, Long deltaIN
21049 )
21050{
21051 Long d64 = 0;
21052 IRTemp addr = IRTemp_INVALID;
21053 IRTemp t1 = IRTemp_INVALID;
21054 IRTemp t2 = IRTemp_INVALID;
21055 UChar modrm = 0;
21056 Int am_sz = 0;
21057 Int alen = 0;
21058 HChar dis_buf[50];
21059
21060 /* In the first switch, look for ordinary integer insns. */
21061 Long delta = deltaIN;
21062 UChar opc = getUChar(delta);
21063 delta++;
21064 switch (opc) { /* first switch */
21065
sewardjfe0c5e72012-06-15 15:48:07 +000021066 case 0x01:
sewardj80611e32012-01-20 13:07:24 +000021067 {
sewardj80611e32012-01-20 13:07:24 +000021068 modrm = getUChar(delta);
sewardjfe0c5e72012-06-15 15:48:07 +000021069 /* 0F 01 /0 -- SGDT */
21070 /* 0F 01 /1 -- SIDT */
21071 if (!epartIsReg(modrm)
21072 && (gregLO3ofRM(modrm) == 0 || gregLO3ofRM(modrm) == 1)) {
21073 /* This is really revolting, but ... since each processor
21074 (core) only has one IDT and one GDT, just let the guest
21075 see it (pass-through semantics). I can't see any way to
21076 construct a faked-up value, so don't bother to try. */
21077 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21078 delta += alen;
21079 switch (gregLO3ofRM(modrm)) {
21080 case 0: DIP("sgdt %s\n", dis_buf); break;
21081 case 1: DIP("sidt %s\n", dis_buf); break;
21082 default: vassert(0); /*NOTREACHED*/
21083 }
21084 IRDirty* d = unsafeIRDirty_0_N (
21085 0/*regparms*/,
21086 "amd64g_dirtyhelper_SxDT",
21087 &amd64g_dirtyhelper_SxDT,
21088 mkIRExprVec_2( mkexpr(addr),
21089 mkU64(gregLO3ofRM(modrm)) )
21090 );
21091 /* declare we're writing memory */
21092 d->mFx = Ifx_Write;
21093 d->mAddr = mkexpr(addr);
21094 d->mSize = 6;
21095 stmt( IRStmt_Dirty(d) );
21096 return delta;
sewardj80611e32012-01-20 13:07:24 +000021097 }
sewardjfe0c5e72012-06-15 15:48:07 +000021098 /* 0F 01 D0 = XGETBV */
21099 if (modrm == 0xD0 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21100 delta += 1;
21101 DIP("xgetbv\n");
21102 /* Fault (SEGV) if ECX isn't zero. Intel docs say #GP and I
21103 am not sure if that translates in to SEGV or to something
21104 else, in user space. */
21105 t1 = newTemp(Ity_I32);
21106 assign( t1, getIReg32(R_RCX) );
21107 stmt( IRStmt_Exit(binop(Iop_CmpNE32, mkexpr(t1), mkU32(0)),
21108 Ijk_SigSEGV,
21109 IRConst_U64(guest_RIP_curr_instr),
21110 OFFB_RIP
21111 ));
21112 putIRegRAX(4, mkU32(7));
21113 putIRegRDX(4, mkU32(0));
21114 return delta;
21115 }
mjwe3fa0f82014-08-21 17:46:00 +000021116 /* BEGIN HACKY SUPPORT FOR xend */
21117 /* 0F 01 D5 = XEND */
21118 if (modrm == 0xD5 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21119 /* We are never in a transaction (xbegin immediately aborts).
21120 So this just always generates a General Protection Fault. */
21121 delta += 1;
21122 jmp_lit(dres, Ijk_SigSEGV, guest_RIP_bbstart + delta);
21123 vassert(dres->whatNext == Dis_StopHere);
21124 DIP("xend\n");
21125 return delta;
21126 }
21127 /* END HACKY SUPPORT FOR xend */
sewardj9e4c3762013-09-27 15:03:58 +000021128 /* BEGIN HACKY SUPPORT FOR xtest */
21129 /* 0F 01 D6 = XTEST */
mjw9d690c62013-10-04 22:31:48 +000021130 if (modrm == 0xD6 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
sewardj9e4c3762013-09-27 15:03:58 +000021131 /* Sets ZF because there never is a transaction, and all
21132 CF, OF, SF, PF and AF are always cleared by xtest. */
21133 delta += 1;
21134 DIP("xtest\n");
21135 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
21136 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
21137 stmt( IRStmt_Put( OFFB_CC_DEP1, mkU64(AMD64G_CC_MASK_Z) ));
21138 /* Set NDEP even though it isn't used. This makes redundant-PUT
21139 elimination of previous stores to this field work better. */
21140 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
21141 return delta;
21142 }
21143 /* END HACKY SUPPORT FOR xtest */
sewardj818c7302013-03-26 13:53:18 +000021144 /* 0F 01 F9 = RDTSCP */
21145 if (modrm == 0xF9 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDTSCP)) {
21146 delta += 1;
21147 /* Uses dirty helper:
21148 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* )
21149 declared to wr rax, rcx, rdx
21150 */
21151 const HChar* fName = "amd64g_dirtyhelper_RDTSCP";
21152 void* fAddr = &amd64g_dirtyhelper_RDTSCP;
21153 IRDirty* d
21154 = unsafeIRDirty_0_N ( 0/*regparms*/,
florian90419562013-08-15 20:54:52 +000021155 fName, fAddr, mkIRExprVec_1(IRExpr_BBPTR()) );
sewardj818c7302013-03-26 13:53:18 +000021156 /* declare guest state effects */
sewardj818c7302013-03-26 13:53:18 +000021157 d->nFxState = 3;
21158 vex_bzero(&d->fxState, sizeof(d->fxState));
21159 d->fxState[0].fx = Ifx_Write;
21160 d->fxState[0].offset = OFFB_RAX;
21161 d->fxState[0].size = 8;
21162 d->fxState[1].fx = Ifx_Write;
21163 d->fxState[1].offset = OFFB_RCX;
21164 d->fxState[1].size = 8;
21165 d->fxState[2].fx = Ifx_Write;
21166 d->fxState[2].offset = OFFB_RDX;
21167 d->fxState[2].size = 8;
21168 /* execute the dirty call, side-effecting guest state */
21169 stmt( IRStmt_Dirty(d) );
21170 /* RDTSCP is a serialising insn. So, just in case someone is
21171 using it as a memory fence ... */
21172 stmt( IRStmt_MBE(Imbe_Fence) );
21173 DIP("rdtscp\n");
21174 return delta;
21175 }
sewardjfe0c5e72012-06-15 15:48:07 +000021176 /* else decode failed */
21177 break;
sewardj80611e32012-01-20 13:07:24 +000021178 }
21179
21180 case 0x05: /* SYSCALL */
21181 guest_RIP_next_mustcheck = True;
21182 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
21183 putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) );
21184 /* It's important that all guest state is up-to-date
21185 at this point. So we declare an end-of-block here, which
21186 forces any cached guest state to be flushed. */
sewardjc6f970f2012-04-02 21:54:49 +000021187 jmp_lit(dres, Ijk_Sys_syscall, guest_RIP_next_assumed);
21188 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000021189 DIP("syscall\n");
21190 return delta;
21191
sewardj47855302012-07-14 14:31:17 +000021192 case 0x0B: /* UD2 */
21193 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
21194 jmp_lit(dres, Ijk_NoDecode, guest_RIP_curr_instr);
21195 vassert(dres->whatNext == Dis_StopHere);
21196 DIP("ud2\n");
21197 return delta;
21198
sewardj944ff5a2012-08-23 19:47:05 +000021199 case 0x0D: /* 0F 0D /0 -- prefetch mem8 */
21200 /* 0F 0D /1 -- prefetchw mem8 */
21201 if (have66orF2orF3(pfx)) goto decode_failure;
21202 modrm = getUChar(delta);
21203 if (epartIsReg(modrm)) goto decode_failure;
21204 if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1)
21205 goto decode_failure;
21206 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21207 delta += alen;
21208 switch (gregLO3ofRM(modrm)) {
21209 case 0: DIP("prefetch %s\n", dis_buf); break;
21210 case 1: DIP("prefetchw %s\n", dis_buf); break;
21211 default: vassert(0); /*NOTREACHED*/
21212 }
21213 return delta;
21214
sewardj80611e32012-01-20 13:07:24 +000021215 case 0x1F:
21216 if (haveF2orF3(pfx)) goto decode_failure;
21217 modrm = getUChar(delta);
21218 if (epartIsReg(modrm)) goto decode_failure;
21219 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21220 delta += alen;
21221 DIP("nop%c %s\n", nameISize(sz), dis_buf);
21222 return delta;
21223
21224 case 0x31: { /* RDTSC */
21225 IRTemp val = newTemp(Ity_I64);
21226 IRExpr** args = mkIRExprVec_0();
21227 IRDirty* d = unsafeIRDirty_1_N (
21228 val,
21229 0/*regparms*/,
21230 "amd64g_dirtyhelper_RDTSC",
21231 &amd64g_dirtyhelper_RDTSC,
21232 args
21233 );
21234 if (have66orF2orF3(pfx)) goto decode_failure;
21235 /* execute the dirty call, dumping the result in val. */
21236 stmt( IRStmt_Dirty(d) );
21237 putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val)));
21238 putIRegRAX(4, unop(Iop_64to32, mkexpr(val)));
21239 DIP("rdtsc\n");
21240 return delta;
21241 }
21242
21243 case 0x40:
21244 case 0x41:
21245 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
21246 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
21247 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
21248 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
21249 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
21250 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
21251 case 0x48: /* CMOVSb (cmov negative) */
21252 case 0x49: /* CMOVSb (cmov not negative) */
21253 case 0x4A: /* CMOVP (cmov parity even) */
21254 case 0x4B: /* CMOVNP (cmov parity odd) */
21255 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
21256 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
21257 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
21258 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
21259 if (haveF2orF3(pfx)) goto decode_failure;
21260 delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta);
21261 return delta;
21262
21263 case 0x80:
21264 case 0x81:
21265 case 0x82: /* JBb/JNAEb (jump below) */
21266 case 0x83: /* JNBb/JAEb (jump not below) */
21267 case 0x84: /* JZb/JEb (jump zero) */
21268 case 0x85: /* JNZb/JNEb (jump not zero) */
21269 case 0x86: /* JBEb/JNAb (jump below or equal) */
21270 case 0x87: /* JNBEb/JAb (jump not below or equal) */
21271 case 0x88: /* JSb (jump negative) */
21272 case 0x89: /* JSb (jump not negative) */
21273 case 0x8A: /* JP (jump parity even) */
21274 case 0x8B: /* JNP/JPO (jump parity odd) */
21275 case 0x8C: /* JLb/JNGEb (jump less) */
21276 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
21277 case 0x8E: /* JLEb/JNGb (jump less or equal) */
21278 case 0x8F: { /* JGb/JNLEb (jump greater) */
21279 Long jmpDelta;
florian55085f82012-11-21 00:36:55 +000021280 const HChar* comment = "";
mjw67ac3fd2014-05-09 11:41:06 +000021281 if (haveF3(pfx)) goto decode_failure;
21282 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardj80611e32012-01-20 13:07:24 +000021283 jmpDelta = getSDisp32(delta);
21284 d64 = (guest_RIP_bbstart+delta+4) + jmpDelta;
21285 delta += 4;
21286 if (resteerCisOk
21287 && vex_control.guest_chase_cond
21288 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
21289 && jmpDelta < 0
florianbeac5302014-12-31 12:09:38 +000021290 && resteerOkFn( callback_opaque, (Addr64)d64) ) {
sewardj80611e32012-01-20 13:07:24 +000021291 /* Speculation: assume this backward branch is taken. So
21292 we need to emit a side-exit to the insn following this
21293 one, on the negation of the condition, and continue at
21294 the branch target address (d64). If we wind up back at
21295 the first instruction of the trace, just stop; it's
21296 better to let the IR loop unroller handle that case. */
21297 stmt( IRStmt_Exit(
21298 mk_amd64g_calculate_condition(
21299 (AMD64Condcode)(1 ^ (opc - 0x80))),
21300 Ijk_Boring,
sewardjc6f970f2012-04-02 21:54:49 +000021301 IRConst_U64(guest_RIP_bbstart+delta),
21302 OFFB_RIP
21303 ));
sewardj80611e32012-01-20 13:07:24 +000021304 dres->whatNext = Dis_ResteerC;
21305 dres->continueAt = d64;
21306 comment = "(assumed taken)";
21307 }
21308 else
21309 if (resteerCisOk
21310 && vex_control.guest_chase_cond
21311 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
21312 && jmpDelta >= 0
21313 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) {
21314 /* Speculation: assume this forward branch is not taken.
21315 So we need to emit a side-exit to d64 (the dest) and
21316 continue disassembling at the insn immediately
21317 following this one. */
21318 stmt( IRStmt_Exit(
21319 mk_amd64g_calculate_condition((AMD64Condcode)
21320 (opc - 0x80)),
21321 Ijk_Boring,
sewardjc6f970f2012-04-02 21:54:49 +000021322 IRConst_U64(d64),
21323 OFFB_RIP
21324 ));
sewardj80611e32012-01-20 13:07:24 +000021325 dres->whatNext = Dis_ResteerC;
21326 dres->continueAt = guest_RIP_bbstart+delta;
21327 comment = "(assumed not taken)";
21328 }
21329 else {
21330 /* Conservative default translation - end the block at
21331 this point. */
sewardjc6f970f2012-04-02 21:54:49 +000021332 jcc_01( dres, (AMD64Condcode)(opc - 0x80),
21333 guest_RIP_bbstart+delta, d64 );
21334 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000021335 }
21336 DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), d64, comment);
21337 return delta;
21338 }
21339
21340 case 0x90:
21341 case 0x91:
21342 case 0x92: /* set-Bb/set-NAEb (set if below) */
21343 case 0x93: /* set-NBb/set-AEb (set if not below) */
21344 case 0x94: /* set-Zb/set-Eb (set if zero) */
21345 case 0x95: /* set-NZb/set-NEb (set if not zero) */
21346 case 0x96: /* set-BEb/set-NAb (set if below or equal) */
21347 case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
21348 case 0x98: /* set-Sb (set if negative) */
21349 case 0x99: /* set-Sb (set if not negative) */
21350 case 0x9A: /* set-P (set if parity even) */
21351 case 0x9B: /* set-NP (set if parity odd) */
21352 case 0x9C: /* set-Lb/set-NGEb (set if less) */
21353 case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
21354 case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
21355 case 0x9F: /* set-Gb/set-NLEb (set if greater) */
21356 if (haveF2orF3(pfx)) goto decode_failure;
21357 t1 = newTemp(Ity_I8);
21358 assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) );
21359 modrm = getUChar(delta);
21360 if (epartIsReg(modrm)) {
21361 delta++;
21362 putIRegE(1, pfx, modrm, mkexpr(t1));
21363 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90),
21364 nameIRegE(1,pfx,modrm));
21365 } else {
21366 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21367 delta += alen;
21368 storeLE( mkexpr(addr), mkexpr(t1) );
21369 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf);
21370 }
21371 return delta;
21372
mjw67ac3fd2014-05-09 11:41:06 +000021373 case 0x1A:
21374 case 0x1B: { /* Future MPX instructions, currently NOPs.
21375 BNDMK b, m F3 0F 1B
21376 BNDCL b, r/m F3 0F 1A
21377 BNDCU b, r/m F2 0F 1A
21378 BNDCN b, r/m F2 0F 1B
21379 BNDMOV b, b/m 66 0F 1A
21380 BNDMOV b/m, b 66 0F 1B
21381 BNDLDX b, mib 0F 1A
21382 BNDSTX mib, b 0F 1B */
21383
21384 /* All instructions have two operands. One operand is always the
21385 bnd register number (bnd0-bnd3, other register numbers are
21386 ignored when MPX isn't enabled, but should generate an
21387 exception if MPX is enabled) given by gregOfRexRM. The other
21388 operand is either a ModRM:reg, ModRM:r/m or a SIB encoded
21389 address, all of which can be decoded by using either
21390 eregOfRexRM or disAMode. */
21391
21392 modrm = getUChar(delta);
21393 int bnd = gregOfRexRM(pfx,modrm);
21394 const HChar *oper;
21395 if (epartIsReg(modrm)) {
21396 oper = nameIReg64 (eregOfRexRM(pfx,modrm));
21397 delta += 1;
21398 } else {
21399 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21400 delta += alen;
21401 oper = dis_buf;
21402 }
21403
21404 if (haveF3no66noF2 (pfx)) {
21405 if (opc == 0x1B) {
21406 DIP ("bndmk %s, %%bnd%d\n", oper, bnd);
21407 } else /* opc == 0x1A */ {
21408 DIP ("bndcl %s, %%bnd%d\n", oper, bnd);
21409 }
21410 } else if (haveF2no66noF3 (pfx)) {
21411 if (opc == 0x1A) {
21412 DIP ("bndcu %s, %%bnd%d\n", oper, bnd);
21413 } else /* opc == 0x1B */ {
21414 DIP ("bndcn %s, %%bnd%d\n", oper, bnd);
21415 }
21416 } else if (have66noF2noF3 (pfx)) {
21417 if (opc == 0x1A) {
21418 DIP ("bndmov %s, %%bnd%d\n", oper, bnd);
21419 } else /* opc == 0x1B */ {
21420 DIP ("bndmov %%bnd%d, %s\n", bnd, oper);
21421 }
21422 } else if (haveNo66noF2noF3 (pfx)) {
21423 if (opc == 0x1A) {
21424 DIP ("bndldx %s, %%bnd%d\n", oper, bnd);
21425 } else /* opc == 0x1B */ {
21426 DIP ("bndstx %%bnd%d, %s\n", bnd, oper);
21427 }
21428 } else goto decode_failure;
21429
21430 return delta;
21431 }
21432
sewardj80611e32012-01-20 13:07:24 +000021433 case 0xA2: { /* CPUID */
21434 /* Uses dirty helper:
21435 void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* )
21436 declared to mod rax, wr rbx, rcx, rdx
21437 */
sewardje3a10d72014-11-11 12:49:21 +000021438 IRDirty* d = NULL;
21439 const HChar* fName = NULL;
21440 void* fAddr = NULL;
21441
21442 /* JRS 2014-11-11: this is a really horrible temp kludge to work
21443 around the fact that the Yosemite (OSX 10.10)
21444 /usr/lib/system/libdyld.dylib expects XSAVE/XRSTOR to be
21445 implemented, because amd64g_dirtyhelper_CPUID_avx_and_cx16
21446 claims they are supported, but so far they aren't. So cause
21447 it to fall back to a simpler CPU. The cleaner approach of
21448 setting CPUID(eax=1).OSXSAVE=0 and .XSAVE=0 isn't desirable
21449 since it will (per the official Intel guidelines) lead to
21450 software concluding that AVX isn't supported.
21451
21452 This is also a kludge in that putting these ifdefs here checks
21453 the build (host) architecture, when really we're checking the
21454 guest architecture. */
21455 Bool this_is_yosemite = False;
21456# if defined(VGP_amd64_darwin) && DARWIN_VERS == DARWIN_10_10
21457 this_is_yosemite = True;
21458# endif
21459
sewardj80611e32012-01-20 13:07:24 +000021460 if (haveF2orF3(pfx)) goto decode_failure;
mjwc31e6cb2013-09-13 13:30:45 +000021461 /* This isn't entirely correct, CPUID should depend on the VEX
21462 capabilities, not on the underlying CPU. See bug #324882. */
sewardje3a10d72014-11-11 12:49:21 +000021463 if (!this_is_yosemite &&
21464 (archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) &&
mjwc31e6cb2013-09-13 13:30:45 +000021465 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) &&
21466 (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
sewardjfe0c5e72012-06-15 15:48:07 +000021467 fName = "amd64g_dirtyhelper_CPUID_avx_and_cx16";
21468 fAddr = &amd64g_dirtyhelper_CPUID_avx_and_cx16;
21469 /* This is a Core-i5-2300-like machine */
21470 }
mjwc31e6cb2013-09-13 13:30:45 +000021471 else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) &&
21472 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16)) {
sewardj80611e32012-01-20 13:07:24 +000021473 fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16";
21474 fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16;
sewardjfe0c5e72012-06-15 15:48:07 +000021475 /* This is a Core-i5-670-like machine */
sewardj80611e32012-01-20 13:07:24 +000021476 }
21477 else {
21478 /* Give a CPUID for at least a baseline machine, SSE2
21479 only, and no CX16 */
21480 fName = "amd64g_dirtyhelper_CPUID_baseline";
21481 fAddr = &amd64g_dirtyhelper_CPUID_baseline;
21482 }
21483
21484 vassert(fName); vassert(fAddr);
21485 d = unsafeIRDirty_0_N ( 0/*regparms*/,
florian90419562013-08-15 20:54:52 +000021486 fName, fAddr, mkIRExprVec_1(IRExpr_BBPTR()) );
sewardj80611e32012-01-20 13:07:24 +000021487 /* declare guest state effects */
sewardj80611e32012-01-20 13:07:24 +000021488 d->nFxState = 4;
sewardjc9069f22012-06-01 16:09:50 +000021489 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj80611e32012-01-20 13:07:24 +000021490 d->fxState[0].fx = Ifx_Modify;
21491 d->fxState[0].offset = OFFB_RAX;
21492 d->fxState[0].size = 8;
21493 d->fxState[1].fx = Ifx_Write;
21494 d->fxState[1].offset = OFFB_RBX;
21495 d->fxState[1].size = 8;
21496 d->fxState[2].fx = Ifx_Modify;
21497 d->fxState[2].offset = OFFB_RCX;
21498 d->fxState[2].size = 8;
21499 d->fxState[3].fx = Ifx_Write;
21500 d->fxState[3].offset = OFFB_RDX;
21501 d->fxState[3].size = 8;
21502 /* execute the dirty call, side-effecting guest state */
21503 stmt( IRStmt_Dirty(d) );
21504 /* CPUID is a serialising insn. So, just in case someone is
21505 using it as a memory fence ... */
21506 stmt( IRStmt_MBE(Imbe_Fence) );
21507 DIP("cpuid\n");
21508 return delta;
21509 }
21510
sewardj38b1d692013-10-15 17:21:42 +000021511 case 0xA3: { /* BT Gv,Ev */
21512 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
21513 Bool ok = True;
sewardj80611e32012-01-20 13:07:24 +000021514 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
sewardj38b1d692013-10-15 17:21:42 +000021515 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone, &ok );
21516 if (!ok) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000021517 return delta;
sewardj38b1d692013-10-15 17:21:42 +000021518 }
sewardj80611e32012-01-20 13:07:24 +000021519
21520 case 0xA4: /* SHLDv imm8,Gv,Ev */
21521 modrm = getUChar(delta);
21522 d64 = delta + lengthAMode(pfx, delta);
21523 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
21524 delta = dis_SHLRD_Gv_Ev (
21525 vbi, pfx, delta, modrm, sz,
21526 mkU8(getUChar(d64)), True, /* literal */
21527 dis_buf, True /* left */ );
21528 return delta;
21529
21530 case 0xA5: /* SHLDv %cl,Gv,Ev */
21531 modrm = getUChar(delta);
21532 delta = dis_SHLRD_Gv_Ev (
21533 vbi, pfx, delta, modrm, sz,
21534 getIRegCL(), False, /* not literal */
21535 "%cl", True /* left */ );
21536 return delta;
21537
sewardj38b1d692013-10-15 17:21:42 +000021538 case 0xAB: { /* BTS Gv,Ev */
21539 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
21540 Bool ok = True;
sewardj80611e32012-01-20 13:07:24 +000021541 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
sewardj38b1d692013-10-15 17:21:42 +000021542 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet, &ok );
21543 if (!ok) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000021544 return delta;
sewardj38b1d692013-10-15 17:21:42 +000021545 }
sewardj80611e32012-01-20 13:07:24 +000021546
21547 case 0xAC: /* SHRDv imm8,Gv,Ev */
21548 modrm = getUChar(delta);
21549 d64 = delta + lengthAMode(pfx, delta);
21550 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
21551 delta = dis_SHLRD_Gv_Ev (
21552 vbi, pfx, delta, modrm, sz,
21553 mkU8(getUChar(d64)), True, /* literal */
21554 dis_buf, False /* right */ );
21555 return delta;
21556
21557 case 0xAD: /* SHRDv %cl,Gv,Ev */
21558 modrm = getUChar(delta);
21559 delta = dis_SHLRD_Gv_Ev (
21560 vbi, pfx, delta, modrm, sz,
21561 getIRegCL(), False, /* not literal */
21562 "%cl", False /* right */);
21563 return delta;
21564
21565 case 0xAF: /* IMUL Ev, Gv */
21566 if (haveF2orF3(pfx)) goto decode_failure;
21567 delta = dis_mul_E_G ( vbi, pfx, sz, delta );
21568 return delta;
21569
sewardj38b1d692013-10-15 17:21:42 +000021570 case 0xB0: { /* CMPXCHG Gb,Eb */
21571 Bool ok = True;
21572 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
21573 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta );
21574 if (!ok) goto decode_failure;
21575 return delta;
21576 }
21577
sewardj80611e32012-01-20 13:07:24 +000021578 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */
21579 Bool ok = True;
sewardj38b1d692013-10-15 17:21:42 +000021580 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
sewardj80611e32012-01-20 13:07:24 +000021581 if (sz != 2 && sz != 4 && sz != 8) goto decode_failure;
21582 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta );
21583 if (!ok) goto decode_failure;
21584 return delta;
21585 }
21586
sewardj38b1d692013-10-15 17:21:42 +000021587 case 0xB3: { /* BTR Gv,Ev */
21588 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
sewardj80611e32012-01-20 13:07:24 +000021589 Bool ok = True;
sewardj38b1d692013-10-15 17:21:42 +000021590 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
21591 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset, &ok );
sewardj80611e32012-01-20 13:07:24 +000021592 if (!ok) goto decode_failure;
21593 return delta;
21594 }
21595
sewardj80611e32012-01-20 13:07:24 +000021596 case 0xB6: /* MOVZXb Eb,Gv */
21597 if (haveF2orF3(pfx)) goto decode_failure;
21598 if (sz != 2 && sz != 4 && sz != 8)
21599 goto decode_failure;
21600 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False );
21601 return delta;
21602
21603 case 0xB7: /* MOVZXw Ew,Gv */
21604 if (haveF2orF3(pfx)) goto decode_failure;
21605 if (sz != 4 && sz != 8)
21606 goto decode_failure;
21607 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False );
21608 return delta;
21609
21610 case 0xBA: { /* Grp8 Ib,Ev */
sewardj38b1d692013-10-15 17:21:42 +000021611 /* We let dis_Grp8_Imm decide whether F2 or F3 are allowable. */
sewardj80611e32012-01-20 13:07:24 +000021612 Bool decode_OK = False;
sewardj80611e32012-01-20 13:07:24 +000021613 modrm = getUChar(delta);
21614 am_sz = lengthAMode(pfx,delta);
21615 d64 = getSDisp8(delta + am_sz);
21616 delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64,
21617 &decode_OK );
21618 if (!decode_OK)
21619 goto decode_failure;
21620 return delta;
21621 }
21622
sewardj38b1d692013-10-15 17:21:42 +000021623 case 0xBB: { /* BTC Gv,Ev */
21624 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
21625 Bool ok = False;
sewardj80611e32012-01-20 13:07:24 +000021626 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
sewardj38b1d692013-10-15 17:21:42 +000021627 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp, &ok );
21628 if (!ok) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000021629 return delta;
sewardj38b1d692013-10-15 17:21:42 +000021630 }
sewardj80611e32012-01-20 13:07:24 +000021631
21632 case 0xBC: /* BSF Gv,Ev */
sewardjcc3d2192013-03-27 11:37:33 +000021633 if (!haveF2orF3(pfx)
21634 || (haveF3noF2(pfx)
21635 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI))) {
21636 /* no-F2 no-F3 0F BC = BSF
21637 or F3 0F BC = REP; BSF on older CPUs. */
21638 delta = dis_bs_E_G ( vbi, pfx, sz, delta, True );
21639 return delta;
21640 }
21641 /* Fall through, since F3 0F BC is TZCNT, and needs to
21642 be handled by dis_ESC_0F__SSE4. */
21643 break;
sewardj80611e32012-01-20 13:07:24 +000021644
21645 case 0xBD: /* BSR Gv,Ev */
sewardjc8851af2012-08-23 20:14:51 +000021646 if (!haveF2orF3(pfx)
21647 || (haveF3noF2(pfx)
21648 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT))) {
21649 /* no-F2 no-F3 0F BD = BSR
21650 or F3 0F BD = REP; BSR on older CPUs. */
sewardj80611e32012-01-20 13:07:24 +000021651 delta = dis_bs_E_G ( vbi, pfx, sz, delta, False );
21652 return delta;
21653 }
21654 /* Fall through, since F3 0F BD is LZCNT, and needs to
21655 be handled by dis_ESC_0F__SSE4. */
21656 break;
21657
21658 case 0xBE: /* MOVSXb Eb,Gv */
21659 if (haveF2orF3(pfx)) goto decode_failure;
21660 if (sz != 2 && sz != 4 && sz != 8)
21661 goto decode_failure;
21662 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True );
21663 return delta;
21664
21665 case 0xBF: /* MOVSXw Ew,Gv */
21666 if (haveF2orF3(pfx)) goto decode_failure;
21667 if (sz != 4 && sz != 8)
21668 goto decode_failure;
21669 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True );
21670 return delta;
21671
sewardjdbcb1df2012-12-06 17:29:10 +000021672 case 0xC0: { /* XADD Gb,Eb */
21673 Bool decode_OK = False;
21674 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta );
21675 if (!decode_OK)
21676 goto decode_failure;
21677 return delta;
21678 }
21679
sewardj80611e32012-01-20 13:07:24 +000021680 case 0xC1: { /* XADD Gv,Ev */
21681 Bool decode_OK = False;
21682 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta );
21683 if (!decode_OK)
21684 goto decode_failure;
21685 return delta;
21686 }
21687
21688 case 0xC7: { /* CMPXCHG8B Ev, CMPXCHG16B Ev */
21689 IRType elemTy = sz==4 ? Ity_I32 : Ity_I64;
21690 IRTemp expdHi = newTemp(elemTy);
21691 IRTemp expdLo = newTemp(elemTy);
21692 IRTemp dataHi = newTemp(elemTy);
21693 IRTemp dataLo = newTemp(elemTy);
21694 IRTemp oldHi = newTemp(elemTy);
21695 IRTemp oldLo = newTemp(elemTy);
21696 IRTemp flags_old = newTemp(Ity_I64);
21697 IRTemp flags_new = newTemp(Ity_I64);
21698 IRTemp success = newTemp(Ity_I1);
21699 IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64;
21700 IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64;
21701 IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64;
21702 IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0);
21703 IRTemp expdHi64 = newTemp(Ity_I64);
21704 IRTemp expdLo64 = newTemp(Ity_I64);
21705
21706 /* Translate this using a DCAS, even if there is no LOCK
21707 prefix. Life is too short to bother with generating two
21708 different translations for the with/without-LOCK-prefix
21709 cases. */
21710 *expect_CAS = True;
21711
21712 /* Decode, and generate address. */
sewardj38b1d692013-10-15 17:21:42 +000021713 if (have66(pfx)) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000021714 if (sz != 4 && sz != 8) goto decode_failure;
21715 if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16))
21716 goto decode_failure;
21717 modrm = getUChar(delta);
21718 if (epartIsReg(modrm)) goto decode_failure;
21719 if (gregLO3ofRM(modrm) != 1) goto decode_failure;
sewardj38b1d692013-10-15 17:21:42 +000021720 if (haveF2orF3(pfx)) {
21721 /* Since the e-part is memory only, F2 or F3 (one or the
21722 other) is acceptable if LOCK is also present. But only
21723 for cmpxchg8b. */
21724 if (sz == 8) goto decode_failure;
21725 if (haveF2andF3(pfx) || !haveLOCK(pfx)) goto decode_failure;
21726 }
21727
sewardj80611e32012-01-20 13:07:24 +000021728 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21729 delta += alen;
21730
21731 /* cmpxchg16b requires an alignment check. */
21732 if (sz == 8)
21733 gen_SEGV_if_not_16_aligned( addr );
21734
21735 /* Get the expected and new values. */
21736 assign( expdHi64, getIReg64(R_RDX) );
21737 assign( expdLo64, getIReg64(R_RAX) );
21738
21739 /* These are the correctly-sized expected and new values.
21740 However, we also get expdHi64/expdLo64 above as 64-bits
21741 regardless, because we will need them later in the 32-bit
21742 case (paradoxically). */
21743 assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64))
21744 : mkexpr(expdHi64) );
21745 assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64))
21746 : mkexpr(expdLo64) );
21747 assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) );
21748 assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) );
21749
21750 /* Do the DCAS */
21751 stmt( IRStmt_CAS(
21752 mkIRCAS( oldHi, oldLo,
21753 Iend_LE, mkexpr(addr),
21754 mkexpr(expdHi), mkexpr(expdLo),
21755 mkexpr(dataHi), mkexpr(dataLo)
21756 )));
21757
21758 /* success when oldHi:oldLo == expdHi:expdLo */
21759 assign( success,
21760 binop(opCasCmpEQ,
21761 binop(opOR,
21762 binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)),
21763 binop(opXOR, mkexpr(oldLo), mkexpr(expdLo))
21764 ),
21765 zero
21766 ));
21767
21768 /* If the DCAS is successful, that is to say oldHi:oldLo ==
21769 expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
21770 which is where they came from originally. Both the actual
21771 contents of these two regs, and any shadow values, are
21772 unchanged. If the DCAS fails then we're putting into
21773 RDX:RAX the value seen in memory. */
21774 /* Now of course there's a complication in the 32-bit case
21775 (bah!): if the DCAS succeeds, we need to leave RDX:RAX
21776 unchanged; but if we use the same scheme as in the 64-bit
21777 case, we get hit by the standard rule that a write to the
21778 bottom 32 bits of an integer register zeros the upper 32
21779 bits. And so the upper halves of RDX and RAX mysteriously
21780 become zero. So we have to stuff back in the original
21781 64-bit values which we previously stashed in
21782 expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
21783 /* It's just _so_ much fun ... */
21784 putIRegRDX( 8,
florian99dd03e2013-01-29 03:56:06 +000021785 IRExpr_ITE( mkexpr(success),
21786 mkexpr(expdHi64),
21787 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi))
21788 : mkexpr(oldHi)
sewardj80611e32012-01-20 13:07:24 +000021789 ));
21790 putIRegRAX( 8,
florian99dd03e2013-01-29 03:56:06 +000021791 IRExpr_ITE( mkexpr(success),
21792 mkexpr(expdLo64),
21793 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldLo))
21794 : mkexpr(oldLo)
sewardj80611e32012-01-20 13:07:24 +000021795 ));
21796
21797 /* Copy the success bit into the Z flag and leave the others
21798 unchanged */
21799 assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all()));
21800 assign(
21801 flags_new,
21802 binop(Iop_Or64,
21803 binop(Iop_And64, mkexpr(flags_old),
21804 mkU64(~AMD64G_CC_MASK_Z)),
21805 binop(Iop_Shl64,
21806 binop(Iop_And64,
21807 unop(Iop_1Uto64, mkexpr(success)), mkU64(1)),
21808 mkU8(AMD64G_CC_SHIFT_Z)) ));
21809
21810 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
21811 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
21812 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
21813 /* Set NDEP even though it isn't used. This makes
21814 redundant-PUT elimination of previous stores to this field
21815 work better. */
21816 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
21817
21818 /* Sheesh. Aren't you glad it was me and not you that had to
21819 write and validate all this grunge? */
21820
21821 DIP("cmpxchg8b %s\n", dis_buf);
21822 return delta;
21823 }
21824
21825 case 0xC8: /* BSWAP %eax */
21826 case 0xC9:
21827 case 0xCA:
21828 case 0xCB:
21829 case 0xCC:
21830 case 0xCD:
21831 case 0xCE:
21832 case 0xCF: /* BSWAP %edi */
21833 if (haveF2orF3(pfx)) goto decode_failure;
21834 /* According to the AMD64 docs, this insn can have size 4 or
21835 8. */
21836 if (sz == 4) {
21837 t1 = newTemp(Ity_I32);
sewardj80611e32012-01-20 13:07:24 +000021838 assign( t1, getIRegRexB(4, pfx, opc-0xC8) );
sewardjf85e1772012-07-15 10:11:10 +000021839 t2 = math_BSWAP( t1, Ity_I32 );
sewardj80611e32012-01-20 13:07:24 +000021840 putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2));
21841 DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8));
21842 return delta;
21843 }
21844 if (sz == 8) {
sewardj80611e32012-01-20 13:07:24 +000021845 t1 = newTemp(Ity_I64);
21846 t2 = newTemp(Ity_I64);
21847 assign( t1, getIRegRexB(8, pfx, opc-0xC8) );
sewardjf85e1772012-07-15 10:11:10 +000021848 t2 = math_BSWAP( t1, Ity_I64 );
sewardj80611e32012-01-20 13:07:24 +000021849 putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2));
21850 DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8));
21851 return delta;
21852 }
21853 goto decode_failure;
21854
21855 default:
21856 break;
21857
21858 } /* first switch */
21859
21860
21861 /* =-=-=-=-=-=-=-=-= MMXery =-=-=-=-=-=-=-=-= */
21862 /* In the second switch, pick off MMX insns. */
21863
21864 if (!have66orF2orF3(pfx)) {
21865 /* So there's no SIMD prefix. */
21866
21867 vassert(sz == 4 || sz == 8);
21868
21869 switch (opc) { /* second switch */
21870
21871 case 0x71:
21872 case 0x72:
21873 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
21874
21875 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
21876 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
21877 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
21878 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
21879
21880 case 0xFC:
21881 case 0xFD:
21882 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
21883
21884 case 0xEC:
21885 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
21886
21887 case 0xDC:
21888 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
21889
21890 case 0xF8:
21891 case 0xF9:
21892 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
21893
21894 case 0xE8:
21895 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
21896
21897 case 0xD8:
21898 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
21899
21900 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
21901 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
21902
21903 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
21904
21905 case 0x74:
21906 case 0x75:
21907 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
21908
21909 case 0x64:
21910 case 0x65:
21911 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
21912
21913 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
21914 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
21915 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
21916
21917 case 0x68:
21918 case 0x69:
21919 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
21920
21921 case 0x60:
21922 case 0x61:
21923 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
21924
21925 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
21926 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
21927 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
21928 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
21929
21930 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
21931 case 0xF2:
21932 case 0xF3:
21933
21934 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
21935 case 0xD2:
21936 case 0xD3:
21937
21938 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
21939 case 0xE2: {
21940 Bool decode_OK = False;
21941 delta = dis_MMX ( &decode_OK, vbi, pfx, sz, deltaIN );
21942 if (decode_OK)
21943 return delta;
21944 goto decode_failure;
21945 }
21946
21947 default:
21948 break;
21949 } /* second switch */
21950
21951 }
21952
21953 /* A couple of MMX corner cases */
21954 if (opc == 0x0E/* FEMMS */ || opc == 0x77/* EMMS */) {
21955 if (sz != 4)
21956 goto decode_failure;
21957 do_EMMS_preamble();
21958 DIP("{f}emms\n");
21959 return delta;
21960 }
21961
21962 /* =-=-=-=-=-=-=-=-= SSE2ery =-=-=-=-=-=-=-=-= */
21963 /* Perhaps it's an SSE or SSE2 instruction. We can try this
21964 without checking the guest hwcaps because SSE2 is a baseline
21965 facility in 64 bit mode. */
21966 {
21967 Bool decode_OK = False;
sewardj30fc0582012-02-16 13:45:13 +000021968 delta = dis_ESC_0F__SSE2 ( &decode_OK, vbi, pfx, sz, deltaIN, dres );
sewardj80611e32012-01-20 13:07:24 +000021969 if (decode_OK)
21970 return delta;
21971 }
21972
21973 /* =-=-=-=-=-=-=-=-= SSE3ery =-=-=-=-=-=-=-=-= */
21974 /* Perhaps it's a SSE3 instruction. FIXME: check guest hwcaps
21975 first. */
21976 {
21977 Bool decode_OK = False;
21978 delta = dis_ESC_0F__SSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
21979 if (decode_OK)
21980 return delta;
21981 }
21982
21983 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
21984 /* Perhaps it's a SSE4 instruction. FIXME: check guest hwcaps
21985 first. */
21986 {
21987 Bool decode_OK = False;
21988 delta = dis_ESC_0F__SSE4 ( &decode_OK,
21989 archinfo, vbi, pfx, sz, deltaIN );
21990 if (decode_OK)
21991 return delta;
21992 }
21993
21994 decode_failure:
21995 return deltaIN; /* fail */
21996}
21997
21998
21999/*------------------------------------------------------------*/
22000/*--- ---*/
22001/*--- Top-level post-escape decoders: dis_ESC_0F38 ---*/
22002/*--- ---*/
22003/*------------------------------------------------------------*/
22004
22005__attribute__((noinline))
22006static
22007Long dis_ESC_0F38 (
22008 /*MB_OUT*/DisResult* dres,
florianbeac5302014-12-31 12:09:38 +000022009 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
sewardj80611e32012-01-20 13:07:24 +000022010 Bool resteerCisOk,
22011 void* callback_opaque,
floriancacba8e2014-12-15 18:58:07 +000022012 const VexArchInfo* archinfo,
22013 const VexAbiInfo* vbi,
sewardj80611e32012-01-20 13:07:24 +000022014 Prefix pfx, Int sz, Long deltaIN
22015 )
22016{
22017 Long delta = deltaIN;
22018 UChar opc = getUChar(delta);
22019 delta++;
22020 switch (opc) {
22021
sewardj1a237be2012-07-16 08:35:31 +000022022 case 0xF0: /* 0F 38 F0 = MOVBE m16/32/64(E), r16/32/64(G) */
22023 case 0xF1: { /* 0F 38 F1 = MOVBE r16/32/64(G), m16/32/64(E) */
22024 if (!haveF2orF3(pfx) && !haveVEX(pfx)
22025 && (sz == 2 || sz == 4 || sz == 8)) {
22026 IRTemp addr = IRTemp_INVALID;
22027 UChar modrm = 0;
22028 Int alen = 0;
22029 HChar dis_buf[50];
22030 modrm = getUChar(delta);
22031 if (epartIsReg(modrm)) break;
22032 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22033 delta += alen;
22034 IRType ty = szToITy(sz);
22035 IRTemp src = newTemp(ty);
22036 if (opc == 0xF0) { /* LOAD */
22037 assign(src, loadLE(ty, mkexpr(addr)));
22038 IRTemp dst = math_BSWAP(src, ty);
22039 putIRegG(sz, pfx, modrm, mkexpr(dst));
22040 DIP("movbe %s,%s\n", dis_buf, nameIRegG(sz, pfx, modrm));
22041 } else { /* STORE */
22042 assign(src, getIRegG(sz, pfx, modrm));
22043 IRTemp dst = math_BSWAP(src, ty);
22044 storeLE(mkexpr(addr), mkexpr(dst));
22045 DIP("movbe %s,%s\n", nameIRegG(sz, pfx, modrm), dis_buf);
22046 }
22047 return delta;
sewardjf85e1772012-07-15 10:11:10 +000022048 }
sewardj1a237be2012-07-16 08:35:31 +000022049 /* else fall through; maybe one of the decoders below knows what
22050 it is. */
22051 break;
sewardjf85e1772012-07-15 10:11:10 +000022052 }
22053
sewardj80611e32012-01-20 13:07:24 +000022054 default:
22055 break;
22056
22057 }
22058
22059 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22060 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22061 rather than proceeding indiscriminately. */
22062 {
22063 Bool decode_OK = False;
22064 delta = dis_ESC_0F38__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22065 if (decode_OK)
22066 return delta;
22067 }
22068
22069 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22070 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22071 rather than proceeding indiscriminately. */
22072 {
22073 Bool decode_OK = False;
22074 delta = dis_ESC_0F38__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
22075 if (decode_OK)
22076 return delta;
22077 }
22078
sewardj1a237be2012-07-16 08:35:31 +000022079 /*decode_failure:*/
sewardj80611e32012-01-20 13:07:24 +000022080 return deltaIN; /* fail */
22081}
22082
22083
22084/*------------------------------------------------------------*/
22085/*--- ---*/
22086/*--- Top-level post-escape decoders: dis_ESC_0F3A ---*/
22087/*--- ---*/
22088/*------------------------------------------------------------*/
22089
22090__attribute__((noinline))
22091static
22092Long dis_ESC_0F3A (
22093 /*MB_OUT*/DisResult* dres,
florianbeac5302014-12-31 12:09:38 +000022094 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
sewardj80611e32012-01-20 13:07:24 +000022095 Bool resteerCisOk,
22096 void* callback_opaque,
floriancacba8e2014-12-15 18:58:07 +000022097 const VexArchInfo* archinfo,
22098 const VexAbiInfo* vbi,
sewardj80611e32012-01-20 13:07:24 +000022099 Prefix pfx, Int sz, Long deltaIN
22100 )
22101{
22102 Long delta = deltaIN;
22103 UChar opc = getUChar(delta);
22104 delta++;
22105 switch (opc) {
22106
22107 default:
22108 break;
22109
22110 }
22111
22112 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22113 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22114 rather than proceeding indiscriminately. */
22115 {
22116 Bool decode_OK = False;
22117 delta = dis_ESC_0F3A__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22118 if (decode_OK)
22119 return delta;
22120 }
22121
22122 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22123 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22124 rather than proceeding indiscriminately. */
22125 {
22126 Bool decode_OK = False;
22127 delta = dis_ESC_0F3A__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
22128 if (decode_OK)
22129 return delta;
22130 }
22131
sewardj80611e32012-01-20 13:07:24 +000022132 return deltaIN; /* fail */
22133}
22134
22135
22136/*------------------------------------------------------------*/
22137/*--- ---*/
sewardjc4530ae2012-05-21 10:18:49 +000022138/*--- Top-level post-escape decoders: dis_ESC_0F__VEX ---*/
22139/*--- ---*/
22140/*------------------------------------------------------------*/
22141
sewardj56c30312012-06-12 08:45:39 +000022142/* FIXME: common up with the _256_ version below? */
sewardjc4530ae2012-05-21 10:18:49 +000022143static
22144Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG (
floriancacba8e2014-12-15 18:58:07 +000022145 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
florian55085f82012-11-21 00:36:55 +000022146 Prefix pfx, Long delta, const HChar* name,
sewardjc4530ae2012-05-21 10:18:49 +000022147 /* The actual operation. Use either 'op' or 'opfn',
22148 but not both. */
22149 IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
sewardj44565e82012-05-22 09:14:15 +000022150 Bool invertLeftArg,
22151 Bool swapArgs
sewardjc4530ae2012-05-21 10:18:49 +000022152 )
22153{
22154 UChar modrm = getUChar(delta);
22155 UInt rD = gregOfRexRM(pfx, modrm);
22156 UInt rSL = getVexNvvvv(pfx);
22157 IRTemp tSL = newTemp(Ity_V128);
22158 IRTemp tSR = newTemp(Ity_V128);
22159 IRTemp addr = IRTemp_INVALID;
22160 HChar dis_buf[50];
22161 Int alen = 0;
22162 vassert(0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*WIG?*/);
22163
22164 assign(tSL, invertLeftArg ? unop(Iop_NotV128, getXMMReg(rSL))
22165 : getXMMReg(rSL));
22166
22167 if (epartIsReg(modrm)) {
22168 UInt rSR = eregOfRexRM(pfx, modrm);
22169 delta += 1;
22170 assign(tSR, getXMMReg(rSR));
22171 DIP("%s %s,%s,%s\n",
22172 name, nameXMMReg(rSR), nameXMMReg(rSL), nameXMMReg(rD));
22173 } else {
22174 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
22175 delta += alen;
22176 assign(tSR, loadLE(Ity_V128, mkexpr(addr)));
22177 DIP("%s %s,%s,%s\n",
22178 name, dis_buf, nameXMMReg(rSL), nameXMMReg(rD));
22179 }
22180
22181 IRTemp res = IRTemp_INVALID;
22182 if (op != Iop_INVALID) {
22183 vassert(opFn == NULL);
22184 res = newTemp(Ity_V128);
sewardj9571dc02014-01-26 18:34:23 +000022185 if (requiresRMode(op)) {
22186 IRTemp rm = newTemp(Ity_I32);
22187 assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
22188 assign(res, swapArgs
22189 ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
22190 : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
22191 } else {
22192 assign(res, swapArgs
22193 ? binop(op, mkexpr(tSR), mkexpr(tSL))
22194 : binop(op, mkexpr(tSL), mkexpr(tSR)));
22195 }
sewardjc4530ae2012-05-21 10:18:49 +000022196 } else {
22197 vassert(opFn != NULL);
sewardj44565e82012-05-22 09:14:15 +000022198 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
sewardjc4530ae2012-05-21 10:18:49 +000022199 }
22200
22201 putYMMRegLoAndZU(rD, mkexpr(res));
22202
22203 *uses_vvvv = True;
22204 return delta;
22205}
22206
22207
22208/* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, with a simple IROp
sewardj44565e82012-05-22 09:14:15 +000022209 for the operation, no inversion of the left arg, and no swapping of
22210 args. */
sewardjc4530ae2012-05-21 10:18:49 +000022211static
22212Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple (
floriancacba8e2014-12-15 18:58:07 +000022213 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
florian55085f82012-11-21 00:36:55 +000022214 Prefix pfx, Long delta, const HChar* name,
sewardjc4530ae2012-05-21 10:18:49 +000022215 IROp op
22216 )
22217{
22218 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
sewardj44565e82012-05-22 09:14:15 +000022219 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
sewardjc4530ae2012-05-21 10:18:49 +000022220}
22221
22222
22223/* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, using the given IR
sewardj44565e82012-05-22 09:14:15 +000022224 generator to compute the result, no inversion of the left
22225 arg, and no swapping of args. */
sewardjc4530ae2012-05-21 10:18:49 +000022226static
22227Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex (
floriancacba8e2014-12-15 18:58:07 +000022228 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
florian55085f82012-11-21 00:36:55 +000022229 Prefix pfx, Long delta, const HChar* name,
sewardjc4530ae2012-05-21 10:18:49 +000022230 IRTemp(*opFn)(IRTemp,IRTemp)
22231 )
22232{
22233 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
sewardj44565e82012-05-22 09:14:15 +000022234 uses_vvvv, vbi, pfx, delta, name,
22235 Iop_INVALID, opFn, False, False );
sewardjc4530ae2012-05-21 10:18:49 +000022236}
22237
22238
sewardj4c0a7ac2012-06-21 09:08:19 +000022239/* Vector by scalar shift of V by the amount specified at the bottom
22240 of E. */
floriancacba8e2014-12-15 18:58:07 +000022241static ULong dis_AVX128_shiftV_byE ( const VexAbiInfo* vbi,
sewardj4c0a7ac2012-06-21 09:08:19 +000022242 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000022243 const HChar* opname, IROp op )
sewardj4c0a7ac2012-06-21 09:08:19 +000022244{
22245 HChar dis_buf[50];
22246 Int alen, size;
22247 IRTemp addr;
22248 Bool shl, shr, sar;
22249 UChar modrm = getUChar(delta);
22250 UInt rG = gregOfRexRM(pfx,modrm);
22251 UInt rV = getVexNvvvv(pfx);;
22252 IRTemp g0 = newTemp(Ity_V128);
22253 IRTemp g1 = newTemp(Ity_V128);
22254 IRTemp amt = newTemp(Ity_I64);
22255 IRTemp amt8 = newTemp(Ity_I8);
22256 if (epartIsReg(modrm)) {
22257 UInt rE = eregOfRexRM(pfx,modrm);
22258 assign( amt, getXMMRegLane64(rE, 0) );
22259 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
22260 nameXMMReg(rV), nameXMMReg(rG) );
22261 delta++;
22262 } else {
22263 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22264 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
22265 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
22266 delta += alen;
22267 }
22268 assign( g0, getXMMReg(rV) );
22269 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
22270
22271 shl = shr = sar = False;
22272 size = 0;
22273 switch (op) {
22274 case Iop_ShlN16x8: shl = True; size = 32; break;
22275 case Iop_ShlN32x4: shl = True; size = 32; break;
22276 case Iop_ShlN64x2: shl = True; size = 64; break;
22277 case Iop_SarN16x8: sar = True; size = 16; break;
22278 case Iop_SarN32x4: sar = True; size = 32; break;
22279 case Iop_ShrN16x8: shr = True; size = 16; break;
22280 case Iop_ShrN32x4: shr = True; size = 32; break;
22281 case Iop_ShrN64x2: shr = True; size = 64; break;
22282 default: vassert(0);
22283 }
22284
22285 if (shl || shr) {
22286 assign(
22287 g1,
florian99dd03e2013-01-29 03:56:06 +000022288 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +000022289 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
florian99dd03e2013-01-29 03:56:06 +000022290 binop(op, mkexpr(g0), mkexpr(amt8)),
22291 mkV128(0x0000)
sewardj4c0a7ac2012-06-21 09:08:19 +000022292 )
22293 );
22294 } else
22295 if (sar) {
22296 assign(
22297 g1,
florian99dd03e2013-01-29 03:56:06 +000022298 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +000022299 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
florian99dd03e2013-01-29 03:56:06 +000022300 binop(op, mkexpr(g0), mkexpr(amt8)),
22301 binop(op, mkexpr(g0), mkU8(size-1))
sewardj4c0a7ac2012-06-21 09:08:19 +000022302 )
22303 );
22304 } else {
22305 vassert(0);
22306 }
22307
22308 putYMMRegLoAndZU( rG, mkexpr(g1) );
22309 return delta;
22310}
22311
22312
sewardjcc3d2192013-03-27 11:37:33 +000022313/* Vector by scalar shift of V by the amount specified at the bottom
22314 of E. */
floriancacba8e2014-12-15 18:58:07 +000022315static ULong dis_AVX256_shiftV_byE ( const VexAbiInfo* vbi,
sewardjcc3d2192013-03-27 11:37:33 +000022316 Prefix pfx, Long delta,
22317 const HChar* opname, IROp op )
22318{
22319 HChar dis_buf[50];
22320 Int alen, size;
22321 IRTemp addr;
22322 Bool shl, shr, sar;
22323 UChar modrm = getUChar(delta);
22324 UInt rG = gregOfRexRM(pfx,modrm);
22325 UInt rV = getVexNvvvv(pfx);;
22326 IRTemp g0 = newTemp(Ity_V256);
22327 IRTemp g1 = newTemp(Ity_V256);
22328 IRTemp amt = newTemp(Ity_I64);
22329 IRTemp amt8 = newTemp(Ity_I8);
22330 if (epartIsReg(modrm)) {
22331 UInt rE = eregOfRexRM(pfx,modrm);
22332 assign( amt, getXMMRegLane64(rE, 0) );
22333 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
22334 nameYMMReg(rV), nameYMMReg(rG) );
22335 delta++;
22336 } else {
22337 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22338 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
22339 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
22340 delta += alen;
22341 }
22342 assign( g0, getYMMReg(rV) );
22343 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
22344
22345 shl = shr = sar = False;
22346 size = 0;
22347 switch (op) {
22348 case Iop_ShlN16x16: shl = True; size = 32; break;
22349 case Iop_ShlN32x8: shl = True; size = 32; break;
22350 case Iop_ShlN64x4: shl = True; size = 64; break;
22351 case Iop_SarN16x16: sar = True; size = 16; break;
22352 case Iop_SarN32x8: sar = True; size = 32; break;
22353 case Iop_ShrN16x16: shr = True; size = 16; break;
22354 case Iop_ShrN32x8: shr = True; size = 32; break;
22355 case Iop_ShrN64x4: shr = True; size = 64; break;
22356 default: vassert(0);
22357 }
22358
22359 if (shl || shr) {
22360 assign(
22361 g1,
22362 IRExpr_ITE(
22363 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
22364 binop(op, mkexpr(g0), mkexpr(amt8)),
22365 binop(Iop_V128HLtoV256, mkV128(0), mkV128(0))
22366 )
22367 );
22368 } else
22369 if (sar) {
22370 assign(
22371 g1,
22372 IRExpr_ITE(
22373 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
22374 binop(op, mkexpr(g0), mkexpr(amt8)),
22375 binop(op, mkexpr(g0), mkU8(size-1))
22376 )
22377 );
22378 } else {
22379 vassert(0);
22380 }
22381
22382 putYMMReg( rG, mkexpr(g1) );
22383 return delta;
22384}
22385
22386
22387/* Vector by vector shift of V by the amount specified at the bottom
22388 of E. Vector by vector shifts are defined for all shift amounts,
22389 so not using Iop_S*x* here (and SSE2 doesn't support variable shifts
22390 anyway). */
floriancacba8e2014-12-15 18:58:07 +000022391static ULong dis_AVX_var_shiftV_byE ( const VexAbiInfo* vbi,
sewardjcc3d2192013-03-27 11:37:33 +000022392 Prefix pfx, Long delta,
22393 const HChar* opname, IROp op, Bool isYMM )
22394{
22395 HChar dis_buf[50];
22396 Int alen, size, i;
22397 IRTemp addr;
22398 UChar modrm = getUChar(delta);
22399 UInt rG = gregOfRexRM(pfx,modrm);
22400 UInt rV = getVexNvvvv(pfx);;
22401 IRTemp sV = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128);
22402 IRTemp amt = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128);
22403 IRTemp amts[8], sVs[8], res[8];
22404 if (epartIsReg(modrm)) {
22405 UInt rE = eregOfRexRM(pfx,modrm);
22406 assign( amt, isYMM ? getYMMReg(rE) : getXMMReg(rE) );
22407 if (isYMM) {
22408 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rE),
22409 nameYMMReg(rV), nameYMMReg(rG) );
22410 } else {
22411 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
22412 nameXMMReg(rV), nameXMMReg(rG) );
22413 }
22414 delta++;
22415 } else {
22416 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22417 assign( amt, loadLE(isYMM ? Ity_V256 : Ity_V128, mkexpr(addr)) );
22418 if (isYMM) {
22419 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV),
22420 nameYMMReg(rG) );
22421 } else {
22422 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV),
22423 nameXMMReg(rG) );
22424 }
22425 delta += alen;
22426 }
22427 assign( sV, isYMM ? getYMMReg(rV) : getXMMReg(rV) );
22428
22429 size = 0;
22430 switch (op) {
22431 case Iop_Shl32: size = 32; break;
22432 case Iop_Shl64: size = 64; break;
22433 case Iop_Sar32: size = 32; break;
22434 case Iop_Shr32: size = 32; break;
22435 case Iop_Shr64: size = 64; break;
22436 default: vassert(0);
22437 }
22438
22439 for (i = 0; i < 8; i++) {
22440 sVs[i] = IRTemp_INVALID;
22441 amts[i] = IRTemp_INVALID;
22442 }
22443 switch (size) {
22444 case 32:
22445 if (isYMM) {
22446 breakupV256to32s( sV, &sVs[7], &sVs[6], &sVs[5], &sVs[4],
22447 &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
22448 breakupV256to32s( amt, &amts[7], &amts[6], &amts[5], &amts[4],
22449 &amts[3], &amts[2], &amts[1], &amts[0] );
22450 } else {
22451 breakupV128to32s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
22452 breakupV128to32s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
22453 }
22454 break;
22455 case 64:
22456 if (isYMM) {
22457 breakupV256to64s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
22458 breakupV256to64s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
22459 } else {
22460 breakupV128to64s( sV, &sVs[1], &sVs[0] );
22461 breakupV128to64s( amt, &amts[1], &amts[0] );
22462 }
22463 break;
22464 default: vassert(0);
22465 }
22466 for (i = 0; i < 8; i++)
22467 if (sVs[i] != IRTemp_INVALID) {
22468 res[i] = size == 32 ? newTemp(Ity_I32) : newTemp(Ity_I64);
22469 assign( res[i],
22470 IRExpr_ITE(
22471 binop(size == 32 ? Iop_CmpLT32U : Iop_CmpLT64U,
22472 mkexpr(amts[i]),
22473 size == 32 ? mkU32(size) : mkU64(size)),
22474 binop(op, mkexpr(sVs[i]),
22475 unop(size == 32 ? Iop_32to8 : Iop_64to8,
22476 mkexpr(amts[i]))),
22477 op == Iop_Sar32 ? binop(op, mkexpr(sVs[i]), mkU8(size-1))
22478 : size == 32 ? mkU32(0) : mkU64(0)
22479 ));
22480 }
22481 switch (size) {
22482 case 32:
22483 for (i = 0; i < 8; i++)
22484 putYMMRegLane32( rG, i, (i < 4 || isYMM)
22485 ? mkexpr(res[i]) : mkU32(0) );
22486 break;
22487 case 64:
22488 for (i = 0; i < 4; i++)
22489 putYMMRegLane64( rG, i, (i < 2 || isYMM)
22490 ? mkexpr(res[i]) : mkU64(0) );
22491 break;
22492 default: vassert(0);
22493 }
22494
22495 return delta;
22496}
22497
22498
sewardjc4530ae2012-05-21 10:18:49 +000022499/* Vector by scalar shift of E into V, by an immediate byte. Modified
22500 version of dis_SSE_shiftE_imm. */
22501static
22502Long dis_AVX128_shiftE_to_V_imm( Prefix pfx,
florian55085f82012-11-21 00:36:55 +000022503 Long delta, const HChar* opname, IROp op )
sewardjc4530ae2012-05-21 10:18:49 +000022504{
22505 Bool shl, shr, sar;
22506 UChar rm = getUChar(delta);
22507 IRTemp e0 = newTemp(Ity_V128);
22508 IRTemp e1 = newTemp(Ity_V128);
22509 UInt rD = getVexNvvvv(pfx);
22510 UChar amt, size;
22511 vassert(epartIsReg(rm));
22512 vassert(gregLO3ofRM(rm) == 2
22513 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
22514 amt = getUChar(delta+1);
22515 delta += 2;
22516 DIP("%s $%d,%s,%s\n", opname,
22517 (Int)amt,
22518 nameXMMReg(eregOfRexRM(pfx,rm)),
22519 nameXMMReg(rD));
22520 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
22521
22522 shl = shr = sar = False;
22523 size = 0;
22524 switch (op) {
sewardj15ad1942012-06-20 10:21:05 +000022525 case Iop_ShlN16x8: shl = True; size = 16; break;
sewardjc4530ae2012-05-21 10:18:49 +000022526 case Iop_ShlN32x4: shl = True; size = 32; break;
sewardje8a7eb72012-06-12 14:59:17 +000022527 case Iop_ShlN64x2: shl = True; size = 64; break;
sewardjfe0c5e72012-06-15 15:48:07 +000022528 case Iop_SarN16x8: sar = True; size = 16; break;
sewardj15ad1942012-06-20 10:21:05 +000022529 case Iop_SarN32x4: sar = True; size = 32; break;
sewardj6faf7cc2012-05-25 15:53:01 +000022530 case Iop_ShrN16x8: shr = True; size = 16; break;
sewardj251b59e2012-05-25 13:51:07 +000022531 case Iop_ShrN32x4: shr = True; size = 32; break;
sewardj56c30312012-06-12 08:45:39 +000022532 case Iop_ShrN64x2: shr = True; size = 64; break;
sewardjc4530ae2012-05-21 10:18:49 +000022533 default: vassert(0);
22534 }
22535
22536 if (shl || shr) {
22537 assign( e1, amt >= size
22538 ? mkV128(0x0000)
22539 : binop(op, mkexpr(e0), mkU8(amt))
22540 );
22541 } else
22542 if (sar) {
22543 assign( e1, amt >= size
22544 ? binop(op, mkexpr(e0), mkU8(size-1))
22545 : binop(op, mkexpr(e0), mkU8(amt))
22546 );
22547 } else {
22548 vassert(0);
22549 }
22550
22551 putYMMRegLoAndZU( rD, mkexpr(e1) );
22552 return delta;
22553}
22554
22555
sewardjcc3d2192013-03-27 11:37:33 +000022556/* Vector by scalar shift of E into V, by an immediate byte. Modified
22557 version of dis_AVX128_shiftE_to_V_imm. */
22558static
22559Long dis_AVX256_shiftE_to_V_imm( Prefix pfx,
22560 Long delta, const HChar* opname, IROp op )
22561{
22562 Bool shl, shr, sar;
22563 UChar rm = getUChar(delta);
22564 IRTemp e0 = newTemp(Ity_V256);
22565 IRTemp e1 = newTemp(Ity_V256);
22566 UInt rD = getVexNvvvv(pfx);
22567 UChar amt, size;
22568 vassert(epartIsReg(rm));
22569 vassert(gregLO3ofRM(rm) == 2
22570 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
22571 amt = getUChar(delta+1);
22572 delta += 2;
22573 DIP("%s $%d,%s,%s\n", opname,
22574 (Int)amt,
22575 nameYMMReg(eregOfRexRM(pfx,rm)),
22576 nameYMMReg(rD));
22577 assign( e0, getYMMReg(eregOfRexRM(pfx,rm)) );
22578
22579 shl = shr = sar = False;
22580 size = 0;
22581 switch (op) {
22582 case Iop_ShlN16x16: shl = True; size = 16; break;
22583 case Iop_ShlN32x8: shl = True; size = 32; break;
22584 case Iop_ShlN64x4: shl = True; size = 64; break;
22585 case Iop_SarN16x16: sar = True; size = 16; break;
22586 case Iop_SarN32x8: sar = True; size = 32; break;
22587 case Iop_ShrN16x16: shr = True; size = 16; break;
22588 case Iop_ShrN32x8: shr = True; size = 32; break;
22589 case Iop_ShrN64x4: shr = True; size = 64; break;
22590 default: vassert(0);
22591 }
22592
22593
22594 if (shl || shr) {
22595 assign( e1, amt >= size
22596 ? binop(Iop_V128HLtoV256, mkV128(0), mkV128(0))
22597 : binop(op, mkexpr(e0), mkU8(amt))
22598 );
22599 } else
22600 if (sar) {
22601 assign( e1, amt >= size
22602 ? binop(op, mkexpr(e0), mkU8(size-1))
22603 : binop(op, mkexpr(e0), mkU8(amt))
22604 );
22605 } else {
22606 vassert(0);
22607 }
22608
22609 putYMMReg( rD, mkexpr(e1) );
22610 return delta;
22611}
22612
22613
sewardjc4530ae2012-05-21 10:18:49 +000022614/* Lower 64-bit lane only AVX128 binary operation:
22615 G[63:0] = V[63:0] `op` E[63:0]
22616 G[127:64] = V[127:64]
22617 G[255:128] = 0.
22618 The specified op must be of the 64F0x2 kind, so that it
22619 copies the upper half of the left operand to the result.
22620*/
22621static Long dis_AVX128_E_V_to_G_lo64 ( /*OUT*/Bool* uses_vvvv,
floriancacba8e2014-12-15 18:58:07 +000022622 const VexAbiInfo* vbi,
sewardjc4530ae2012-05-21 10:18:49 +000022623 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000022624 const HChar* opname, IROp op )
sewardjc4530ae2012-05-21 10:18:49 +000022625{
22626 HChar dis_buf[50];
22627 Int alen;
22628 IRTemp addr;
22629 UChar rm = getUChar(delta);
22630 UInt rG = gregOfRexRM(pfx,rm);
22631 UInt rV = getVexNvvvv(pfx);
22632 IRExpr* vpart = getXMMReg(rV);
22633 if (epartIsReg(rm)) {
22634 UInt rE = eregOfRexRM(pfx,rm);
22635 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
22636 DIP("%s %s,%s,%s\n", opname,
22637 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
22638 delta = delta+1;
22639 } else {
22640 /* We can only do a 64-bit memory read, so the upper half of the
22641 E operand needs to be made simply of zeroes. */
22642 IRTemp epart = newTemp(Ity_V128);
22643 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22644 assign( epart, unop( Iop_64UtoV128,
22645 loadLE(Ity_I64, mkexpr(addr))) );
22646 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
22647 DIP("%s %s,%s,%s\n", opname,
22648 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
22649 delta = delta+alen;
22650 }
22651 putYMMRegLane128( rG, 1, mkV128(0) );
22652 *uses_vvvv = True;
22653 return delta;
22654}
22655
22656
22657/* Lower 64-bit lane only AVX128 unary operation:
22658 G[63:0] = op(E[63:0])
22659 G[127:64] = V[127:64]
22660 G[255:128] = 0
22661 The specified op must be of the 64F0x2 kind, so that it
22662 copies the upper half of the operand to the result.
22663*/
22664static Long dis_AVX128_E_V_to_G_lo64_unary ( /*OUT*/Bool* uses_vvvv,
floriancacba8e2014-12-15 18:58:07 +000022665 const VexAbiInfo* vbi,
sewardjc4530ae2012-05-21 10:18:49 +000022666 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000022667 const HChar* opname, IROp op )
sewardjc4530ae2012-05-21 10:18:49 +000022668{
22669 HChar dis_buf[50];
22670 Int alen;
22671 IRTemp addr;
22672 UChar rm = getUChar(delta);
22673 UInt rG = gregOfRexRM(pfx,rm);
22674 UInt rV = getVexNvvvv(pfx);
22675 IRTemp e64 = newTemp(Ity_I64);
22676
22677 /* Fetch E[63:0] */
22678 if (epartIsReg(rm)) {
22679 UInt rE = eregOfRexRM(pfx,rm);
22680 assign(e64, getXMMRegLane64(rE, 0));
22681 DIP("%s %s,%s,%s\n", opname,
22682 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
22683 delta += 1;
22684 } else {
22685 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22686 assign(e64, loadLE(Ity_I64, mkexpr(addr)));
22687 DIP("%s %s,%s,%s\n", opname,
22688 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
22689 delta += alen;
22690 }
22691
22692 /* Create a value 'arg' as V[127:64]++E[63:0] */
22693 IRTemp arg = newTemp(Ity_V128);
22694 assign(arg,
22695 binop(Iop_SetV128lo64,
22696 getXMMReg(rV), mkexpr(e64)));
22697 /* and apply op to it */
22698 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
22699 *uses_vvvv = True;
22700 return delta;
22701}
22702
22703
sewardj66becf32012-06-18 23:15:16 +000022704/* Lower 32-bit lane only AVX128 unary operation:
22705 G[31:0] = op(E[31:0])
22706 G[127:32] = V[127:32]
22707 G[255:128] = 0
22708 The specified op must be of the 32F0x4 kind, so that it
22709 copies the upper 3/4 of the operand to the result.
22710*/
22711static Long dis_AVX128_E_V_to_G_lo32_unary ( /*OUT*/Bool* uses_vvvv,
floriancacba8e2014-12-15 18:58:07 +000022712 const VexAbiInfo* vbi,
sewardj66becf32012-06-18 23:15:16 +000022713 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000022714 const HChar* opname, IROp op )
sewardj66becf32012-06-18 23:15:16 +000022715{
22716 HChar dis_buf[50];
22717 Int alen;
22718 IRTemp addr;
22719 UChar rm = getUChar(delta);
22720 UInt rG = gregOfRexRM(pfx,rm);
22721 UInt rV = getVexNvvvv(pfx);
22722 IRTemp e32 = newTemp(Ity_I32);
22723
22724 /* Fetch E[31:0] */
22725 if (epartIsReg(rm)) {
22726 UInt rE = eregOfRexRM(pfx,rm);
22727 assign(e32, getXMMRegLane32(rE, 0));
22728 DIP("%s %s,%s,%s\n", opname,
22729 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
22730 delta += 1;
22731 } else {
22732 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22733 assign(e32, loadLE(Ity_I32, mkexpr(addr)));
22734 DIP("%s %s,%s,%s\n", opname,
22735 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
22736 delta += alen;
22737 }
22738
22739 /* Create a value 'arg' as V[127:32]++E[31:0] */
22740 IRTemp arg = newTemp(Ity_V128);
22741 assign(arg,
22742 binop(Iop_SetV128lo32,
22743 getXMMReg(rV), mkexpr(e32)));
22744 /* and apply op to it */
22745 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
22746 *uses_vvvv = True;
22747 return delta;
22748}
22749
22750
sewardjc4530ae2012-05-21 10:18:49 +000022751/* Lower 32-bit lane only AVX128 binary operation:
22752 G[31:0] = V[31:0] `op` E[31:0]
22753 G[127:32] = V[127:32]
22754 G[255:128] = 0.
22755 The specified op must be of the 32F0x4 kind, so that it
22756 copies the upper 3/4 of the left operand to the result.
22757*/
22758static Long dis_AVX128_E_V_to_G_lo32 ( /*OUT*/Bool* uses_vvvv,
floriancacba8e2014-12-15 18:58:07 +000022759 const VexAbiInfo* vbi,
sewardjc4530ae2012-05-21 10:18:49 +000022760 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000022761 const HChar* opname, IROp op )
sewardjc4530ae2012-05-21 10:18:49 +000022762{
22763 HChar dis_buf[50];
22764 Int alen;
22765 IRTemp addr;
22766 UChar rm = getUChar(delta);
22767 UInt rG = gregOfRexRM(pfx,rm);
22768 UInt rV = getVexNvvvv(pfx);
22769 IRExpr* vpart = getXMMReg(rV);
22770 if (epartIsReg(rm)) {
22771 UInt rE = eregOfRexRM(pfx,rm);
22772 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
22773 DIP("%s %s,%s,%s\n", opname,
22774 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
22775 delta = delta+1;
22776 } else {
22777 /* We can only do a 32-bit memory read, so the upper 3/4 of the
22778 E operand needs to be made simply of zeroes. */
22779 IRTemp epart = newTemp(Ity_V128);
22780 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22781 assign( epart, unop( Iop_32UtoV128,
22782 loadLE(Ity_I32, mkexpr(addr))) );
22783 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
22784 DIP("%s %s,%s,%s\n", opname,
22785 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
22786 delta = delta+alen;
22787 }
22788 putYMMRegLane128( rG, 1, mkV128(0) );
22789 *uses_vvvv = True;
22790 return delta;
22791}
22792
22793
sewardj251b59e2012-05-25 13:51:07 +000022794/* All-lanes AVX128 binary operation:
sewardj56c30312012-06-12 08:45:39 +000022795 G[127:0] = V[127:0] `op` E[127:0]
sewardj251b59e2012-05-25 13:51:07 +000022796 G[255:128] = 0.
22797*/
22798static Long dis_AVX128_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
floriancacba8e2014-12-15 18:58:07 +000022799 const VexAbiInfo* vbi,
sewardj251b59e2012-05-25 13:51:07 +000022800 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000022801 const HChar* opname, IROp op )
sewardj251b59e2012-05-25 13:51:07 +000022802{
sewardj29a219c2012-06-04 07:38:10 +000022803 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22804 uses_vvvv, vbi, pfx, delta, opname, op,
22805 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
22806 );
sewardj251b59e2012-05-25 13:51:07 +000022807}
22808
22809
sewardjc4530ae2012-05-21 10:18:49 +000022810/* Handles AVX128 32F/64F comparisons. A derivative of
22811 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
22812 original delta to indicate failure. */
22813static
22814Long dis_AVX128_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
floriancacba8e2014-12-15 18:58:07 +000022815 const VexAbiInfo* vbi,
sewardjc4530ae2012-05-21 10:18:49 +000022816 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000022817 const HChar* opname, Bool all_lanes, Int sz )
sewardjc4530ae2012-05-21 10:18:49 +000022818{
sewardjc93904b2012-05-27 13:50:42 +000022819 vassert(sz == 4 || sz == 8);
sewardjc4530ae2012-05-21 10:18:49 +000022820 Long deltaIN = delta;
22821 HChar dis_buf[50];
22822 Int alen;
22823 UInt imm8;
22824 IRTemp addr;
22825 Bool preSwap = False;
22826 IROp op = Iop_INVALID;
22827 Bool postNot = False;
22828 IRTemp plain = newTemp(Ity_V128);
22829 UChar rm = getUChar(delta);
sewardjc93904b2012-05-27 13:50:42 +000022830 UInt rG = gregOfRexRM(pfx, rm);
22831 UInt rV = getVexNvvvv(pfx);
22832 IRTemp argL = newTemp(Ity_V128);
22833 IRTemp argR = newTemp(Ity_V128);
22834
22835 assign(argL, getXMMReg(rV));
sewardjc4530ae2012-05-21 10:18:49 +000022836 if (epartIsReg(rm)) {
22837 imm8 = getUChar(delta+1);
22838 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
22839 if (!ok) return deltaIN; /* FAIL */
22840 UInt rE = eregOfRexRM(pfx,rm);
sewardjc93904b2012-05-27 13:50:42 +000022841 assign(argR, getXMMReg(rE));
sewardjc4530ae2012-05-21 10:18:49 +000022842 delta += 1+1;
22843 DIP("%s $%d,%s,%s,%s\n",
22844 opname, (Int)imm8,
22845 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
22846 } else {
22847 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
22848 imm8 = getUChar(delta+alen);
22849 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
22850 if (!ok) return deltaIN; /* FAIL */
sewardjc93904b2012-05-27 13:50:42 +000022851 assign(argR,
22852 all_lanes ? loadLE(Ity_V128, mkexpr(addr))
sewardjc4530ae2012-05-21 10:18:49 +000022853 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
sewardjc93904b2012-05-27 13:50:42 +000022854 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))));
sewardjc4530ae2012-05-21 10:18:49 +000022855 delta += alen+1;
22856 DIP("%s $%d,%s,%s,%s\n",
22857 opname, (Int)imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
22858 }
22859
sewardjc93904b2012-05-27 13:50:42 +000022860 assign(plain, preSwap ? binop(op, mkexpr(argR), mkexpr(argL))
22861 : binop(op, mkexpr(argL), mkexpr(argR)));
sewardjc4530ae2012-05-21 10:18:49 +000022862
sewardjc93904b2012-05-27 13:50:42 +000022863 if (all_lanes) {
22864 /* This is simple: just invert the result, if necessary, and
22865 have done. */
22866 if (postNot) {
22867 putYMMRegLoAndZU( rG, unop(Iop_NotV128, mkexpr(plain)) );
22868 } else {
22869 putYMMRegLoAndZU( rG, mkexpr(plain) );
22870 }
sewardjc4530ae2012-05-21 10:18:49 +000022871 }
22872 else
sewardjc93904b2012-05-27 13:50:42 +000022873 if (!preSwap) {
22874 /* More complex. It's a one-lane-only, hence need to possibly
22875 invert only that one lane. But at least the other lanes are
22876 correctly "in" the result, having been copied from the left
22877 operand (argL). */
22878 if (postNot) {
22879 IRExpr* mask = mkV128(sz==4 ? 0x000F : 0x00FF);
22880 putYMMRegLoAndZU( rG, binop(Iop_XorV128, mkexpr(plain),
22881 mask) );
22882 } else {
22883 putYMMRegLoAndZU( rG, mkexpr(plain) );
22884 }
sewardjc4530ae2012-05-21 10:18:49 +000022885 }
22886 else {
sewardjc93904b2012-05-27 13:50:42 +000022887 /* This is the most complex case. One-lane-only, but the args
22888 were swapped. So we have to possibly invert the bottom lane,
22889 and (definitely) we have to copy the upper lane(s) from argL
22890 since, due to the swapping, what's currently there is from
22891 argR, which is not correct. */
22892 IRTemp res = newTemp(Ity_V128);
22893 IRTemp mask = newTemp(Ity_V128);
22894 IRTemp notMask = newTemp(Ity_V128);
22895 assign(mask, mkV128(sz==4 ? 0x000F : 0x00FF));
22896 assign(notMask, mkV128(sz==4 ? 0xFFF0 : 0xFF00));
22897 if (postNot) {
22898 assign(res,
22899 binop(Iop_OrV128,
22900 binop(Iop_AndV128,
22901 unop(Iop_NotV128, mkexpr(plain)),
22902 mkexpr(mask)),
22903 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
22904 } else {
22905 assign(res,
22906 binop(Iop_OrV128,
22907 binop(Iop_AndV128,
22908 mkexpr(plain),
22909 mkexpr(mask)),
22910 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
22911 }
22912 putYMMRegLoAndZU( rG, mkexpr(res) );
sewardjc4530ae2012-05-21 10:18:49 +000022913 }
22914
22915 *uses_vvvv = True;
22916 return delta;
22917}
22918
22919
sewardj89378162012-06-24 12:12:20 +000022920/* Handles AVX256 32F/64F comparisons. A derivative of
22921 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
22922 original delta to indicate failure. */
22923static
22924Long dis_AVX256_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
floriancacba8e2014-12-15 18:58:07 +000022925 const VexAbiInfo* vbi,
sewardj89378162012-06-24 12:12:20 +000022926 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000022927 const HChar* opname, Int sz )
sewardj89378162012-06-24 12:12:20 +000022928{
22929 vassert(sz == 4 || sz == 8);
22930 Long deltaIN = delta;
22931 HChar dis_buf[50];
22932 Int alen;
22933 UInt imm8;
22934 IRTemp addr;
22935 Bool preSwap = False;
22936 IROp op = Iop_INVALID;
22937 Bool postNot = False;
22938 IRTemp plain = newTemp(Ity_V256);
22939 UChar rm = getUChar(delta);
22940 UInt rG = gregOfRexRM(pfx, rm);
22941 UInt rV = getVexNvvvv(pfx);
22942 IRTemp argL = newTemp(Ity_V256);
22943 IRTemp argR = newTemp(Ity_V256);
22944 IRTemp argLhi = IRTemp_INVALID;
22945 IRTemp argLlo = IRTemp_INVALID;
22946 IRTemp argRhi = IRTemp_INVALID;
22947 IRTemp argRlo = IRTemp_INVALID;
22948
22949 assign(argL, getYMMReg(rV));
22950 if (epartIsReg(rm)) {
22951 imm8 = getUChar(delta+1);
22952 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8,
22953 True/*all_lanes*/, sz);
22954 if (!ok) return deltaIN; /* FAIL */
22955 UInt rE = eregOfRexRM(pfx,rm);
22956 assign(argR, getYMMReg(rE));
22957 delta += 1+1;
22958 DIP("%s $%d,%s,%s,%s\n",
22959 opname, (Int)imm8,
22960 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
22961 } else {
22962 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
22963 imm8 = getUChar(delta+alen);
22964 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8,
22965 True/*all_lanes*/, sz);
22966 if (!ok) return deltaIN; /* FAIL */
22967 assign(argR, loadLE(Ity_V256, mkexpr(addr)) );
22968 delta += alen+1;
22969 DIP("%s $%d,%s,%s,%s\n",
22970 opname, (Int)imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
22971 }
22972
sewardjb1a41a22012-06-24 13:27:46 +000022973 breakupV256toV128s( preSwap ? argR : argL, &argLhi, &argLlo );
22974 breakupV256toV128s( preSwap ? argL : argR, &argRhi, &argRlo );
sewardj89378162012-06-24 12:12:20 +000022975 assign(plain, binop( Iop_V128HLtoV256,
22976 binop(op, mkexpr(argLhi), mkexpr(argRhi)),
22977 binop(op, mkexpr(argLlo), mkexpr(argRlo)) ) );
22978
22979 /* This is simple: just invert the result, if necessary, and
22980 have done. */
22981 if (postNot) {
22982 putYMMReg( rG, unop(Iop_NotV256, mkexpr(plain)) );
22983 } else {
22984 putYMMReg( rG, mkexpr(plain) );
22985 }
22986
22987 *uses_vvvv = True;
22988 return delta;
22989}
22990
22991
sewardj97f72452012-05-23 05:56:53 +000022992/* Handles AVX128 unary E-to-G all-lanes operations. */
22993static
22994Long dis_AVX128_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
floriancacba8e2014-12-15 18:58:07 +000022995 const VexAbiInfo* vbi,
sewardj97f72452012-05-23 05:56:53 +000022996 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000022997 const HChar* opname,
sewardj97f72452012-05-23 05:56:53 +000022998 IRTemp (*opFn)(IRTemp) )
22999{
23000 HChar dis_buf[50];
23001 Int alen;
23002 IRTemp addr;
23003 IRTemp res = newTemp(Ity_V128);
23004 IRTemp arg = newTemp(Ity_V128);
23005 UChar rm = getUChar(delta);
23006 UInt rG = gregOfRexRM(pfx, rm);
23007 if (epartIsReg(rm)) {
23008 UInt rE = eregOfRexRM(pfx,rm);
23009 assign(arg, getXMMReg(rE));
23010 delta += 1;
23011 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
23012 } else {
sewardj29ac4282012-05-24 06:31:21 +000023013 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj97f72452012-05-23 05:56:53 +000023014 assign(arg, loadLE(Ity_V128, mkexpr(addr)));
23015 delta += alen;
23016 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
23017 }
23018 res = opFn(arg);
23019 putYMMRegLoAndZU( rG, mkexpr(res) );
23020 *uses_vvvv = False;
23021 return delta;
23022}
23023
23024
sewardj66becf32012-06-18 23:15:16 +000023025/* Handles AVX128 unary E-to-G all-lanes operations. */
23026static
23027Long dis_AVX128_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
floriancacba8e2014-12-15 18:58:07 +000023028 const VexAbiInfo* vbi,
sewardj66becf32012-06-18 23:15:16 +000023029 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000023030 const HChar* opname, IROp op )
sewardj66becf32012-06-18 23:15:16 +000023031{
23032 HChar dis_buf[50];
23033 Int alen;
23034 IRTemp addr;
23035 IRTemp arg = newTemp(Ity_V128);
23036 UChar rm = getUChar(delta);
23037 UInt rG = gregOfRexRM(pfx, rm);
23038 if (epartIsReg(rm)) {
23039 UInt rE = eregOfRexRM(pfx,rm);
23040 assign(arg, getXMMReg(rE));
23041 delta += 1;
23042 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
23043 } else {
23044 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23045 assign(arg, loadLE(Ity_V128, mkexpr(addr)));
23046 delta += alen;
23047 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
23048 }
23049 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
23050 *uses_vvvv = False;
23051 return delta;
23052}
23053
23054
sewardj56c30312012-06-12 08:45:39 +000023055/* FIXME: common up with the _128_ version above? */
23056static
23057Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG (
floriancacba8e2014-12-15 18:58:07 +000023058 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
florian55085f82012-11-21 00:36:55 +000023059 Prefix pfx, Long delta, const HChar* name,
sewardj56c30312012-06-12 08:45:39 +000023060 /* The actual operation. Use either 'op' or 'opfn',
23061 but not both. */
23062 IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
23063 Bool invertLeftArg,
23064 Bool swapArgs
23065 )
23066{
23067 UChar modrm = getUChar(delta);
23068 UInt rD = gregOfRexRM(pfx, modrm);
23069 UInt rSL = getVexNvvvv(pfx);
23070 IRTemp tSL = newTemp(Ity_V256);
23071 IRTemp tSR = newTemp(Ity_V256);
23072 IRTemp addr = IRTemp_INVALID;
23073 HChar dis_buf[50];
23074 Int alen = 0;
23075 vassert(1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*WIG?*/);
23076
sewardj2a2bda92012-06-14 23:32:02 +000023077 assign(tSL, invertLeftArg ? unop(Iop_NotV256, getYMMReg(rSL))
23078 : getYMMReg(rSL));
sewardj56c30312012-06-12 08:45:39 +000023079
23080 if (epartIsReg(modrm)) {
23081 UInt rSR = eregOfRexRM(pfx, modrm);
23082 delta += 1;
23083 assign(tSR, getYMMReg(rSR));
23084 DIP("%s %s,%s,%s\n",
23085 name, nameYMMReg(rSR), nameYMMReg(rSL), nameYMMReg(rD));
23086 } else {
23087 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
23088 delta += alen;
23089 assign(tSR, loadLE(Ity_V256, mkexpr(addr)));
23090 DIP("%s %s,%s,%s\n",
23091 name, dis_buf, nameYMMReg(rSL), nameYMMReg(rD));
23092 }
23093
23094 IRTemp res = IRTemp_INVALID;
23095 if (op != Iop_INVALID) {
23096 vassert(opFn == NULL);
23097 res = newTemp(Ity_V256);
sewardj9571dc02014-01-26 18:34:23 +000023098 if (requiresRMode(op)) {
23099 IRTemp rm = newTemp(Ity_I32);
23100 assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
23101 assign(res, swapArgs
23102 ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
23103 : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
23104 } else {
23105 assign(res, swapArgs
23106 ? binop(op, mkexpr(tSR), mkexpr(tSL))
23107 : binop(op, mkexpr(tSL), mkexpr(tSR)));
23108 }
sewardj56c30312012-06-12 08:45:39 +000023109 } else {
23110 vassert(opFn != NULL);
23111 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
23112 }
23113
23114 putYMMReg(rD, mkexpr(res));
23115
23116 *uses_vvvv = True;
23117 return delta;
23118}
23119
23120
23121/* All-lanes AVX256 binary operation:
23122 G[255:0] = V[255:0] `op` E[255:0]
23123*/
23124static Long dis_AVX256_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
floriancacba8e2014-12-15 18:58:07 +000023125 const VexAbiInfo* vbi,
sewardj56c30312012-06-12 08:45:39 +000023126 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000023127 const HChar* opname, IROp op )
sewardj56c30312012-06-12 08:45:39 +000023128{
23129 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23130 uses_vvvv, vbi, pfx, delta, opname, op,
23131 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
23132 );
23133}
23134
23135
sewardjcc3d2192013-03-27 11:37:33 +000023136/* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, with a simple IROp
23137 for the operation, no inversion of the left arg, and no swapping of
23138 args. */
23139static
23140Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple (
floriancacba8e2014-12-15 18:58:07 +000023141 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
sewardjcc3d2192013-03-27 11:37:33 +000023142 Prefix pfx, Long delta, const HChar* name,
23143 IROp op
23144 )
23145{
23146 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23147 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
23148}
23149
23150
sewardj89378162012-06-24 12:12:20 +000023151/* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, using the given IR
23152 generator to compute the result, no inversion of the left
23153 arg, and no swapping of args. */
23154static
23155Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex (
floriancacba8e2014-12-15 18:58:07 +000023156 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
florian55085f82012-11-21 00:36:55 +000023157 Prefix pfx, Long delta, const HChar* name,
sewardj89378162012-06-24 12:12:20 +000023158 IRTemp(*opFn)(IRTemp,IRTemp)
23159 )
23160{
23161 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23162 uses_vvvv, vbi, pfx, delta, name,
23163 Iop_INVALID, opFn, False, False );
23164}
23165
23166
sewardj66becf32012-06-18 23:15:16 +000023167/* Handles AVX256 unary E-to-G all-lanes operations. */
23168static
sewardjcc3d2192013-03-27 11:37:33 +000023169Long dis_AVX256_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
floriancacba8e2014-12-15 18:58:07 +000023170 const VexAbiInfo* vbi,
sewardjcc3d2192013-03-27 11:37:33 +000023171 Prefix pfx, Long delta,
23172 const HChar* opname,
23173 IRTemp (*opFn)(IRTemp) )
23174{
23175 HChar dis_buf[50];
23176 Int alen;
23177 IRTemp addr;
23178 IRTemp res = newTemp(Ity_V256);
23179 IRTemp arg = newTemp(Ity_V256);
23180 UChar rm = getUChar(delta);
23181 UInt rG = gregOfRexRM(pfx, rm);
23182 if (epartIsReg(rm)) {
23183 UInt rE = eregOfRexRM(pfx,rm);
23184 assign(arg, getYMMReg(rE));
23185 delta += 1;
23186 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
23187 } else {
23188 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23189 assign(arg, loadLE(Ity_V256, mkexpr(addr)));
23190 delta += alen;
23191 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
23192 }
23193 res = opFn(arg);
23194 putYMMReg( rG, mkexpr(res) );
23195 *uses_vvvv = False;
23196 return delta;
23197}
23198
23199
23200/* Handles AVX256 unary E-to-G all-lanes operations. */
23201static
sewardj66becf32012-06-18 23:15:16 +000023202Long dis_AVX256_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
floriancacba8e2014-12-15 18:58:07 +000023203 const VexAbiInfo* vbi,
sewardj66becf32012-06-18 23:15:16 +000023204 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000023205 const HChar* opname, IROp op )
sewardj66becf32012-06-18 23:15:16 +000023206{
23207 HChar dis_buf[50];
23208 Int alen;
23209 IRTemp addr;
23210 IRTemp arg = newTemp(Ity_V256);
23211 UChar rm = getUChar(delta);
23212 UInt rG = gregOfRexRM(pfx, rm);
23213 if (epartIsReg(rm)) {
23214 UInt rE = eregOfRexRM(pfx,rm);
23215 assign(arg, getYMMReg(rE));
23216 delta += 1;
23217 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
23218 } else {
23219 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23220 assign(arg, loadLE(Ity_V256, mkexpr(addr)));
23221 delta += alen;
23222 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
23223 }
23224 putYMMReg( rG, unop(op, mkexpr(arg)) );
23225 *uses_vvvv = False;
23226 return delta;
23227}
23228
23229
sewardj6fcd43e2012-06-14 08:51:35 +000023230/* The use of ReinterpF64asI64 is ugly. Surely could do better if we
23231 had a variant of Iop_64x4toV256 that took F64s as args instead. */
floriancacba8e2014-12-15 18:58:07 +000023232static Long dis_CVTDQ2PD_256 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj6fcd43e2012-06-14 08:51:35 +000023233 Long delta )
23234{
23235 IRTemp addr = IRTemp_INVALID;
23236 Int alen = 0;
23237 HChar dis_buf[50];
23238 UChar modrm = getUChar(delta);
23239 IRTemp sV = newTemp(Ity_V128);
23240 UInt rG = gregOfRexRM(pfx,modrm);
23241 if (epartIsReg(modrm)) {
23242 UInt rE = eregOfRexRM(pfx,modrm);
23243 assign( sV, getXMMReg(rE) );
23244 delta += 1;
23245 DIP("vcvtdq2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
23246 } else {
23247 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23248 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
23249 delta += alen;
23250 DIP("vcvtdq2pd %s,%s\n", dis_buf, nameYMMReg(rG) );
23251 }
23252 IRTemp s3, s2, s1, s0;
23253 s3 = s2 = s1 = s0 = IRTemp_INVALID;
23254 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
23255 IRExpr* res
23256 = IRExpr_Qop(
23257 Iop_64x4toV256,
23258 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s3))),
23259 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s2))),
23260 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s1))),
23261 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s0)))
23262 );
23263 putYMMReg(rG, res);
23264 return delta;
23265}
23266
23267
floriancacba8e2014-12-15 18:58:07 +000023268static Long dis_CVTPD2PS_256 ( const VexAbiInfo* vbi, Prefix pfx,
sewardj6fcd43e2012-06-14 08:51:35 +000023269 Long delta )
23270{
23271 IRTemp addr = IRTemp_INVALID;
23272 Int alen = 0;
23273 HChar dis_buf[50];
23274 UChar modrm = getUChar(delta);
23275 UInt rG = gregOfRexRM(pfx,modrm);
23276 IRTemp argV = newTemp(Ity_V256);
23277 IRTemp rmode = newTemp(Ity_I32);
23278 if (epartIsReg(modrm)) {
23279 UInt rE = eregOfRexRM(pfx,modrm);
23280 assign( argV, getYMMReg(rE) );
23281 delta += 1;
23282 DIP("vcvtpd2psy %s,%s\n", nameYMMReg(rE), nameXMMReg(rG));
23283 } else {
23284 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23285 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
23286 delta += alen;
23287 DIP("vcvtpd2psy %s,%s\n", dis_buf, nameXMMReg(rG) );
23288 }
23289
23290 assign( rmode, get_sse_roundingmode() );
23291 IRTemp t3, t2, t1, t0;
23292 t3 = t2 = t1 = t0 = IRTemp_INVALID;
23293 breakupV256to64s( argV, &t3, &t2, &t1, &t0 );
23294# define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), \
23295 unop(Iop_ReinterpI64asF64, mkexpr(_t)) )
23296 putXMMRegLane32F( rG, 3, CVT(t3) );
23297 putXMMRegLane32F( rG, 2, CVT(t2) );
23298 putXMMRegLane32F( rG, 1, CVT(t1) );
23299 putXMMRegLane32F( rG, 0, CVT(t0) );
23300# undef CVT
23301 putYMMRegLane128( rG, 1, mkV128(0) );
23302 return delta;
23303}
23304
23305
sewardjcc3d2192013-03-27 11:37:33 +000023306static IRTemp math_VPUNPCK_YMM ( IRTemp tL, IRType tR, IROp op )
23307{
23308 IRTemp tLhi, tLlo, tRhi, tRlo;
23309 tLhi = tLlo = tRhi = tRlo = IRTemp_INVALID;
23310 IRTemp res = newTemp(Ity_V256);
23311 breakupV256toV128s( tL, &tLhi, &tLlo );
23312 breakupV256toV128s( tR, &tRhi, &tRlo );
23313 assign( res, binop( Iop_V128HLtoV256,
23314 binop( op, mkexpr(tRhi), mkexpr(tLhi) ),
23315 binop( op, mkexpr(tRlo), mkexpr(tLlo) ) ) );
23316 return res;
23317}
23318
23319
23320static IRTemp math_VPUNPCKLBW_YMM ( IRTemp tL, IRTemp tR )
23321{
23322 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO8x16 );
23323}
23324
23325
23326static IRTemp math_VPUNPCKLWD_YMM ( IRTemp tL, IRTemp tR )
23327{
23328 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO16x8 );
23329}
23330
23331
23332static IRTemp math_VPUNPCKLDQ_YMM ( IRTemp tL, IRTemp tR )
23333{
23334 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO32x4 );
23335}
23336
23337
23338static IRTemp math_VPUNPCKLQDQ_YMM ( IRTemp tL, IRTemp tR )
23339{
23340 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO64x2 );
23341}
23342
23343
23344static IRTemp math_VPUNPCKHBW_YMM ( IRTemp tL, IRTemp tR )
23345{
23346 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI8x16 );
23347}
23348
23349
23350static IRTemp math_VPUNPCKHWD_YMM ( IRTemp tL, IRTemp tR )
23351{
23352 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI16x8 );
23353}
23354
23355
23356static IRTemp math_VPUNPCKHDQ_YMM ( IRTemp tL, IRTemp tR )
23357{
23358 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI32x4 );
23359}
23360
23361
23362static IRTemp math_VPUNPCKHQDQ_YMM ( IRTemp tL, IRTemp tR )
23363{
23364 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI64x2 );
23365}
23366
23367
23368static IRTemp math_VPACKSSWB_YMM ( IRTemp tL, IRTemp tR )
23369{
23370 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Sx16 );
23371}
23372
23373
23374static IRTemp math_VPACKUSWB_YMM ( IRTemp tL, IRTemp tR )
23375{
23376 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Ux16 );
23377}
23378
23379
23380static IRTemp math_VPACKSSDW_YMM ( IRTemp tL, IRTemp tR )
23381{
23382 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Sx8 );
23383}
23384
23385
23386static IRTemp math_VPACKUSDW_YMM ( IRTemp tL, IRTemp tR )
23387{
23388 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Ux8 );
23389}
23390
23391
sewardjc4530ae2012-05-21 10:18:49 +000023392__attribute__((noinline))
23393static
23394Long dis_ESC_0F__VEX (
23395 /*MB_OUT*/DisResult* dres,
23396 /*OUT*/ Bool* uses_vvvv,
florianbeac5302014-12-31 12:09:38 +000023397 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
sewardjc4530ae2012-05-21 10:18:49 +000023398 Bool resteerCisOk,
23399 void* callback_opaque,
floriancacba8e2014-12-15 18:58:07 +000023400 const VexArchInfo* archinfo,
23401 const VexAbiInfo* vbi,
sewardjc4530ae2012-05-21 10:18:49 +000023402 Prefix pfx, Int sz, Long deltaIN
23403 )
23404{
23405 IRTemp addr = IRTemp_INVALID;
23406 Int alen = 0;
23407 HChar dis_buf[50];
23408 Long delta = deltaIN;
23409 UChar opc = getUChar(delta);
23410 delta++;
23411 *uses_vvvv = False;
23412
23413 switch (opc) {
23414
23415 case 0x10:
23416 /* VMOVSD m64, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
23417 /* Move 64 bits from E (mem only) to G (lo half xmm).
23418 Bits 255-64 of the dest are zeroed out. */
23419 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
23420 UChar modrm = getUChar(delta);
23421 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23422 UInt rG = gregOfRexRM(pfx,modrm);
23423 IRTemp z128 = newTemp(Ity_V128);
23424 assign(z128, mkV128(0));
23425 putXMMReg( rG, mkexpr(z128) );
23426 /* FIXME: ALIGNMENT CHECK? */
23427 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
23428 putYMMRegLane128( rG, 1, mkexpr(z128) );
23429 DIP("vmovsd %s,%s\n", dis_buf, nameXMMReg(rG));
23430 delta += alen;
23431 goto decode_success;
23432 }
sewardj21459cb2012-06-18 14:05:52 +000023433 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
23434 /* Reg form. */
23435 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
23436 UChar modrm = getUChar(delta);
23437 UInt rG = gregOfRexRM(pfx, modrm);
23438 UInt rE = eregOfRexRM(pfx, modrm);
23439 UInt rV = getVexNvvvv(pfx);
23440 delta++;
23441 DIP("vmovsd %s,%s,%s\n",
23442 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23443 IRTemp res = newTemp(Ity_V128);
23444 assign(res, binop(Iop_64HLtoV128,
23445 getXMMRegLane64(rV, 1),
23446 getXMMRegLane64(rE, 0)));
23447 putYMMRegLoAndZU(rG, mkexpr(res));
23448 *uses_vvvv = True;
23449 goto decode_success;
23450 }
sewardjc4530ae2012-05-21 10:18:49 +000023451 /* VMOVSS m32, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
23452 /* Move 32 bits from E (mem only) to G (lo half xmm).
23453 Bits 255-32 of the dest are zeroed out. */
23454 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
23455 UChar modrm = getUChar(delta);
23456 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23457 UInt rG = gregOfRexRM(pfx,modrm);
23458 IRTemp z128 = newTemp(Ity_V128);
23459 assign(z128, mkV128(0));
23460 putXMMReg( rG, mkexpr(z128) );
23461 /* FIXME: ALIGNMENT CHECK? */
23462 putXMMRegLane32( rG, 0, loadLE(Ity_I32, mkexpr(addr)) );
23463 putYMMRegLane128( rG, 1, mkexpr(z128) );
23464 DIP("vmovss %s,%s\n", dis_buf, nameXMMReg(rG));
23465 delta += alen;
23466 goto decode_success;
23467 }
sewardj15ad1942012-06-20 10:21:05 +000023468 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
23469 /* Reg form. */
23470 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
23471 UChar modrm = getUChar(delta);
23472 UInt rG = gregOfRexRM(pfx, modrm);
23473 UInt rE = eregOfRexRM(pfx, modrm);
23474 UInt rV = getVexNvvvv(pfx);
23475 delta++;
23476 DIP("vmovss %s,%s,%s\n",
23477 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23478 IRTemp res = newTemp(Ity_V128);
23479 assign( res, binop( Iop_64HLtoV128,
23480 getXMMRegLane64(rV, 1),
23481 binop(Iop_32HLto64,
23482 getXMMRegLane32(rV, 1),
23483 getXMMRegLane32(rE, 0)) ) );
23484 putYMMRegLoAndZU(rG, mkexpr(res));
23485 *uses_vvvv = True;
23486 goto decode_success;
23487 }
sewardj56c30312012-06-12 08:45:39 +000023488 /* VMOVUPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 10 /r */
23489 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23490 UChar modrm = getUChar(delta);
23491 UInt rG = gregOfRexRM(pfx, modrm);
23492 if (epartIsReg(modrm)) {
23493 UInt rE = eregOfRexRM(pfx,modrm);
23494 putYMMRegLoAndZU( rG, getXMMReg( rE ));
23495 DIP("vmovupd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
23496 delta += 1;
23497 } else {
23498 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23499 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
23500 DIP("vmovupd %s,%s\n", dis_buf, nameXMMReg(rG));
23501 delta += alen;
23502 }
23503 goto decode_success;
23504 }
sewardjfce47a62012-06-03 23:12:33 +000023505 /* VMOVUPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 10 /r */
23506 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23507 UChar modrm = getUChar(delta);
23508 UInt rG = gregOfRexRM(pfx, modrm);
23509 if (epartIsReg(modrm)) {
23510 UInt rE = eregOfRexRM(pfx,modrm);
23511 putYMMReg( rG, getYMMReg( rE ));
23512 DIP("vmovupd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
23513 delta += 1;
23514 } else {
23515 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23516 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
23517 DIP("vmovupd %s,%s\n", dis_buf, nameYMMReg(rG));
23518 delta += alen;
23519 }
23520 goto decode_success;
23521 }
sewardj56c30312012-06-12 08:45:39 +000023522 /* VMOVUPS xmm2/m128, xmm1 = VEX.128.0F.WIG 10 /r */
23523 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23524 UChar modrm = getUChar(delta);
23525 UInt rG = gregOfRexRM(pfx, modrm);
23526 if (epartIsReg(modrm)) {
23527 UInt rE = eregOfRexRM(pfx,modrm);
23528 putYMMRegLoAndZU( rG, getXMMReg( rE ));
23529 DIP("vmovups %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
23530 delta += 1;
23531 } else {
23532 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23533 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
23534 DIP("vmovups %s,%s\n", dis_buf, nameXMMReg(rG));
23535 delta += alen;
23536 }
23537 goto decode_success;
23538 }
sewardj66becf32012-06-18 23:15:16 +000023539 /* VMOVUPS ymm2/m256, ymm1 = VEX.256.0F.WIG 10 /r */
23540 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23541 UChar modrm = getUChar(delta);
23542 UInt rG = gregOfRexRM(pfx, modrm);
23543 if (epartIsReg(modrm)) {
23544 UInt rE = eregOfRexRM(pfx,modrm);
23545 putYMMReg( rG, getYMMReg( rE ));
23546 DIP("vmovups %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
23547 delta += 1;
23548 } else {
23549 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23550 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
23551 DIP("vmovups %s,%s\n", dis_buf, nameYMMReg(rG));
23552 delta += alen;
23553 }
23554 goto decode_success;
23555 }
sewardjc4530ae2012-05-21 10:18:49 +000023556 break;
23557
23558 case 0x11:
23559 /* VMOVSD xmm1, m64 = VEX.LIG.F2.0F.WIG 11 /r */
23560 /* Move 64 bits from G (low half xmm) to mem only. */
23561 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
23562 UChar modrm = getUChar(delta);
23563 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23564 UInt rG = gregOfRexRM(pfx,modrm);
23565 /* FIXME: ALIGNMENT CHECK? */
23566 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0));
23567 DIP("vmovsd %s,%s\n", nameXMMReg(rG), dis_buf);
23568 delta += alen;
23569 goto decode_success;
23570 }
sewardj21459cb2012-06-18 14:05:52 +000023571 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 11 /r */
23572 /* Reg form. */
23573 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
23574 UChar modrm = getUChar(delta);
23575 UInt rG = gregOfRexRM(pfx, modrm);
23576 UInt rE = eregOfRexRM(pfx, modrm);
23577 UInt rV = getVexNvvvv(pfx);
23578 delta++;
23579 DIP("vmovsd %s,%s,%s\n",
23580 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23581 IRTemp res = newTemp(Ity_V128);
23582 assign(res, binop(Iop_64HLtoV128,
23583 getXMMRegLane64(rV, 1),
23584 getXMMRegLane64(rE, 0)));
23585 putYMMRegLoAndZU(rG, mkexpr(res));
23586 *uses_vvvv = True;
23587 goto decode_success;
23588 }
sewardjc4530ae2012-05-21 10:18:49 +000023589 /* VMOVSS xmm1, m64 = VEX.LIG.F3.0F.WIG 11 /r */
23590 /* Move 32 bits from G (low 1/4 xmm) to mem only. */
23591 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
23592 UChar modrm = getUChar(delta);
23593 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23594 UInt rG = gregOfRexRM(pfx,modrm);
23595 /* FIXME: ALIGNMENT CHECK? */
23596 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0));
23597 DIP("vmovss %s,%s\n", nameXMMReg(rG), dis_buf);
23598 delta += alen;
23599 goto decode_success;
23600 }
sewardj15ad1942012-06-20 10:21:05 +000023601 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 11 /r */
23602 /* Reg form. */
23603 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
23604 UChar modrm = getUChar(delta);
23605 UInt rG = gregOfRexRM(pfx, modrm);
23606 UInt rE = eregOfRexRM(pfx, modrm);
23607 UInt rV = getVexNvvvv(pfx);
23608 delta++;
23609 DIP("vmovss %s,%s,%s\n",
23610 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23611 IRTemp res = newTemp(Ity_V128);
23612 assign( res, binop( Iop_64HLtoV128,
23613 getXMMRegLane64(rV, 1),
23614 binop(Iop_32HLto64,
23615 getXMMRegLane32(rV, 1),
23616 getXMMRegLane32(rE, 0)) ) );
23617 putYMMRegLoAndZU(rG, mkexpr(res));
23618 *uses_vvvv = True;
23619 goto decode_success;
23620 }
sewardjc4530ae2012-05-21 10:18:49 +000023621 /* VMOVUPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 11 /r */
23622 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23623 UChar modrm = getUChar(delta);
23624 UInt rG = gregOfRexRM(pfx,modrm);
23625 if (epartIsReg(modrm)) {
23626 UInt rE = eregOfRexRM(pfx,modrm);
23627 putYMMRegLoAndZU( rE, getXMMReg(rG) );
23628 DIP("vmovupd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
23629 delta += 1;
23630 } else {
23631 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23632 storeLE( mkexpr(addr), getXMMReg(rG) );
23633 DIP("vmovupd %s,%s\n", nameXMMReg(rG), dis_buf);
23634 delta += alen;
23635 }
23636 goto decode_success;
23637 }
sewardj4ed05e02012-06-18 15:01:30 +000023638 /* VMOVUPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 11 /r */
23639 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23640 UChar modrm = getUChar(delta);
23641 UInt rG = gregOfRexRM(pfx,modrm);
23642 if (epartIsReg(modrm)) {
23643 UInt rE = eregOfRexRM(pfx,modrm);
23644 putYMMReg( rE, getYMMReg(rG) );
23645 DIP("vmovupd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
23646 delta += 1;
23647 } else {
23648 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23649 storeLE( mkexpr(addr), getYMMReg(rG) );
23650 DIP("vmovupd %s,%s\n", nameYMMReg(rG), dis_buf);
23651 delta += alen;
23652 }
23653 goto decode_success;
23654 }
sewardj6eaf00c2012-05-23 11:33:56 +000023655 /* VMOVUPS xmm1, xmm2/m128 = VEX.128.0F.WIG 11 /r */
23656 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23657 UChar modrm = getUChar(delta);
23658 UInt rG = gregOfRexRM(pfx,modrm);
23659 if (epartIsReg(modrm)) {
23660 UInt rE = eregOfRexRM(pfx,modrm);
23661 putYMMRegLoAndZU( rE, getXMMReg(rG) );
23662 DIP("vmovups %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
23663 delta += 1;
23664 } else {
23665 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23666 storeLE( mkexpr(addr), getXMMReg(rG) );
23667 DIP("vmovups %s,%s\n", nameXMMReg(rG), dis_buf);
23668 delta += alen;
23669 }
23670 goto decode_success;
23671 }
sewardj4ed05e02012-06-18 15:01:30 +000023672 /* VMOVUPS ymm1, ymm2/m256 = VEX.256.0F.WIG 11 /r */
23673 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
sewardjfce47a62012-06-03 23:12:33 +000023674 UChar modrm = getUChar(delta);
23675 UInt rG = gregOfRexRM(pfx,modrm);
23676 if (epartIsReg(modrm)) {
23677 UInt rE = eregOfRexRM(pfx,modrm);
23678 putYMMReg( rE, getYMMReg(rG) );
sewardj4ed05e02012-06-18 15:01:30 +000023679 DIP("vmovups %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
sewardjfce47a62012-06-03 23:12:33 +000023680 delta += 1;
23681 } else {
23682 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23683 storeLE( mkexpr(addr), getYMMReg(rG) );
sewardj4ed05e02012-06-18 15:01:30 +000023684 DIP("vmovups %s,%s\n", nameYMMReg(rG), dis_buf);
sewardjfce47a62012-06-03 23:12:33 +000023685 delta += alen;
23686 }
23687 goto decode_success;
23688 }
sewardjc4530ae2012-05-21 10:18:49 +000023689 break;
23690
23691 case 0x12:
sewardj53b12782012-05-22 23:34:06 +000023692 /* VMOVDDUP xmm2/m64, xmm1 = VEX.128.F2.0F.WIG /12 r */
sewardjc4530ae2012-05-21 10:18:49 +000023693 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23694 delta = dis_MOVDDUP_128( vbi, pfx, delta, True/*isAvx*/ );
23695 goto decode_success;
23696 }
sewardj82096922012-06-24 14:57:59 +000023697 /* VMOVDDUP ymm2/m256, ymm1 = VEX.256.F2.0F.WIG /12 r */
23698 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23699 delta = dis_MOVDDUP_256( vbi, pfx, delta );
23700 goto decode_success;
23701 }
sewardj53b12782012-05-22 23:34:06 +000023702 /* VMOVHLPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 12 /r */
23703 /* Insn only exists in reg form */
23704 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
23705 && epartIsReg(getUChar(delta))) {
23706 UChar modrm = getUChar(delta);
23707 UInt rG = gregOfRexRM(pfx, modrm);
23708 UInt rE = eregOfRexRM(pfx, modrm);
23709 UInt rV = getVexNvvvv(pfx);
23710 delta++;
23711 DIP("vmovhlps %s,%s,%s\n",
23712 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23713 IRTemp res = newTemp(Ity_V128);
23714 assign(res, binop(Iop_64HLtoV128,
23715 getXMMRegLane64(rV, 1),
23716 getXMMRegLane64(rE, 1)));
23717 putYMMRegLoAndZU(rG, mkexpr(res));
23718 *uses_vvvv = True;
23719 goto decode_success;
23720 }
sewardj82096922012-06-24 14:57:59 +000023721 /* VMOVLPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 12 /r */
23722 /* Insn exists only in mem form, it appears. */
sewardj21459cb2012-06-18 14:05:52 +000023723 /* VMOVLPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 12 /r */
23724 /* Insn exists only in mem form, it appears. */
sewardj82096922012-06-24 14:57:59 +000023725 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
23726 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
sewardj21459cb2012-06-18 14:05:52 +000023727 UChar modrm = getUChar(delta);
23728 UInt rG = gregOfRexRM(pfx, modrm);
23729 UInt rV = getVexNvvvv(pfx);
23730 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23731 delta += alen;
23732 DIP("vmovlpd %s,%s,%s\n",
23733 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23734 IRTemp res = newTemp(Ity_V128);
23735 assign(res, binop(Iop_64HLtoV128,
23736 getXMMRegLane64(rV, 1),
23737 loadLE(Ity_I64, mkexpr(addr))));
23738 putYMMRegLoAndZU(rG, mkexpr(res));
23739 *uses_vvvv = True;
23740 goto decode_success;
23741 }
sewardj15ad1942012-06-20 10:21:05 +000023742 /* VMOVSLDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 12 /r */
23743 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
23744 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
23745 True/*isL*/ );
23746 goto decode_success;
23747 }
23748 /* VMOVSLDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 12 /r */
23749 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
23750 delta = dis_MOVSxDUP_256( vbi, pfx, delta, True/*isL*/ );
23751 goto decode_success;
23752 }
sewardj21459cb2012-06-18 14:05:52 +000023753 break;
23754
23755 case 0x13:
sewardj82096922012-06-24 14:57:59 +000023756 /* VMOVLPS xmm1, m64 = VEX.128.0F.WIG 13 /r */
23757 /* Insn exists only in mem form, it appears. */
sewardj21459cb2012-06-18 14:05:52 +000023758 /* VMOVLPD xmm1, m64 = VEX.128.66.0F.WIG 13 /r */
23759 /* Insn exists only in mem form, it appears. */
sewardj82096922012-06-24 14:57:59 +000023760 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
23761 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
sewardj21459cb2012-06-18 14:05:52 +000023762 UChar modrm = getUChar(delta);
23763 UInt rG = gregOfRexRM(pfx, modrm);
23764 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23765 delta += alen;
23766 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0));
23767 DIP("vmovlpd %s,%s\n", nameXMMReg(rG), dis_buf);
23768 goto decode_success;
23769 }
sewardjc4530ae2012-05-21 10:18:49 +000023770 break;
23771
23772 case 0x14:
sewardj56c30312012-06-12 08:45:39 +000023773 case 0x15:
sewardjc4530ae2012-05-21 10:18:49 +000023774 /* VUNPCKLPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 14 /r */
sewardj56c30312012-06-12 08:45:39 +000023775 /* VUNPCKHPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 15 /r */
sewardjc4530ae2012-05-21 10:18:49 +000023776 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23777 Bool hi = opc == 0x15;
23778 UChar modrm = getUChar(delta);
23779 UInt rG = gregOfRexRM(pfx,modrm);
23780 UInt rV = getVexNvvvv(pfx);
23781 IRTemp eV = newTemp(Ity_V128);
23782 IRTemp vV = newTemp(Ity_V128);
23783 assign( vV, getXMMReg(rV) );
23784 if (epartIsReg(modrm)) {
23785 UInt rE = eregOfRexRM(pfx,modrm);
23786 assign( eV, getXMMReg(rE) );
23787 delta += 1;
23788 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
23789 nameXMMReg(rE), nameXMMReg(rG));
23790 } else {
23791 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23792 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
23793 delta += alen;
23794 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
23795 dis_buf, nameXMMReg(rG));
23796 }
sewardj56c30312012-06-12 08:45:39 +000023797 IRTemp res = math_UNPCKxPS_128( eV, vV, hi );
23798 putYMMRegLoAndZU( rG, mkexpr(res) );
23799 *uses_vvvv = True;
23800 goto decode_success;
23801 }
sewardj4b1cc832012-06-13 11:10:20 +000023802 /* VUNPCKLPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 14 /r */
23803 /* VUNPCKHPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 15 /r */
23804 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23805 Bool hi = opc == 0x15;
23806 UChar modrm = getUChar(delta);
23807 UInt rG = gregOfRexRM(pfx,modrm);
23808 UInt rV = getVexNvvvv(pfx);
23809 IRTemp eV = newTemp(Ity_V256);
23810 IRTemp vV = newTemp(Ity_V256);
23811 assign( vV, getYMMReg(rV) );
23812 if (epartIsReg(modrm)) {
23813 UInt rE = eregOfRexRM(pfx,modrm);
23814 assign( eV, getYMMReg(rE) );
23815 delta += 1;
23816 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
23817 nameYMMReg(rE), nameYMMReg(rG));
23818 } else {
23819 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23820 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
23821 delta += alen;
23822 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
23823 dis_buf, nameYMMReg(rG));
23824 }
23825 IRTemp res = math_UNPCKxPS_256( eV, vV, hi );
23826 putYMMReg( rG, mkexpr(res) );
23827 *uses_vvvv = True;
23828 goto decode_success;
23829 }
sewardj56c30312012-06-12 08:45:39 +000023830 /* VUNPCKLPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 14 /r */
23831 /* VUNPCKHPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 15 /r */
23832 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23833 Bool hi = opc == 0x15;
23834 UChar modrm = getUChar(delta);
23835 UInt rG = gregOfRexRM(pfx,modrm);
23836 UInt rV = getVexNvvvv(pfx);
23837 IRTemp eV = newTemp(Ity_V128);
23838 IRTemp vV = newTemp(Ity_V128);
23839 assign( vV, getXMMReg(rV) );
23840 if (epartIsReg(modrm)) {
23841 UInt rE = eregOfRexRM(pfx,modrm);
23842 assign( eV, getXMMReg(rE) );
23843 delta += 1;
23844 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
23845 nameXMMReg(rE), nameXMMReg(rG));
23846 } else {
23847 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23848 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
23849 delta += alen;
23850 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
23851 dis_buf, nameXMMReg(rG));
23852 }
23853 IRTemp res = math_UNPCKxPD_128( eV, vV, hi );
sewardjc4530ae2012-05-21 10:18:49 +000023854 putYMMRegLoAndZU( rG, mkexpr(res) );
23855 *uses_vvvv = True;
23856 goto decode_success;
23857 }
sewardj4b1cc832012-06-13 11:10:20 +000023858 /* VUNPCKLPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 14 /r */
23859 /* VUNPCKHPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 15 /r */
23860 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23861 Bool hi = opc == 0x15;
23862 UChar modrm = getUChar(delta);
23863 UInt rG = gregOfRexRM(pfx,modrm);
23864 UInt rV = getVexNvvvv(pfx);
23865 IRTemp eV = newTemp(Ity_V256);
23866 IRTemp vV = newTemp(Ity_V256);
23867 assign( vV, getYMMReg(rV) );
23868 if (epartIsReg(modrm)) {
23869 UInt rE = eregOfRexRM(pfx,modrm);
23870 assign( eV, getYMMReg(rE) );
23871 delta += 1;
23872 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
23873 nameYMMReg(rE), nameYMMReg(rG));
23874 } else {
23875 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23876 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
23877 delta += alen;
23878 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
23879 dis_buf, nameYMMReg(rG));
23880 }
23881 IRTemp res = math_UNPCKxPD_256( eV, vV, hi );
23882 putYMMReg( rG, mkexpr(res) );
23883 *uses_vvvv = True;
23884 goto decode_success;
23885 }
sewardjc4530ae2012-05-21 10:18:49 +000023886 break;
23887
sewardj91fa9802012-05-23 06:16:26 +000023888 case 0x16:
23889 /* VMOVLHPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 16 /r */
23890 /* Insn only exists in reg form */
23891 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
23892 && epartIsReg(getUChar(delta))) {
23893 UChar modrm = getUChar(delta);
23894 UInt rG = gregOfRexRM(pfx, modrm);
23895 UInt rE = eregOfRexRM(pfx, modrm);
23896 UInt rV = getVexNvvvv(pfx);
23897 delta++;
23898 DIP("vmovlhps %s,%s,%s\n",
23899 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23900 IRTemp res = newTemp(Ity_V128);
23901 assign(res, binop(Iop_64HLtoV128,
23902 getXMMRegLane64(rE, 0),
23903 getXMMRegLane64(rV, 0)));
23904 putYMMRegLoAndZU(rG, mkexpr(res));
23905 *uses_vvvv = True;
23906 goto decode_success;
23907 }
sewardj8eb7ae82012-06-24 14:00:27 +000023908 /* VMOVHPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 16 /r */
23909 /* Insn exists only in mem form, it appears. */
sewardj6fcd43e2012-06-14 08:51:35 +000023910 /* VMOVHPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 16 /r */
23911 /* Insn exists only in mem form, it appears. */
sewardj8eb7ae82012-06-24 14:00:27 +000023912 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
23913 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
sewardj6fcd43e2012-06-14 08:51:35 +000023914 UChar modrm = getUChar(delta);
23915 UInt rG = gregOfRexRM(pfx, modrm);
23916 UInt rV = getVexNvvvv(pfx);
23917 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23918 delta += alen;
sewardj8eb7ae82012-06-24 14:00:27 +000023919 DIP("vmovhp%c %s,%s,%s\n", have66(pfx) ? 'd' : 's',
sewardj6fcd43e2012-06-14 08:51:35 +000023920 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23921 IRTemp res = newTemp(Ity_V128);
23922 assign(res, binop(Iop_64HLtoV128,
23923 loadLE(Ity_I64, mkexpr(addr)),
23924 getXMMRegLane64(rV, 0)));
23925 putYMMRegLoAndZU(rG, mkexpr(res));
23926 *uses_vvvv = True;
23927 goto decode_success;
23928 }
sewardj15ad1942012-06-20 10:21:05 +000023929 /* VMOVSHDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 16 /r */
23930 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
23931 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
23932 False/*!isL*/ );
23933 goto decode_success;
23934 }
23935 /* VMOVSHDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 16 /r */
23936 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
23937 delta = dis_MOVSxDUP_256( vbi, pfx, delta, False/*!isL*/ );
23938 goto decode_success;
23939 }
sewardj91fa9802012-05-23 06:16:26 +000023940 break;
23941
sewardj8ef22422012-05-24 16:29:18 +000023942 case 0x17:
sewardj8eb7ae82012-06-24 14:00:27 +000023943 /* VMOVHPS xmm1, m64 = VEX.128.0F.WIG 17 /r */
23944 /* Insn exists only in mem form, it appears. */
sewardj8ef22422012-05-24 16:29:18 +000023945 /* VMOVHPD xmm1, m64 = VEX.128.66.0F.WIG 17 /r */
sewardj6fcd43e2012-06-14 08:51:35 +000023946 /* Insn exists only in mem form, it appears. */
sewardj8eb7ae82012-06-24 14:00:27 +000023947 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
23948 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
sewardj8ef22422012-05-24 16:29:18 +000023949 UChar modrm = getUChar(delta);
23950 UInt rG = gregOfRexRM(pfx, modrm);
23951 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23952 delta += alen;
23953 storeLE( mkexpr(addr), getXMMRegLane64( rG, 1));
sewardj8eb7ae82012-06-24 14:00:27 +000023954 DIP("vmovhp%c %s,%s\n", have66(pfx) ? 'd' : 's',
23955 nameXMMReg(rG), dis_buf);
sewardj8ef22422012-05-24 16:29:18 +000023956 goto decode_success;
23957 }
23958 break;
23959
sewardjc4530ae2012-05-21 10:18:49 +000023960 case 0x28:
23961 /* VMOVAPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 28 /r */
23962 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23963 UChar modrm = getUChar(delta);
23964 UInt rG = gregOfRexRM(pfx, modrm);
23965 if (epartIsReg(modrm)) {
23966 UInt rE = eregOfRexRM(pfx,modrm);
23967 putYMMRegLoAndZU( rG, getXMMReg( rE ));
23968 DIP("vmovapd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
23969 delta += 1;
23970 } else {
23971 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23972 gen_SEGV_if_not_16_aligned( addr );
23973 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
23974 DIP("vmovapd %s,%s\n", dis_buf, nameXMMReg(rG));
23975 delta += alen;
23976 }
23977 goto decode_success;
23978 }
23979 /* VMOVAPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 28 /r */
23980 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23981 UChar modrm = getUChar(delta);
23982 UInt rG = gregOfRexRM(pfx, modrm);
23983 if (epartIsReg(modrm)) {
23984 UInt rE = eregOfRexRM(pfx,modrm);
23985 putYMMReg( rG, getYMMReg( rE ));
23986 DIP("vmovapd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
23987 delta += 1;
23988 } else {
23989 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23990 gen_SEGV_if_not_32_aligned( addr );
23991 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
23992 DIP("vmovapd %s,%s\n", dis_buf, nameYMMReg(rG));
23993 delta += alen;
23994 }
23995 goto decode_success;
23996 }
23997 /* VMOVAPS xmm2/m128, xmm1 = VEX.128.0F.WIG 28 /r */
23998 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23999 UChar modrm = getUChar(delta);
24000 UInt rG = gregOfRexRM(pfx, modrm);
24001 if (epartIsReg(modrm)) {
24002 UInt rE = eregOfRexRM(pfx,modrm);
24003 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24004 DIP("vmovaps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24005 delta += 1;
24006 } else {
24007 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24008 gen_SEGV_if_not_16_aligned( addr );
24009 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24010 DIP("vmovaps %s,%s\n", dis_buf, nameXMMReg(rG));
24011 delta += alen;
24012 }
24013 goto decode_success;
24014 }
sewardj6fcd43e2012-06-14 08:51:35 +000024015 /* VMOVAPS ymm2/m256, ymm1 = VEX.256.0F.WIG 28 /r */
24016 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24017 UChar modrm = getUChar(delta);
24018 UInt rG = gregOfRexRM(pfx, modrm);
24019 if (epartIsReg(modrm)) {
24020 UInt rE = eregOfRexRM(pfx,modrm);
24021 putYMMReg( rG, getYMMReg( rE ));
24022 DIP("vmovaps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24023 delta += 1;
24024 } else {
24025 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24026 gen_SEGV_if_not_32_aligned( addr );
24027 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24028 DIP("vmovaps %s,%s\n", dis_buf, nameYMMReg(rG));
24029 delta += alen;
24030 }
24031 goto decode_success;
24032 }
sewardjc4530ae2012-05-21 10:18:49 +000024033 break;
24034
24035 case 0x29:
sewardj98d02cc2012-06-02 11:55:25 +000024036 /* VMOVAPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 29 /r */
24037 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24038 UChar modrm = getUChar(delta);
24039 UInt rG = gregOfRexRM(pfx,modrm);
24040 if (epartIsReg(modrm)) {
24041 UInt rE = eregOfRexRM(pfx,modrm);
24042 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24043 DIP("vmovapd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24044 delta += 1;
24045 } else {
24046 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24047 gen_SEGV_if_not_16_aligned( addr );
24048 storeLE( mkexpr(addr), getXMMReg(rG) );
24049 DIP("vmovapd %s,%s\n", nameXMMReg(rG), dis_buf );
24050 delta += alen;
24051 }
24052 goto decode_success;
24053 }
24054 /* VMOVAPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 29 /r */
24055 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24056 UChar modrm = getUChar(delta);
24057 UInt rG = gregOfRexRM(pfx,modrm);
24058 if (epartIsReg(modrm)) {
24059 UInt rE = eregOfRexRM(pfx,modrm);
24060 putYMMReg( rE, getYMMReg(rG) );
24061 DIP("vmovapd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24062 delta += 1;
24063 } else {
24064 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24065 gen_SEGV_if_not_32_aligned( addr );
24066 storeLE( mkexpr(addr), getYMMReg(rG) );
24067 DIP("vmovapd %s,%s\n", nameYMMReg(rG), dis_buf );
24068 delta += alen;
24069 }
24070 goto decode_success;
24071 }
sewardjc4530ae2012-05-21 10:18:49 +000024072 /* VMOVAPS xmm1, xmm2/m128 = VEX.128.0F.WIG 29 /r */
24073 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24074 UChar modrm = getUChar(delta);
24075 UInt rG = gregOfRexRM(pfx,modrm);
24076 if (epartIsReg(modrm)) {
24077 UInt rE = eregOfRexRM(pfx,modrm);
24078 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24079 DIP("vmovaps %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24080 delta += 1;
24081 goto decode_success;
24082 } else {
24083 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24084 gen_SEGV_if_not_16_aligned( addr );
24085 storeLE( mkexpr(addr), getXMMReg(rG) );
24086 DIP("vmovaps %s,%s\n", nameXMMReg(rG), dis_buf );
24087 delta += alen;
24088 goto decode_success;
24089 }
24090 }
sewardj98d02cc2012-06-02 11:55:25 +000024091 /* VMOVAPS ymm1, ymm2/m256 = VEX.256.0F.WIG 29 /r */
24092 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
sewardjc4530ae2012-05-21 10:18:49 +000024093 UChar modrm = getUChar(delta);
24094 UInt rG = gregOfRexRM(pfx,modrm);
24095 if (epartIsReg(modrm)) {
24096 UInt rE = eregOfRexRM(pfx,modrm);
sewardj98d02cc2012-06-02 11:55:25 +000024097 putYMMReg( rE, getYMMReg(rG) );
24098 DIP("vmovaps %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
sewardjc4530ae2012-05-21 10:18:49 +000024099 delta += 1;
sewardj98d02cc2012-06-02 11:55:25 +000024100 goto decode_success;
sewardjc4530ae2012-05-21 10:18:49 +000024101 } else {
24102 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj98d02cc2012-06-02 11:55:25 +000024103 gen_SEGV_if_not_32_aligned( addr );
24104 storeLE( mkexpr(addr), getYMMReg(rG) );
24105 DIP("vmovaps %s,%s\n", nameYMMReg(rG), dis_buf );
sewardjc4530ae2012-05-21 10:18:49 +000024106 delta += alen;
sewardj98d02cc2012-06-02 11:55:25 +000024107 goto decode_success;
sewardjc4530ae2012-05-21 10:18:49 +000024108 }
sewardjc4530ae2012-05-21 10:18:49 +000024109 }
24110 break;
24111
24112 case 0x2A: {
24113 IRTemp rmode = newTemp(Ity_I32);
24114 assign( rmode, get_sse_roundingmode() );
24115 /* VCVTSI2SD r/m32, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W0 2A /r */
24116 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
24117 UChar modrm = getUChar(delta);
24118 UInt rV = getVexNvvvv(pfx);
24119 UInt rD = gregOfRexRM(pfx, modrm);
24120 IRTemp arg32 = newTemp(Ity_I32);
24121 if (epartIsReg(modrm)) {
24122 UInt rS = eregOfRexRM(pfx,modrm);
24123 assign( arg32, getIReg32(rS) );
24124 delta += 1;
24125 DIP("vcvtsi2sdl %s,%s,%s\n",
24126 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
24127 } else {
24128 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24129 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
24130 delta += alen;
24131 DIP("vcvtsi2sdl %s,%s,%s\n",
24132 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24133 }
24134 putXMMRegLane64F( rD, 0,
24135 unop(Iop_I32StoF64, mkexpr(arg32)));
24136 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24137 putYMMRegLane128( rD, 1, mkV128(0) );
24138 *uses_vvvv = True;
24139 goto decode_success;
24140 }
24141 /* VCVTSI2SD r/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W1 2A /r */
24142 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
24143 UChar modrm = getUChar(delta);
24144 UInt rV = getVexNvvvv(pfx);
24145 UInt rD = gregOfRexRM(pfx, modrm);
24146 IRTemp arg64 = newTemp(Ity_I64);
24147 if (epartIsReg(modrm)) {
24148 UInt rS = eregOfRexRM(pfx,modrm);
24149 assign( arg64, getIReg64(rS) );
24150 delta += 1;
24151 DIP("vcvtsi2sdq %s,%s,%s\n",
24152 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
24153 } else {
24154 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24155 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
24156 delta += alen;
24157 DIP("vcvtsi2sdq %s,%s,%s\n",
24158 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24159 }
24160 putXMMRegLane64F( rD, 0,
24161 binop( Iop_I64StoF64,
24162 get_sse_roundingmode(),
24163 mkexpr(arg64)) );
24164 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24165 putYMMRegLane128( rD, 1, mkV128(0) );
24166 *uses_vvvv = True;
24167 goto decode_success;
24168 }
24169 /* VCVTSI2SS r/m64, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W1 2A /r */
24170 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
24171 UChar modrm = getUChar(delta);
24172 UInt rV = getVexNvvvv(pfx);
24173 UInt rD = gregOfRexRM(pfx, modrm);
24174 IRTemp arg64 = newTemp(Ity_I64);
24175 if (epartIsReg(modrm)) {
24176 UInt rS = eregOfRexRM(pfx,modrm);
24177 assign( arg64, getIReg64(rS) );
24178 delta += 1;
24179 DIP("vcvtsi2ssq %s,%s,%s\n",
24180 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
24181 } else {
24182 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24183 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
24184 delta += alen;
24185 DIP("vcvtsi2ssq %s,%s,%s\n",
24186 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24187 }
24188 putXMMRegLane32F( rD, 0,
24189 binop(Iop_F64toF32,
24190 mkexpr(rmode),
24191 binop(Iop_I64StoF64, mkexpr(rmode),
24192 mkexpr(arg64)) ) );
24193 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
24194 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24195 putYMMRegLane128( rD, 1, mkV128(0) );
24196 *uses_vvvv = True;
24197 goto decode_success;
24198 }
24199 /* VCVTSI2SS r/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W0 2A /r */
24200 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
24201 UChar modrm = getUChar(delta);
24202 UInt rV = getVexNvvvv(pfx);
24203 UInt rD = gregOfRexRM(pfx, modrm);
24204 IRTemp arg32 = newTemp(Ity_I32);
24205 if (epartIsReg(modrm)) {
24206 UInt rS = eregOfRexRM(pfx,modrm);
24207 assign( arg32, getIReg32(rS) );
24208 delta += 1;
24209 DIP("vcvtsi2ssl %s,%s,%s\n",
24210 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
24211 } else {
24212 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24213 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
24214 delta += alen;
24215 DIP("vcvtsi2ssl %s,%s,%s\n",
24216 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24217 }
24218 putXMMRegLane32F( rD, 0,
24219 binop(Iop_F64toF32,
24220 mkexpr(rmode),
24221 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
24222 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
24223 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24224 putYMMRegLane128( rD, 1, mkV128(0) );
24225 *uses_vvvv = True;
24226 goto decode_success;
24227 }
24228 break;
24229 }
24230
sewardj8eb7ae82012-06-24 14:00:27 +000024231 case 0x2B:
24232 /* VMOVNTPD xmm1, m128 = VEX.128.66.0F.WIG 2B /r */
24233 /* VMOVNTPS xmm1, m128 = VEX.128.0F.WIG 2B /r */
24234 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24235 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24236 UChar modrm = getUChar(delta);
24237 UInt rS = gregOfRexRM(pfx, modrm);
24238 IRTemp tS = newTemp(Ity_V128);
24239 assign(tS, getXMMReg(rS));
24240 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
24241 delta += alen;
24242 gen_SEGV_if_not_16_aligned(addr);
24243 storeLE(mkexpr(addr), mkexpr(tS));
24244 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
24245 nameXMMReg(rS), dis_buf);
24246 goto decode_success;
24247 }
24248 /* VMOVNTPD ymm1, m256 = VEX.256.66.0F.WIG 2B /r */
24249 /* VMOVNTPS ymm1, m256 = VEX.256.0F.WIG 2B /r */
24250 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24251 && 1==getVexL(pfx)/*256*/ && !epartIsReg(getUChar(delta))) {
24252 UChar modrm = getUChar(delta);
24253 UInt rS = gregOfRexRM(pfx, modrm);
24254 IRTemp tS = newTemp(Ity_V256);
24255 assign(tS, getYMMReg(rS));
24256 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
24257 delta += alen;
24258 gen_SEGV_if_not_32_aligned(addr);
24259 storeLE(mkexpr(addr), mkexpr(tS));
24260 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
24261 nameYMMReg(rS), dis_buf);
24262 goto decode_success;
24263 }
24264 break;
24265
sewardjc4530ae2012-05-21 10:18:49 +000024266 case 0x2C:
sewardj80804d12012-05-22 10:48:13 +000024267 /* VCVTTSD2SI xmm1/m64, r32 = VEX.LIG.F2.0F.W0 2C /r */
sewardjc4530ae2012-05-21 10:18:49 +000024268 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
24269 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
24270 goto decode_success;
24271 }
24272 /* VCVTTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2C /r */
24273 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
24274 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
24275 goto decode_success;
24276 }
sewardj80804d12012-05-22 10:48:13 +000024277 /* VCVTTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2C /r */
24278 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
24279 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
24280 goto decode_success;
24281 }
sewardj8ef22422012-05-24 16:29:18 +000024282 /* VCVTTSS2SI xmm1/m32, r64 = VEX.LIG.F3.0F.W1 2C /r */
24283 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
24284 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
24285 goto decode_success;
24286 }
sewardjc4530ae2012-05-21 10:18:49 +000024287 break;
24288
sewardjadf357c2012-06-24 13:44:17 +000024289 case 0x2D:
24290 /* VCVTSD2SI xmm1/m64, r32 = VEX.LIG.F2.0F.W0 2D /r */
24291 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
24292 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
24293 goto decode_success;
24294 }
24295 /* VCVTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2D /r */
24296 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
24297 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
24298 goto decode_success;
24299 }
24300 /* VCVTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2D /r */
24301 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
24302 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
24303 goto decode_success;
24304 }
24305 /* VCVTSS2SI xmm1/m32, r64 = VEX.LIG.F3.0F.W1 2D /r */
24306 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
24307 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
24308 goto decode_success;
24309 }
24310 break;
24311
sewardjc4530ae2012-05-21 10:18:49 +000024312 case 0x2E:
sewardj4ed05e02012-06-18 15:01:30 +000024313 case 0x2F:
sewardjc4530ae2012-05-21 10:18:49 +000024314 /* VUCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2E /r */
sewardj4ed05e02012-06-18 15:01:30 +000024315 /* VCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2F /r */
sewardjc4530ae2012-05-21 10:18:49 +000024316 if (have66noF2noF3(pfx)) {
24317 delta = dis_COMISD( vbi, pfx, delta, True/*isAvx*/, opc );
24318 goto decode_success;
24319 }
24320 /* VUCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2E /r */
sewardj4ed05e02012-06-18 15:01:30 +000024321 /* VCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2F /r */
sewardjc4530ae2012-05-21 10:18:49 +000024322 if (haveNo66noF2noF3(pfx)) {
24323 delta = dis_COMISS( vbi, pfx, delta, True/*isAvx*/, opc );
24324 goto decode_success;
24325 }
24326 break;
24327
sewardj8eb7ae82012-06-24 14:00:27 +000024328 case 0x50:
24329 /* VMOVMSKPD xmm2, r32 = VEX.128.66.0F.WIG 50 /r */
24330 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24331 delta = dis_MOVMSKPD_128( vbi, pfx, delta, True/*isAvx*/ );
24332 goto decode_success;
24333 }
24334 /* VMOVMSKPD ymm2, r32 = VEX.256.66.0F.WIG 50 /r */
24335 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24336 delta = dis_MOVMSKPD_256( vbi, pfx, delta );
24337 goto decode_success;
24338 }
24339 /* VMOVMSKPS xmm2, r32 = VEX.128.0F.WIG 50 /r */
24340 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24341 delta = dis_MOVMSKPS_128( vbi, pfx, delta, True/*isAvx*/ );
24342 goto decode_success;
24343 }
24344 /* VMOVMSKPS ymm2, r32 = VEX.256.0F.WIG 50 /r */
24345 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24346 delta = dis_MOVMSKPS_256( vbi, pfx, delta );
24347 goto decode_success;
24348 }
24349 break;
24350
sewardjc4530ae2012-05-21 10:18:49 +000024351 case 0x51:
sewardj66becf32012-06-18 23:15:16 +000024352 /* VSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 51 /r */
24353 if (haveF3no66noF2(pfx)) {
24354 delta = dis_AVX128_E_V_to_G_lo32_unary(
24355 uses_vvvv, vbi, pfx, delta, "vsqrtss", Iop_Sqrt32F0x4 );
24356 goto decode_success;
24357 }
24358 /* VSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 51 /r */
24359 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24360 delta = dis_AVX128_E_to_G_unary_all(
24361 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx4 );
24362 goto decode_success;
24363 }
24364 /* VSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 51 /r */
24365 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24366 delta = dis_AVX256_E_to_G_unary_all(
24367 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx8 );
24368 goto decode_success;
24369 }
sewardjc4530ae2012-05-21 10:18:49 +000024370 /* VSQRTSD xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F2.0F.WIG 51 /r */
24371 if (haveF2no66noF3(pfx)) {
24372 delta = dis_AVX128_E_V_to_G_lo64_unary(
24373 uses_vvvv, vbi, pfx, delta, "vsqrtsd", Iop_Sqrt64F0x2 );
24374 goto decode_success;
sewardj66becf32012-06-18 23:15:16 +000024375 }
24376 /* VSQRTPD xmm2/m128(E), xmm1(G) = VEX.NDS.128.66.0F.WIG 51 /r */
24377 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24378 delta = dis_AVX128_E_to_G_unary_all(
24379 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx2 );
24380 goto decode_success;
24381 }
24382 /* VSQRTPD ymm2/m256(E), ymm1(G) = VEX.NDS.256.66.0F.WIG 51 /r */
24383 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24384 delta = dis_AVX256_E_to_G_unary_all(
24385 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx4 );
24386 goto decode_success;
24387 }
24388 break;
24389
24390 case 0x52:
24391 /* VRSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 52 /r */
24392 if (haveF3no66noF2(pfx)) {
24393 delta = dis_AVX128_E_V_to_G_lo32_unary(
sewardj1ddee212014-08-24 14:00:19 +000024394 uses_vvvv, vbi, pfx, delta, "vrsqrtss",
24395 Iop_RSqrtEst32F0x4 );
sewardj66becf32012-06-18 23:15:16 +000024396 goto decode_success;
24397 }
24398 /* VRSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 52 /r */
24399 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24400 delta = dis_AVX128_E_to_G_unary_all(
sewardj1ddee212014-08-24 14:00:19 +000024401 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx4 );
sewardj66becf32012-06-18 23:15:16 +000024402 goto decode_success;
24403 }
24404 /* VRSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 52 /r */
24405 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24406 delta = dis_AVX256_E_to_G_unary_all(
sewardj1ddee212014-08-24 14:00:19 +000024407 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx8 );
sewardj66becf32012-06-18 23:15:16 +000024408 goto decode_success;
24409 }
24410 break;
sewardjc4530ae2012-05-21 10:18:49 +000024411
sewardj82096922012-06-24 14:57:59 +000024412 case 0x53:
24413 /* VRCPSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 53 /r */
24414 if (haveF3no66noF2(pfx)) {
24415 delta = dis_AVX128_E_V_to_G_lo32_unary(
sewardj1ddee212014-08-24 14:00:19 +000024416 uses_vvvv, vbi, pfx, delta, "vrcpss", Iop_RecipEst32F0x4 );
sewardj82096922012-06-24 14:57:59 +000024417 goto decode_success;
24418 }
24419 /* VRCPPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 53 /r */
24420 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24421 delta = dis_AVX128_E_to_G_unary_all(
sewardj1ddee212014-08-24 14:00:19 +000024422 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx4 );
sewardj82096922012-06-24 14:57:59 +000024423 goto decode_success;
24424 }
24425 /* VRCPPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 53 /r */
24426 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24427 delta = dis_AVX256_E_to_G_unary_all(
sewardj1ddee212014-08-24 14:00:19 +000024428 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx8 );
sewardj82096922012-06-24 14:57:59 +000024429 goto decode_success;
24430 }
24431 break;
24432
sewardjc4530ae2012-05-21 10:18:49 +000024433 case 0x54:
sewardj251b59e2012-05-25 13:51:07 +000024434 /* VANDPD r/m, rV, r ::: r = rV & r/m */
sewardjc4530ae2012-05-21 10:18:49 +000024435 /* VANDPD = VEX.NDS.128.66.0F.WIG 54 /r */
24436 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24437 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24438 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128 );
24439 goto decode_success;
24440 }
sewardj4b1cc832012-06-13 11:10:20 +000024441 /* VANDPD r/m, rV, r ::: r = rV & r/m */
24442 /* VANDPD = VEX.NDS.256.66.0F.WIG 54 /r */
24443 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24444 delta = dis_AVX256_E_V_to_G(
24445 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256 );
24446 goto decode_success;
24447 }
sewardjc4530ae2012-05-21 10:18:49 +000024448 /* VANDPS = VEX.NDS.128.0F.WIG 54 /r */
24449 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24450 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24451 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128 );
24452 goto decode_success;
24453 }
sewardj2a2bda92012-06-14 23:32:02 +000024454 /* VANDPS = VEX.NDS.256.0F.WIG 54 /r */
24455 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24456 delta = dis_AVX256_E_V_to_G(
24457 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256 );
24458 goto decode_success;
24459 }
sewardjc4530ae2012-05-21 10:18:49 +000024460 break;
24461
24462 case 0x55:
sewardj251b59e2012-05-25 13:51:07 +000024463 /* VANDNPD r/m, rV, r ::: r = (not rV) & r/m */
sewardjc4530ae2012-05-21 10:18:49 +000024464 /* VANDNPD = VEX.NDS.128.66.0F.WIG 55 /r */
24465 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24466 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
24467 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128,
sewardj44565e82012-05-22 09:14:15 +000024468 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
sewardjc4530ae2012-05-21 10:18:49 +000024469 goto decode_success;
24470 }
sewardj2a2bda92012-06-14 23:32:02 +000024471 /* VANDNPD = VEX.NDS.256.66.0F.WIG 55 /r */
24472 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24473 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
24474 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256,
24475 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
24476 goto decode_success;
24477 }
sewardjc4530ae2012-05-21 10:18:49 +000024478 /* VANDNPS = VEX.NDS.128.0F.WIG 55 /r */
24479 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24480 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
24481 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128,
sewardj44565e82012-05-22 09:14:15 +000024482 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
sewardjc4530ae2012-05-21 10:18:49 +000024483 goto decode_success;
24484 }
sewardj2a2bda92012-06-14 23:32:02 +000024485 /* VANDNPS = VEX.NDS.256.0F.WIG 55 /r */
24486 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24487 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
24488 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256,
24489 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
24490 goto decode_success;
24491 }
sewardjc4530ae2012-05-21 10:18:49 +000024492 break;
24493
24494 case 0x56:
sewardj251b59e2012-05-25 13:51:07 +000024495 /* VORPD r/m, rV, r ::: r = rV | r/m */
sewardjc4530ae2012-05-21 10:18:49 +000024496 /* VORPD = VEX.NDS.128.66.0F.WIG 56 /r */
24497 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24498 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24499 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV128 );
24500 goto decode_success;
24501 }
sewardj2a2bda92012-06-14 23:32:02 +000024502 /* VORPD r/m, rV, r ::: r = rV | r/m */
24503 /* VORPD = VEX.NDS.256.66.0F.WIG 56 /r */
24504 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24505 delta = dis_AVX256_E_V_to_G(
24506 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV256 );
24507 goto decode_success;
24508 }
sewardj251b59e2012-05-25 13:51:07 +000024509 /* VORPS r/m, rV, r ::: r = rV | r/m */
sewardjc4530ae2012-05-21 10:18:49 +000024510 /* VORPS = VEX.NDS.128.0F.WIG 56 /r */
24511 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24512 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24513 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV128 );
24514 goto decode_success;
24515 }
sewardj2a2bda92012-06-14 23:32:02 +000024516 /* VORPS r/m, rV, r ::: r = rV | r/m */
24517 /* VORPS = VEX.NDS.256.0F.WIG 56 /r */
24518 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24519 delta = dis_AVX256_E_V_to_G(
24520 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV256 );
24521 goto decode_success;
24522 }
sewardjc4530ae2012-05-21 10:18:49 +000024523 break;
24524
24525 case 0x57:
sewardj251b59e2012-05-25 13:51:07 +000024526 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
sewardjc4530ae2012-05-21 10:18:49 +000024527 /* VXORPD = VEX.NDS.128.66.0F.WIG 57 /r */
24528 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24529 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24530 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV128 );
24531 goto decode_success;
24532 }
sewardj4b1cc832012-06-13 11:10:20 +000024533 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
24534 /* VXORPD = VEX.NDS.256.66.0F.WIG 57 /r */
24535 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24536 delta = dis_AVX256_E_V_to_G(
24537 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV256 );
24538 goto decode_success;
24539 }
sewardj251b59e2012-05-25 13:51:07 +000024540 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
sewardjc4530ae2012-05-21 10:18:49 +000024541 /* VXORPS = VEX.NDS.128.0F.WIG 57 /r */
24542 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24543 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24544 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV128 );
24545 goto decode_success;
24546 }
sewardj2a2bda92012-06-14 23:32:02 +000024547 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
24548 /* VXORPS = VEX.NDS.256.0F.WIG 57 /r */
24549 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24550 delta = dis_AVX256_E_V_to_G(
24551 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV256 );
24552 goto decode_success;
24553 }
sewardjc4530ae2012-05-21 10:18:49 +000024554 break;
24555
24556 case 0x58:
24557 /* VADDSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 58 /r */
24558 if (haveF2no66noF3(pfx)) {
24559 delta = dis_AVX128_E_V_to_G_lo64(
24560 uses_vvvv, vbi, pfx, delta, "vaddsd", Iop_Add64F0x2 );
24561 goto decode_success;
24562 }
24563 /* VADDSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 58 /r */
24564 if (haveF3no66noF2(pfx)) {
24565 delta = dis_AVX128_E_V_to_G_lo32(
24566 uses_vvvv, vbi, pfx, delta, "vaddss", Iop_Add32F0x4 );
24567 goto decode_success;
24568 }
sewardj251b59e2012-05-25 13:51:07 +000024569 /* VADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 58 /r */
24570 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24571 delta = dis_AVX128_E_V_to_G(
24572 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx4 );
24573 goto decode_success;
24574 }
sewardj56c30312012-06-12 08:45:39 +000024575 /* VADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 58 /r */
24576 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24577 delta = dis_AVX256_E_V_to_G(
24578 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx8 );
24579 goto decode_success;
24580 }
24581 /* VADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 58 /r */
24582 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24583 delta = dis_AVX128_E_V_to_G(
24584 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx2 );
24585 goto decode_success;
24586 }
24587 /* VADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 58 /r */
24588 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24589 delta = dis_AVX256_E_V_to_G(
24590 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx4 );
24591 goto decode_success;
24592 }
sewardjc4530ae2012-05-21 10:18:49 +000024593 break;
24594
24595 case 0x59:
24596 /* VMULSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 59 /r */
24597 if (haveF2no66noF3(pfx)) {
24598 delta = dis_AVX128_E_V_to_G_lo64(
24599 uses_vvvv, vbi, pfx, delta, "vmulsd", Iop_Mul64F0x2 );
24600 goto decode_success;
24601 }
24602 /* VMULSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 59 /r */
24603 if (haveF3no66noF2(pfx)) {
24604 delta = dis_AVX128_E_V_to_G_lo32(
24605 uses_vvvv, vbi, pfx, delta, "vmulss", Iop_Mul32F0x4 );
24606 goto decode_success;
24607 }
sewardj251b59e2012-05-25 13:51:07 +000024608 /* VMULPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 59 /r */
24609 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24610 delta = dis_AVX128_E_V_to_G(
24611 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx4 );
24612 goto decode_success;
24613 }
sewardj56c30312012-06-12 08:45:39 +000024614 /* VMULPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 59 /r */
24615 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24616 delta = dis_AVX256_E_V_to_G(
24617 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx8 );
24618 goto decode_success;
24619 }
24620 /* VMULPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 59 /r */
24621 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24622 delta = dis_AVX128_E_V_to_G(
24623 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx2 );
24624 goto decode_success;
24625 }
24626 /* VMULPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 59 /r */
24627 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24628 delta = dis_AVX256_E_V_to_G(
24629 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx4 );
24630 goto decode_success;
24631 }
sewardjc4530ae2012-05-21 10:18:49 +000024632 break;
24633
24634 case 0x5A:
24635 /* VCVTPS2PD xmm2/m64, xmm1 = VEX.128.0F.WIG 5A /r */
sewardj72df0682012-05-23 23:54:30 +000024636 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
sewardj66becf32012-06-18 23:15:16 +000024637 delta = dis_CVTPS2PD_128( vbi, pfx, delta, True/*isAvx*/ );
24638 goto decode_success;
24639 }
24640 /* VCVTPS2PD xmm2/m128, ymm1 = VEX.256.0F.WIG 5A /r */
24641 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24642 delta = dis_CVTPS2PD_256( vbi, pfx, delta );
sewardjc4530ae2012-05-21 10:18:49 +000024643 goto decode_success;
24644 }
24645 /* VCVTPD2PS xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5A /r */
sewardj72df0682012-05-23 23:54:30 +000024646 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
sewardj6fcd43e2012-06-14 08:51:35 +000024647 delta = dis_CVTPD2PS_128( vbi, pfx, delta, True/*isAvx*/ );
24648 goto decode_success;
24649 }
24650 /* VCVTPD2PS ymm2/m256, xmm1 = VEX.256.66.0F.WIG 5A /r */
24651 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24652 delta = dis_CVTPD2PS_256( vbi, pfx, delta );
sewardjc4530ae2012-05-21 10:18:49 +000024653 goto decode_success;
24654 }
sewardj72df0682012-05-23 23:54:30 +000024655 /* VCVTSD2SS xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5A /r */
24656 if (haveF2no66noF3(pfx)) {
24657 UChar modrm = getUChar(delta);
24658 UInt rV = getVexNvvvv(pfx);
24659 UInt rD = gregOfRexRM(pfx, modrm);
24660 IRTemp f64lo = newTemp(Ity_F64);
24661 IRTemp rmode = newTemp(Ity_I32);
24662 assign( rmode, get_sse_roundingmode() );
24663 if (epartIsReg(modrm)) {
24664 UInt rS = eregOfRexRM(pfx,modrm);
24665 assign(f64lo, getXMMRegLane64F(rS, 0));
24666 delta += 1;
24667 DIP("vcvtsd2ss %s,%s,%s\n",
24668 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
24669 } else {
24670 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24671 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)) );
24672 delta += alen;
24673 DIP("vcvtsd2ss %s,%s,%s\n",
24674 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24675 }
24676 putXMMRegLane32F( rD, 0,
24677 binop( Iop_F64toF32, mkexpr(rmode),
24678 mkexpr(f64lo)) );
24679 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
24680 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24681 putYMMRegLane128( rD, 1, mkV128(0) );
24682 *uses_vvvv = True;
24683 goto decode_success;
24684 }
24685 /* VCVTSS2SD xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5A /r */
24686 if (haveF3no66noF2(pfx)) {
24687 UChar modrm = getUChar(delta);
24688 UInt rV = getVexNvvvv(pfx);
24689 UInt rD = gregOfRexRM(pfx, modrm);
24690 IRTemp f32lo = newTemp(Ity_F32);
24691 if (epartIsReg(modrm)) {
24692 UInt rS = eregOfRexRM(pfx,modrm);
24693 assign(f32lo, getXMMRegLane32F(rS, 0));
24694 delta += 1;
24695 DIP("vcvtss2sd %s,%s,%s\n",
24696 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
24697 } else {
24698 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24699 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)) );
24700 delta += alen;
24701 DIP("vcvtss2sd %s,%s,%s\n",
24702 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24703 }
24704 putXMMRegLane64F( rD, 0,
24705 unop( Iop_F32toF64, mkexpr(f32lo)) );
24706 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24707 putYMMRegLane128( rD, 1, mkV128(0) );
24708 *uses_vvvv = True;
24709 goto decode_success;
24710 }
sewardjc4530ae2012-05-21 10:18:49 +000024711 break;
24712
sewardj251b59e2012-05-25 13:51:07 +000024713 case 0x5B:
24714 /* VCVTPS2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5B /r */
24715 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
sewardj66becf32012-06-18 23:15:16 +000024716 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
24717 True/*isAvx*/, False/*!r2zero*/ );
24718 goto decode_success;
24719 }
24720 /* VCVTPS2DQ ymm2/m256, ymm1 = VEX.256.66.0F.WIG 5B /r */
24721 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24722 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
24723 False/*!r2zero*/ );
24724 goto decode_success;
24725 }
24726 /* VCVTTPS2DQ xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 5B /r */
24727 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
24728 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
24729 True/*isAvx*/, True/*r2zero*/ );
24730 goto decode_success;
24731 }
24732 /* VCVTTPS2DQ ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 5B /r */
24733 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
24734 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
24735 True/*r2zero*/ );
24736 goto decode_success;
24737 }
24738 /* VCVTDQ2PS xmm2/m128, xmm1 = VEX.128.0F.WIG 5B /r */
24739 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24740 delta = dis_CVTDQ2PS_128 ( vbi, pfx, delta, True/*isAvx*/ );
24741 goto decode_success;
24742 }
24743 /* VCVTDQ2PS ymm2/m256, ymm1 = VEX.256.0F.WIG 5B /r */
24744 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24745 delta = dis_CVTDQ2PS_256 ( vbi, pfx, delta );
sewardj251b59e2012-05-25 13:51:07 +000024746 goto decode_success;
24747 }
24748 break;
24749
sewardjc4530ae2012-05-21 10:18:49 +000024750 case 0x5C:
24751 /* VSUBSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5C /r */
24752 if (haveF2no66noF3(pfx)) {
24753 delta = dis_AVX128_E_V_to_G_lo64(
24754 uses_vvvv, vbi, pfx, delta, "vsubsd", Iop_Sub64F0x2 );
24755 goto decode_success;
24756 }
24757 /* VSUBSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5C /r */
24758 if (haveF3no66noF2(pfx)) {
24759 delta = dis_AVX128_E_V_to_G_lo32(
24760 uses_vvvv, vbi, pfx, delta, "vsubss", Iop_Sub32F0x4 );
24761 goto decode_success;
24762 }
sewardj251b59e2012-05-25 13:51:07 +000024763 /* VSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5C /r */
24764 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24765 delta = dis_AVX128_E_V_to_G(
24766 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx4 );
24767 goto decode_success;
24768 }
sewardj56c30312012-06-12 08:45:39 +000024769 /* VSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5C /r */
24770 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24771 delta = dis_AVX256_E_V_to_G(
24772 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx8 );
24773 goto decode_success;
24774 }
24775 /* VSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5C /r */
24776 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24777 delta = dis_AVX128_E_V_to_G(
24778 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx2 );
24779 goto decode_success;
24780 }
24781 /* VSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5C /r */
24782 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24783 delta = dis_AVX256_E_V_to_G(
24784 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx4 );
24785 goto decode_success;
24786 }
sewardjc4530ae2012-05-21 10:18:49 +000024787 break;
24788
24789 case 0x5D:
24790 /* VMINSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5D /r */
24791 if (haveF2no66noF3(pfx)) {
24792 delta = dis_AVX128_E_V_to_G_lo64(
24793 uses_vvvv, vbi, pfx, delta, "vminsd", Iop_Min64F0x2 );
24794 goto decode_success;
24795 }
24796 /* VMINSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5D /r */
24797 if (haveF3no66noF2(pfx)) {
24798 delta = dis_AVX128_E_V_to_G_lo32(
24799 uses_vvvv, vbi, pfx, delta, "vminss", Iop_Min32F0x4 );
24800 goto decode_success;
24801 }
sewardj251b59e2012-05-25 13:51:07 +000024802 /* VMINPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5D /r */
24803 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24804 delta = dis_AVX128_E_V_to_G(
24805 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx4 );
24806 goto decode_success;
24807 }
sewardj8eb7ae82012-06-24 14:00:27 +000024808 /* VMINPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5D /r */
24809 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24810 delta = dis_AVX256_E_V_to_G(
24811 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx8 );
24812 goto decode_success;
24813 }
24814 /* VMINPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5D /r */
24815 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24816 delta = dis_AVX128_E_V_to_G(
24817 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx2 );
24818 goto decode_success;
24819 }
24820 /* VMINPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5D /r */
24821 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24822 delta = dis_AVX256_E_V_to_G(
24823 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx4 );
24824 goto decode_success;
24825 }
sewardjc4530ae2012-05-21 10:18:49 +000024826 break;
24827
24828 case 0x5E:
24829 /* VDIVSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5E /r */
24830 if (haveF2no66noF3(pfx)) {
24831 delta = dis_AVX128_E_V_to_G_lo64(
24832 uses_vvvv, vbi, pfx, delta, "vdivsd", Iop_Div64F0x2 );
24833 goto decode_success;
24834 }
24835 /* VDIVSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5E /r */
24836 if (haveF3no66noF2(pfx)) {
24837 delta = dis_AVX128_E_V_to_G_lo32(
24838 uses_vvvv, vbi, pfx, delta, "vdivss", Iop_Div32F0x4 );
24839 goto decode_success;
24840 }
sewardj2a2bda92012-06-14 23:32:02 +000024841 /* VDIVPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5E /r */
24842 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24843 delta = dis_AVX128_E_V_to_G(
24844 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx4 );
24845 goto decode_success;
24846 }
sewardj56c30312012-06-12 08:45:39 +000024847 /* VDIVPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5E /r */
24848 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24849 delta = dis_AVX256_E_V_to_G(
24850 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx8 );
24851 goto decode_success;
24852 }
sewardj4b1cc832012-06-13 11:10:20 +000024853 /* VDIVPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5E /r */
24854 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24855 delta = dis_AVX128_E_V_to_G(
24856 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx2 );
24857 goto decode_success;
24858 }
sewardj56c30312012-06-12 08:45:39 +000024859 /* VDIVPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5E /r */
24860 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24861 delta = dis_AVX256_E_V_to_G(
24862 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx4 );
24863 goto decode_success;
24864 }
sewardjc4530ae2012-05-21 10:18:49 +000024865 break;
24866
24867 case 0x5F:
24868 /* VMAXSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5F /r */
24869 if (haveF2no66noF3(pfx)) {
24870 delta = dis_AVX128_E_V_to_G_lo64(
24871 uses_vvvv, vbi, pfx, delta, "vmaxsd", Iop_Max64F0x2 );
24872 goto decode_success;
24873 }
24874 /* VMAXSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5F /r */
24875 if (haveF3no66noF2(pfx)) {
24876 delta = dis_AVX128_E_V_to_G_lo32(
24877 uses_vvvv, vbi, pfx, delta, "vmaxss", Iop_Max32F0x4 );
24878 goto decode_success;
24879 }
sewardj251b59e2012-05-25 13:51:07 +000024880 /* VMAXPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5F /r */
24881 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24882 delta = dis_AVX128_E_V_to_G(
24883 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx4 );
24884 goto decode_success;
24885 }
sewardj8eb7ae82012-06-24 14:00:27 +000024886 /* VMAXPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5F /r */
24887 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24888 delta = dis_AVX256_E_V_to_G(
24889 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx8 );
24890 goto decode_success;
24891 }
24892 /* VMAXPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5F /r */
24893 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24894 delta = dis_AVX128_E_V_to_G(
24895 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx2 );
24896 goto decode_success;
24897 }
24898 /* VMAXPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5F /r */
24899 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24900 delta = dis_AVX256_E_V_to_G(
24901 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx4 );
24902 goto decode_success;
24903 }
sewardjc4530ae2012-05-21 10:18:49 +000024904 break;
24905
sewardj44565e82012-05-22 09:14:15 +000024906 case 0x60:
sewardj251b59e2012-05-25 13:51:07 +000024907 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
sewardj6faf7cc2012-05-25 15:53:01 +000024908 /* VPUNPCKLBW = VEX.NDS.128.66.0F.WIG 60 /r */
sewardj44565e82012-05-22 09:14:15 +000024909 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24910 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
24911 uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
24912 Iop_InterleaveLO8x16, NULL,
24913 False/*!invertLeftArg*/, True/*swapArgs*/ );
24914 goto decode_success;
24915 }
sewardjcc3d2192013-03-27 11:37:33 +000024916 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
24917 /* VPUNPCKLBW = VEX.NDS.256.66.0F.WIG 60 /r */
24918 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24919 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
24920 uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
24921 math_VPUNPCKLBW_YMM );
24922 goto decode_success;
24923 }
sewardj44565e82012-05-22 09:14:15 +000024924 break;
24925
sewardj251b59e2012-05-25 13:51:07 +000024926 case 0x61:
24927 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
sewardj6faf7cc2012-05-25 15:53:01 +000024928 /* VPUNPCKLWD = VEX.NDS.128.66.0F.WIG 61 /r */
sewardj251b59e2012-05-25 13:51:07 +000024929 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24930 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
24931 uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
24932 Iop_InterleaveLO16x8, NULL,
24933 False/*!invertLeftArg*/, True/*swapArgs*/ );
24934 goto decode_success;
24935 }
sewardjcc3d2192013-03-27 11:37:33 +000024936 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
24937 /* VPUNPCKLWD = VEX.NDS.256.66.0F.WIG 61 /r */
24938 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24939 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
24940 uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
24941 math_VPUNPCKLWD_YMM );
24942 goto decode_success;
24943 }
sewardj251b59e2012-05-25 13:51:07 +000024944 break;
24945
sewardj6faf7cc2012-05-25 15:53:01 +000024946 case 0x62:
24947 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
24948 /* VPUNPCKLDQ = VEX.NDS.128.66.0F.WIG 62 /r */
24949 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24950 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
24951 uses_vvvv, vbi, pfx, delta, "vpunpckldq",
24952 Iop_InterleaveLO32x4, NULL,
24953 False/*!invertLeftArg*/, True/*swapArgs*/ );
24954 goto decode_success;
24955 }
sewardjcc3d2192013-03-27 11:37:33 +000024956 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
24957 /* VPUNPCKLDQ = VEX.NDS.256.66.0F.WIG 62 /r */
24958 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24959 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
24960 uses_vvvv, vbi, pfx, delta, "vpunpckldq",
24961 math_VPUNPCKLDQ_YMM );
24962 goto decode_success;
24963 }
sewardj6faf7cc2012-05-25 15:53:01 +000024964 break;
24965
sewardj8516a1f2012-06-24 14:26:30 +000024966 case 0x63:
24967 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
24968 /* VPACKSSWB = VEX.NDS.128.66.0F.WIG 63 /r */
24969 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24970 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
24971 uses_vvvv, vbi, pfx, delta, "vpacksswb",
24972 Iop_QNarrowBin16Sto8Sx16, NULL,
24973 False/*!invertLeftArg*/, True/*swapArgs*/ );
24974 goto decode_success;
24975 }
sewardjcc3d2192013-03-27 11:37:33 +000024976 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
24977 /* VPACKSSWB = VEX.NDS.256.66.0F.WIG 63 /r */
24978 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24979 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
24980 uses_vvvv, vbi, pfx, delta, "vpacksswb",
24981 math_VPACKSSWB_YMM );
24982 goto decode_success;
24983 }
sewardj8516a1f2012-06-24 14:26:30 +000024984 break;
24985
sewardj89378162012-06-24 12:12:20 +000024986 case 0x64:
24987 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
24988 /* VPCMPGTB = VEX.NDS.128.66.0F.WIG 64 /r */
24989 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24990 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24991 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx16 );
24992 goto decode_success;
24993 }
sewardjcc3d2192013-03-27 11:37:33 +000024994 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
24995 /* VPCMPGTB = VEX.NDS.256.66.0F.WIG 64 /r */
24996 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24997 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
24998 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx32 );
24999 goto decode_success;
25000 }
sewardj89378162012-06-24 12:12:20 +000025001 break;
25002
25003 case 0x65:
25004 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
25005 /* VPCMPGTW = VEX.NDS.128.66.0F.WIG 65 /r */
25006 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25007 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25008 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx8 );
25009 goto decode_success;
25010 }
sewardjcc3d2192013-03-27 11:37:33 +000025011 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
25012 /* VPCMPGTW = VEX.NDS.256.66.0F.WIG 65 /r */
25013 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25014 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25015 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx16 );
25016 goto decode_success;
25017 }
sewardj89378162012-06-24 12:12:20 +000025018 break;
25019
sewardj4ed05e02012-06-18 15:01:30 +000025020 case 0x66:
25021 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
25022 /* VPCMPGTD = VEX.NDS.128.66.0F.WIG 66 /r */
25023 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25024 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25025 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx4 );
25026 goto decode_success;
25027 }
sewardjcc3d2192013-03-27 11:37:33 +000025028 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
25029 /* VPCMPGTD = VEX.NDS.256.66.0F.WIG 66 /r */
25030 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25031 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25032 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx8 );
25033 goto decode_success;
25034 }
sewardj4ed05e02012-06-18 15:01:30 +000025035 break;
25036
sewardj6c4e45c2012-05-24 00:09:27 +000025037 case 0x67:
25038 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
sewardj6faf7cc2012-05-25 15:53:01 +000025039 /* VPACKUSWB = VEX.NDS.128.66.0F.WIG 67 /r */
sewardj6c4e45c2012-05-24 00:09:27 +000025040 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25041 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25042 uses_vvvv, vbi, pfx, delta, "vpackuswb",
25043 Iop_QNarrowBin16Sto8Ux16, NULL,
25044 False/*!invertLeftArg*/, True/*swapArgs*/ );
25045 goto decode_success;
25046 }
sewardjcc3d2192013-03-27 11:37:33 +000025047 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
25048 /* VPACKUSWB = VEX.NDS.256.66.0F.WIG 67 /r */
25049 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25050 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25051 uses_vvvv, vbi, pfx, delta, "vpackuswb",
25052 math_VPACKUSWB_YMM );
25053 goto decode_success;
25054 }
sewardj6c4e45c2012-05-24 00:09:27 +000025055 break;
25056
sewardj44565e82012-05-22 09:14:15 +000025057 case 0x68:
sewardj251b59e2012-05-25 13:51:07 +000025058 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
sewardj44565e82012-05-22 09:14:15 +000025059 /* VPUNPCKHBW = VEX.NDS.128.66.0F.WIG 68 /r */
25060 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25061 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25062 uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
25063 Iop_InterleaveHI8x16, NULL,
25064 False/*!invertLeftArg*/, True/*swapArgs*/ );
25065 goto decode_success;
25066 }
sewardjcc3d2192013-03-27 11:37:33 +000025067 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
25068 /* VPUNPCKHBW = VEX.NDS.256.66.0F.WIG 68 /r */
25069 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25070 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25071 uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
25072 math_VPUNPCKHBW_YMM );
25073 goto decode_success;
25074 }
sewardj44565e82012-05-22 09:14:15 +000025075 break;
25076
sewardj251b59e2012-05-25 13:51:07 +000025077 case 0x69:
25078 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
25079 /* VPUNPCKHWD = VEX.NDS.128.66.0F.WIG 69 /r */
25080 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25081 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25082 uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
25083 Iop_InterleaveHI16x8, NULL,
25084 False/*!invertLeftArg*/, True/*swapArgs*/ );
25085 goto decode_success;
25086 }
sewardjcc3d2192013-03-27 11:37:33 +000025087 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
25088 /* VPUNPCKHWD = VEX.NDS.256.66.0F.WIG 69 /r */
25089 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25090 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25091 uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
25092 math_VPUNPCKHWD_YMM );
25093 goto decode_success;
25094 }
sewardj251b59e2012-05-25 13:51:07 +000025095 break;
25096
sewardj6fcd43e2012-06-14 08:51:35 +000025097 case 0x6A:
25098 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
25099 /* VPUNPCKHDQ = VEX.NDS.128.66.0F.WIG 6A /r */
25100 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25101 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25102 uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
25103 Iop_InterleaveHI32x4, NULL,
25104 False/*!invertLeftArg*/, True/*swapArgs*/ );
25105 goto decode_success;
25106 }
sewardjcc3d2192013-03-27 11:37:33 +000025107 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
25108 /* VPUNPCKHDQ = VEX.NDS.256.66.0F.WIG 6A /r */
25109 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25110 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25111 uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
25112 math_VPUNPCKHDQ_YMM );
25113 goto decode_success;
25114 }
sewardj6fcd43e2012-06-14 08:51:35 +000025115 break;
25116
sewardj6faf7cc2012-05-25 15:53:01 +000025117 case 0x6B:
25118 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25119 /* VPACKSSDW = VEX.NDS.128.66.0F.WIG 6B /r */
25120 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25121 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25122 uses_vvvv, vbi, pfx, delta, "vpackssdw",
25123 Iop_QNarrowBin32Sto16Sx8, NULL,
25124 False/*!invertLeftArg*/, True/*swapArgs*/ );
25125 goto decode_success;
25126 }
sewardjcc3d2192013-03-27 11:37:33 +000025127 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25128 /* VPACKSSDW = VEX.NDS.256.66.0F.WIG 6B /r */
25129 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25130 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25131 uses_vvvv, vbi, pfx, delta, "vpackssdw",
25132 math_VPACKSSDW_YMM );
25133 goto decode_success;
25134 }
sewardj6faf7cc2012-05-25 15:53:01 +000025135 break;
25136
25137 case 0x6C:
25138 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
25139 /* VPUNPCKLQDQ = VEX.NDS.128.66.0F.WIG 6C /r */
25140 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25141 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25142 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq",
25143 Iop_InterleaveLO64x2, NULL,
25144 False/*!invertLeftArg*/, True/*swapArgs*/ );
25145 goto decode_success;
25146 }
sewardjcc3d2192013-03-27 11:37:33 +000025147 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
25148 /* VPUNPCKLQDQ = VEX.NDS.256.66.0F.WIG 6C /r */
25149 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25150 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25151 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq",
25152 math_VPUNPCKLQDQ_YMM );
25153 goto decode_success;
25154 }
sewardj6faf7cc2012-05-25 15:53:01 +000025155 break;
25156
sewardjfe0c5e72012-06-15 15:48:07 +000025157 case 0x6D:
25158 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
25159 /* VPUNPCKHQDQ = VEX.NDS.128.66.0F.WIG 6D /r */
25160 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25161 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25162 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq",
25163 Iop_InterleaveHI64x2, NULL,
25164 False/*!invertLeftArg*/, True/*swapArgs*/ );
25165 goto decode_success;
25166 }
sewardjcc3d2192013-03-27 11:37:33 +000025167 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
25168 /* VPUNPCKHQDQ = VEX.NDS.256.66.0F.WIG 6D /r */
25169 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25170 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25171 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq",
25172 math_VPUNPCKHQDQ_YMM );
25173 goto decode_success;
25174 }
sewardjfe0c5e72012-06-15 15:48:07 +000025175 break;
25176
sewardjc4530ae2012-05-21 10:18:49 +000025177 case 0x6E:
25178 /* VMOVD r32/m32, xmm1 = VEX.128.66.0F.W0 6E */
25179 if (have66noF2noF3(pfx)
25180 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
25181 vassert(sz == 2); /* even tho we are transferring 4, not 2. */
25182 UChar modrm = getUChar(delta);
25183 if (epartIsReg(modrm)) {
25184 delta += 1;
25185 putYMMRegLoAndZU(
25186 gregOfRexRM(pfx,modrm),
25187 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
25188 );
25189 DIP("vmovd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
25190 nameXMMReg(gregOfRexRM(pfx,modrm)));
25191 } else {
25192 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25193 delta += alen;
25194 putYMMRegLoAndZU(
25195 gregOfRexRM(pfx,modrm),
25196 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)))
25197 );
25198 DIP("vmovd %s, %s\n", dis_buf,
25199 nameXMMReg(gregOfRexRM(pfx,modrm)));
25200 }
25201 goto decode_success;
25202 }
sewardj6eaf00c2012-05-23 11:33:56 +000025203 /* VMOVQ r64/m64, xmm1 = VEX.128.66.0F.W1 6E */
25204 if (have66noF2noF3(pfx)
25205 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
25206 vassert(sz == 2); /* even tho we are transferring 8, not 2. */
25207 UChar modrm = getUChar(delta);
25208 if (epartIsReg(modrm)) {
25209 delta += 1;
25210 putYMMRegLoAndZU(
25211 gregOfRexRM(pfx,modrm),
25212 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
25213 );
25214 DIP("vmovq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
25215 nameXMMReg(gregOfRexRM(pfx,modrm)));
25216 } else {
25217 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25218 delta += alen;
25219 putYMMRegLoAndZU(
25220 gregOfRexRM(pfx,modrm),
25221 unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)))
25222 );
25223 DIP("vmovq %s, %s\n", dis_buf,
25224 nameXMMReg(gregOfRexRM(pfx,modrm)));
25225 }
25226 goto decode_success;
25227 }
sewardjc4530ae2012-05-21 10:18:49 +000025228 break;
25229
25230 case 0x6F:
25231 /* VMOVDQA ymm2/m256, ymm1 = VEX.256.66.0F.WIG 6F */
25232 /* VMOVDQU ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 6F */
sewardj66becf32012-06-18 23:15:16 +000025233 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
sewardjc4530ae2012-05-21 10:18:49 +000025234 && 1==getVexL(pfx)/*256*/) {
25235 UChar modrm = getUChar(delta);
25236 UInt rD = gregOfRexRM(pfx, modrm);
25237 IRTemp tD = newTemp(Ity_V256);
25238 Bool isA = have66noF2noF3(pfx);
florian5df8ab02012-10-13 19:34:19 +000025239 HChar ch = isA ? 'a' : 'u';
sewardjc4530ae2012-05-21 10:18:49 +000025240 if (epartIsReg(modrm)) {
25241 UInt rS = eregOfRexRM(pfx, modrm);
25242 delta += 1;
25243 assign(tD, getYMMReg(rS));
25244 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
25245 } else {
25246 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25247 delta += alen;
25248 if (isA)
25249 gen_SEGV_if_not_32_aligned(addr);
25250 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
25251 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameYMMReg(rD));
25252 }
25253 putYMMReg(rD, mkexpr(tD));
25254 goto decode_success;
25255 }
25256 /* VMOVDQA xmm2/m128, xmm1 = VEX.128.66.0F.WIG 6F */
25257 /* VMOVDQU xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 6F */
25258 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
25259 && 0==getVexL(pfx)/*128*/) {
25260 UChar modrm = getUChar(delta);
25261 UInt rD = gregOfRexRM(pfx, modrm);
25262 IRTemp tD = newTemp(Ity_V128);
25263 Bool isA = have66noF2noF3(pfx);
florian5df8ab02012-10-13 19:34:19 +000025264 HChar ch = isA ? 'a' : 'u';
sewardjc4530ae2012-05-21 10:18:49 +000025265 if (epartIsReg(modrm)) {
25266 UInt rS = eregOfRexRM(pfx, modrm);
25267 delta += 1;
25268 assign(tD, getXMMReg(rS));
25269 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
25270 } else {
25271 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25272 delta += alen;
25273 if (isA)
25274 gen_SEGV_if_not_16_aligned(addr);
25275 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
25276 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameXMMReg(rD));
25277 }
25278 putYMMRegLoAndZU(rD, mkexpr(tD));
25279 goto decode_success;
25280 }
25281 break;
25282
25283 case 0x70:
25284 /* VPSHUFD imm8, xmm2/m128, xmm1 = VEX.128.66.0F.WIG 70 /r ib */
25285 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25286 delta = dis_PSHUFD_32x4( vbi, pfx, delta, True/*writesYmm*/);
25287 goto decode_success;
25288 }
sewardjcc3d2192013-03-27 11:37:33 +000025289 /* VPSHUFD imm8, ymm2/m256, ymm1 = VEX.256.66.0F.WIG 70 /r ib */
25290 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25291 delta = dis_PSHUFD_32x8( vbi, pfx, delta);
25292 goto decode_success;
25293 }
sewardj251b59e2012-05-25 13:51:07 +000025294 /* VPSHUFLW imm8, xmm2/m128, xmm1 = VEX.128.F2.0F.WIG 70 /r ib */
25295 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25296 delta = dis_PSHUFxW_128( vbi, pfx, delta,
25297 True/*isAvx*/, False/*!xIsH*/ );
25298 goto decode_success;
25299 }
sewardjcc3d2192013-03-27 11:37:33 +000025300 /* VPSHUFLW imm8, ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 70 /r ib */
25301 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25302 delta = dis_PSHUFxW_256( vbi, pfx, delta, False/*!xIsH*/ );
25303 goto decode_success;
25304 }
sewardj251b59e2012-05-25 13:51:07 +000025305 /* VPSHUFHW imm8, xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 70 /r ib */
25306 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
25307 delta = dis_PSHUFxW_128( vbi, pfx, delta,
25308 True/*isAvx*/, True/*xIsH*/ );
25309 goto decode_success;
25310 }
sewardjcc3d2192013-03-27 11:37:33 +000025311 /* VPSHUFHW imm8, ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 70 /r ib */
25312 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
25313 delta = dis_PSHUFxW_256( vbi, pfx, delta, True/*xIsH*/ );
25314 goto decode_success;
25315 }
sewardjc4530ae2012-05-21 10:18:49 +000025316 break;
25317
sewardj6faf7cc2012-05-25 15:53:01 +000025318 case 0x71:
25319 /* VPSRLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /2 ib */
sewardjfe0c5e72012-06-15 15:48:07 +000025320 /* VPSRAW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /4 ib */
sewardj15ad1942012-06-20 10:21:05 +000025321 /* VPSLLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /6 ib */
sewardj6faf7cc2012-05-25 15:53:01 +000025322 if (have66noF2noF3(pfx)
25323 && 0==getVexL(pfx)/*128*/
25324 && epartIsReg(getUChar(delta))) {
25325 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
25326 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25327 "vpsrlw", Iop_ShrN16x8 );
25328 *uses_vvvv = True;
25329 goto decode_success;
25330 }
sewardjfe0c5e72012-06-15 15:48:07 +000025331 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
25332 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25333 "vpsraw", Iop_SarN16x8 );
25334 *uses_vvvv = True;
25335 goto decode_success;
25336 }
sewardj15ad1942012-06-20 10:21:05 +000025337 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
25338 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25339 "vpsllw", Iop_ShlN16x8 );
25340 *uses_vvvv = True;
25341 goto decode_success;
25342 }
sewardj6faf7cc2012-05-25 15:53:01 +000025343 /* else fall through */
25344 }
sewardjcc3d2192013-03-27 11:37:33 +000025345 /* VPSRLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /2 ib */
25346 /* VPSRAW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /4 ib */
25347 /* VPSLLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /6 ib */
25348 if (have66noF2noF3(pfx)
25349 && 1==getVexL(pfx)/*256*/
25350 && epartIsReg(getUChar(delta))) {
25351 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
25352 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
25353 "vpsrlw", Iop_ShrN16x16 );
25354 *uses_vvvv = True;
25355 goto decode_success;
25356 }
25357 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
25358 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
25359 "vpsraw", Iop_SarN16x16 );
25360 *uses_vvvv = True;
25361 goto decode_success;
25362 }
25363 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
25364 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
25365 "vpsllw", Iop_ShlN16x16 );
25366 *uses_vvvv = True;
25367 goto decode_success;
25368 }
25369 /* else fall through */
25370 }
sewardj6faf7cc2012-05-25 15:53:01 +000025371 break;
25372
sewardjc4530ae2012-05-21 10:18:49 +000025373 case 0x72:
sewardj251b59e2012-05-25 13:51:07 +000025374 /* VPSRLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /2 ib */
sewardj15ad1942012-06-20 10:21:05 +000025375 /* VPSRAD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /4 ib */
25376 /* VPSLLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /6 ib */
sewardjc4530ae2012-05-21 10:18:49 +000025377 if (have66noF2noF3(pfx)
25378 && 0==getVexL(pfx)/*128*/
sewardj251b59e2012-05-25 13:51:07 +000025379 && epartIsReg(getUChar(delta))) {
sewardj251b59e2012-05-25 13:51:07 +000025380 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
25381 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25382 "vpsrld", Iop_ShrN32x4 );
25383 *uses_vvvv = True;
25384 goto decode_success;
25385 }
sewardj15ad1942012-06-20 10:21:05 +000025386 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
25387 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25388 "vpsrad", Iop_SarN32x4 );
25389 *uses_vvvv = True;
25390 goto decode_success;
25391 }
25392 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
25393 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25394 "vpslld", Iop_ShlN32x4 );
25395 *uses_vvvv = True;
25396 goto decode_success;
25397 }
sewardj251b59e2012-05-25 13:51:07 +000025398 /* else fall through */
sewardjc4530ae2012-05-21 10:18:49 +000025399 }
sewardjcc3d2192013-03-27 11:37:33 +000025400 /* VPSRLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /2 ib */
25401 /* VPSRAD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /4 ib */
25402 /* VPSLLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /6 ib */
25403 if (have66noF2noF3(pfx)
25404 && 1==getVexL(pfx)/*256*/
25405 && epartIsReg(getUChar(delta))) {
25406 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
25407 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
25408 "vpsrld", Iop_ShrN32x8 );
25409 *uses_vvvv = True;
25410 goto decode_success;
25411 }
25412 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
25413 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
25414 "vpsrad", Iop_SarN32x8 );
25415 *uses_vvvv = True;
25416 goto decode_success;
25417 }
25418 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
25419 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
25420 "vpslld", Iop_ShlN32x8 );
25421 *uses_vvvv = True;
25422 goto decode_success;
25423 }
25424 /* else fall through */
25425 }
sewardjc4530ae2012-05-21 10:18:49 +000025426 break;
25427
25428 case 0x73:
sewardj251b59e2012-05-25 13:51:07 +000025429 /* VPSRLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /3 ib */
25430 /* VPSLLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /7 ib */
sewardj56c30312012-06-12 08:45:39 +000025431 /* VPSRLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /2 ib */
sewardje8a7eb72012-06-12 14:59:17 +000025432 /* VPSLLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /6 ib */
sewardjc4530ae2012-05-21 10:18:49 +000025433 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
sewardj251b59e2012-05-25 13:51:07 +000025434 && epartIsReg(getUChar(delta))) {
25435 Int rS = eregOfRexRM(pfx,getUChar(delta));
25436 Int rD = getVexNvvvv(pfx);
sewardjc4530ae2012-05-21 10:18:49 +000025437 IRTemp vecS = newTemp(Ity_V128);
sewardj251b59e2012-05-25 13:51:07 +000025438 if (gregLO3ofRM(getUChar(delta)) == 3) {
25439 Int imm = (Int)getUChar(delta+1);
25440 DIP("vpsrldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
25441 delta += 2;
25442 assign( vecS, getXMMReg(rS) );
25443 putYMMRegLoAndZU(rD, mkexpr(math_PSRLDQ( vecS, imm )));
25444 *uses_vvvv = True;
25445 goto decode_success;
25446 }
25447 if (gregLO3ofRM(getUChar(delta)) == 7) {
25448 Int imm = (Int)getUChar(delta+1);
25449 DIP("vpslldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
25450 delta += 2;
25451 assign( vecS, getXMMReg(rS) );
25452 putYMMRegLoAndZU(rD, mkexpr(math_PSLLDQ( vecS, imm )));
25453 *uses_vvvv = True;
25454 goto decode_success;
25455 }
sewardj56c30312012-06-12 08:45:39 +000025456 if (gregLO3ofRM(getUChar(delta)) == 2) {
25457 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25458 "vpsrlq", Iop_ShrN64x2 );
25459 *uses_vvvv = True;
25460 goto decode_success;
25461 }
sewardje8a7eb72012-06-12 14:59:17 +000025462 if (gregLO3ofRM(getUChar(delta)) == 6) {
25463 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25464 "vpsllq", Iop_ShlN64x2 );
25465 *uses_vvvv = True;
25466 goto decode_success;
25467 }
sewardj251b59e2012-05-25 13:51:07 +000025468 /* else fall through */
sewardjc4530ae2012-05-21 10:18:49 +000025469 }
sewardjcc3d2192013-03-27 11:37:33 +000025470 /* VPSRLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /3 ib */
25471 /* VPSLLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /7 ib */
25472 /* VPSRLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /2 ib */
25473 /* VPSLLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /6 ib */
25474 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
25475 && epartIsReg(getUChar(delta))) {
25476 Int rS = eregOfRexRM(pfx,getUChar(delta));
25477 Int rD = getVexNvvvv(pfx);
25478 if (gregLO3ofRM(getUChar(delta)) == 3) {
25479 IRTemp vecS0 = newTemp(Ity_V128);
25480 IRTemp vecS1 = newTemp(Ity_V128);
25481 Int imm = (Int)getUChar(delta+1);
25482 DIP("vpsrldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD));
25483 delta += 2;
25484 assign( vecS0, getYMMRegLane128(rS, 0));
25485 assign( vecS1, getYMMRegLane128(rS, 1));
25486 putYMMRegLane128(rD, 0, mkexpr(math_PSRLDQ( vecS0, imm )));
25487 putYMMRegLane128(rD, 1, mkexpr(math_PSRLDQ( vecS1, imm )));
25488 *uses_vvvv = True;
25489 goto decode_success;
25490 }
25491 if (gregLO3ofRM(getUChar(delta)) == 7) {
25492 IRTemp vecS0 = newTemp(Ity_V128);
25493 IRTemp vecS1 = newTemp(Ity_V128);
25494 Int imm = (Int)getUChar(delta+1);
25495 DIP("vpslldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD));
25496 delta += 2;
25497 assign( vecS0, getYMMRegLane128(rS, 0));
25498 assign( vecS1, getYMMRegLane128(rS, 1));
25499 putYMMRegLane128(rD, 0, mkexpr(math_PSLLDQ( vecS0, imm )));
25500 putYMMRegLane128(rD, 1, mkexpr(math_PSLLDQ( vecS1, imm )));
25501 *uses_vvvv = True;
25502 goto decode_success;
25503 }
25504 if (gregLO3ofRM(getUChar(delta)) == 2) {
25505 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
25506 "vpsrlq", Iop_ShrN64x4 );
25507 *uses_vvvv = True;
25508 goto decode_success;
25509 }
25510 if (gregLO3ofRM(getUChar(delta)) == 6) {
25511 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
25512 "vpsllq", Iop_ShlN64x4 );
25513 *uses_vvvv = True;
25514 goto decode_success;
25515 }
25516 /* else fall through */
25517 }
sewardjc4530ae2012-05-21 10:18:49 +000025518 break;
25519
sewardj8ef22422012-05-24 16:29:18 +000025520 case 0x74:
sewardj251b59e2012-05-25 13:51:07 +000025521 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
sewardj8ef22422012-05-24 16:29:18 +000025522 /* VPCMPEQB = VEX.NDS.128.66.0F.WIG 74 /r */
25523 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25524 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25525 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x16 );
25526 goto decode_success;
25527 }
sewardjcc3d2192013-03-27 11:37:33 +000025528 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
25529 /* VPCMPEQB = VEX.NDS.256.66.0F.WIG 74 /r */
25530 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25531 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25532 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x32 );
25533 goto decode_success;
25534 }
sewardj8ef22422012-05-24 16:29:18 +000025535 break;
25536
sewardj6fcd43e2012-06-14 08:51:35 +000025537 case 0x75:
25538 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
25539 /* VPCMPEQW = VEX.NDS.128.66.0F.WIG 75 /r */
25540 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25541 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25542 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x8 );
25543 goto decode_success;
25544 }
sewardjcc3d2192013-03-27 11:37:33 +000025545 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
25546 /* VPCMPEQW = VEX.NDS.256.66.0F.WIG 75 /r */
25547 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25548 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25549 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x16 );
25550 goto decode_success;
25551 }
sewardj6fcd43e2012-06-14 08:51:35 +000025552 break;
25553
sewardjc4530ae2012-05-21 10:18:49 +000025554 case 0x76:
sewardj251b59e2012-05-25 13:51:07 +000025555 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
sewardjc4530ae2012-05-21 10:18:49 +000025556 /* VPCMPEQD = VEX.NDS.128.66.0F.WIG 76 /r */
25557 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25558 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25559 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x4 );
25560 goto decode_success;
25561 }
sewardjcc3d2192013-03-27 11:37:33 +000025562 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
25563 /* VPCMPEQD = VEX.NDS.256.66.0F.WIG 76 /r */
25564 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25565 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25566 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x8 );
25567 goto decode_success;
25568 }
sewardjc4530ae2012-05-21 10:18:49 +000025569 break;
25570
25571 case 0x77:
25572 /* VZEROUPPER = VEX.128.0F.WIG 77 */
25573 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25574 Int i;
25575 IRTemp zero128 = newTemp(Ity_V128);
25576 assign(zero128, mkV128(0));
25577 for (i = 0; i < 16; i++) {
25578 putYMMRegLane128(i, 1, mkexpr(zero128));
25579 }
25580 DIP("vzeroupper\n");
25581 goto decode_success;
25582 }
sewardj66becf32012-06-18 23:15:16 +000025583 /* VZEROALL = VEX.256.0F.WIG 77 */
25584 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25585 Int i;
25586 IRTemp zero128 = newTemp(Ity_V128);
25587 assign(zero128, mkV128(0));
25588 for (i = 0; i < 16; i++) {
25589 putYMMRegLoAndZU(i, mkexpr(zero128));
25590 }
25591 DIP("vzeroall\n");
25592 goto decode_success;
25593 }
sewardjc4530ae2012-05-21 10:18:49 +000025594 break;
25595
sewardjadf357c2012-06-24 13:44:17 +000025596 case 0x7C:
25597 case 0x7D:
25598 /* VHADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7C /r */
25599 /* VHSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7D /r */
25600 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25601 IRTemp sV = newTemp(Ity_V128);
25602 IRTemp dV = newTemp(Ity_V128);
25603 Bool isAdd = opc == 0x7C;
florian55085f82012-11-21 00:36:55 +000025604 const HChar* str = isAdd ? "add" : "sub";
sewardjadf357c2012-06-24 13:44:17 +000025605 UChar modrm = getUChar(delta);
25606 UInt rG = gregOfRexRM(pfx,modrm);
25607 UInt rV = getVexNvvvv(pfx);
25608 if (epartIsReg(modrm)) {
25609 UInt rE = eregOfRexRM(pfx,modrm);
25610 assign( sV, getXMMReg(rE) );
25611 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE),
25612 nameXMMReg(rV), nameXMMReg(rG));
25613 delta += 1;
25614 } else {
25615 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25616 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
25617 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
25618 nameXMMReg(rV), nameXMMReg(rG));
25619 delta += alen;
25620 }
25621 assign( dV, getXMMReg(rV) );
25622 putYMMRegLoAndZU( rG, mkexpr( math_HADDPS_128 ( dV, sV, isAdd ) ) );
25623 *uses_vvvv = True;
25624 goto decode_success;
25625 }
25626 /* VHADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7C /r */
25627 /* VHSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7D /r */
25628 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25629 IRTemp sV = newTemp(Ity_V256);
25630 IRTemp dV = newTemp(Ity_V256);
25631 IRTemp s1, s0, d1, d0;
25632 Bool isAdd = opc == 0x7C;
florian55085f82012-11-21 00:36:55 +000025633 const HChar* str = isAdd ? "add" : "sub";
sewardjadf357c2012-06-24 13:44:17 +000025634 UChar modrm = getUChar(delta);
25635 UInt rG = gregOfRexRM(pfx,modrm);
25636 UInt rV = getVexNvvvv(pfx);
25637 s1 = s0 = d1 = d0 = IRTemp_INVALID;
25638 if (epartIsReg(modrm)) {
25639 UInt rE = eregOfRexRM(pfx,modrm);
25640 assign( sV, getYMMReg(rE) );
25641 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE),
25642 nameYMMReg(rV), nameYMMReg(rG));
25643 delta += 1;
25644 } else {
25645 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25646 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
25647 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
25648 nameYMMReg(rV), nameYMMReg(rG));
25649 delta += alen;
25650 }
25651 assign( dV, getYMMReg(rV) );
25652 breakupV256toV128s( dV, &d1, &d0 );
25653 breakupV256toV128s( sV, &s1, &s0 );
25654 putYMMReg( rG, binop(Iop_V128HLtoV256,
25655 mkexpr( math_HADDPS_128 ( d1, s1, isAdd ) ),
25656 mkexpr( math_HADDPS_128 ( d0, s0, isAdd ) ) ) );
25657 *uses_vvvv = True;
25658 goto decode_success;
25659 }
25660 /* VHADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7C /r */
25661 /* VHSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7D /r */
25662 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25663 IRTemp sV = newTemp(Ity_V128);
25664 IRTemp dV = newTemp(Ity_V128);
25665 Bool isAdd = opc == 0x7C;
florian55085f82012-11-21 00:36:55 +000025666 const HChar* str = isAdd ? "add" : "sub";
sewardjadf357c2012-06-24 13:44:17 +000025667 UChar modrm = getUChar(delta);
25668 UInt rG = gregOfRexRM(pfx,modrm);
25669 UInt rV = getVexNvvvv(pfx);
25670 if (epartIsReg(modrm)) {
25671 UInt rE = eregOfRexRM(pfx,modrm);
25672 assign( sV, getXMMReg(rE) );
25673 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE),
25674 nameXMMReg(rV), nameXMMReg(rG));
25675 delta += 1;
25676 } else {
25677 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25678 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
25679 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
25680 nameXMMReg(rV), nameXMMReg(rG));
25681 delta += alen;
25682 }
25683 assign( dV, getXMMReg(rV) );
25684 putYMMRegLoAndZU( rG, mkexpr( math_HADDPD_128 ( dV, sV, isAdd ) ) );
25685 *uses_vvvv = True;
25686 goto decode_success;
25687 }
25688 /* VHADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7C /r */
25689 /* VHSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7D /r */
25690 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25691 IRTemp sV = newTemp(Ity_V256);
25692 IRTemp dV = newTemp(Ity_V256);
25693 IRTemp s1, s0, d1, d0;
25694 Bool isAdd = opc == 0x7C;
florian55085f82012-11-21 00:36:55 +000025695 const HChar* str = isAdd ? "add" : "sub";
sewardjadf357c2012-06-24 13:44:17 +000025696 UChar modrm = getUChar(delta);
25697 UInt rG = gregOfRexRM(pfx,modrm);
25698 UInt rV = getVexNvvvv(pfx);
25699 s1 = s0 = d1 = d0 = IRTemp_INVALID;
25700 if (epartIsReg(modrm)) {
25701 UInt rE = eregOfRexRM(pfx,modrm);
25702 assign( sV, getYMMReg(rE) );
25703 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE),
25704 nameYMMReg(rV), nameYMMReg(rG));
25705 delta += 1;
25706 } else {
25707 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25708 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
25709 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
25710 nameYMMReg(rV), nameYMMReg(rG));
25711 delta += alen;
25712 }
25713 assign( dV, getYMMReg(rV) );
25714 breakupV256toV128s( dV, &d1, &d0 );
25715 breakupV256toV128s( sV, &s1, &s0 );
25716 putYMMReg( rG, binop(Iop_V128HLtoV256,
25717 mkexpr( math_HADDPD_128 ( d1, s1, isAdd ) ),
25718 mkexpr( math_HADDPD_128 ( d0, s0, isAdd ) ) ) );
25719 *uses_vvvv = True;
25720 goto decode_success;
25721 }
25722 break;
25723
sewardjc4530ae2012-05-21 10:18:49 +000025724 case 0x7E:
25725 /* Note the Intel docs don't make sense for this. I think they
25726 are wrong. They seem to imply it is a store when in fact I
25727 think it is a load. Also it's unclear whether this is W0, W1
25728 or WIG. */
sewardj6be43242012-05-22 23:12:13 +000025729 /* VMOVQ xmm2/m64, xmm1 = VEX.128.F3.0F.W0 7E /r */
sewardjc4530ae2012-05-21 10:18:49 +000025730 if (haveF3no66noF2(pfx)
25731 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
25732 vassert(sz == 4); /* even tho we are transferring 8, not 4. */
25733 UChar modrm = getUChar(delta);
25734 UInt rG = gregOfRexRM(pfx,modrm);
25735 if (epartIsReg(modrm)) {
25736 UInt rE = eregOfRexRM(pfx,modrm);
25737 putXMMRegLane64( rG, 0, getXMMRegLane64( rE, 0 ));
25738 DIP("vmovq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
25739 delta += 1;
25740 } else {
25741 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25742 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
25743 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
25744 delta += alen;
25745 }
25746 /* zero bits 255:64 */
25747 putXMMRegLane64( rG, 1, mkU64(0) );
25748 putYMMRegLane128( rG, 1, mkV128(0) );
25749 goto decode_success;
25750 }
sewardj6be43242012-05-22 23:12:13 +000025751 /* VMOVQ xmm1, r64/m64 = VEX.128.66.0F.W1 7E /r (reg and mem cases) */
25752 /* Moves from G to E, so is a store-form insn */
sewardj251b59e2012-05-25 13:51:07 +000025753 /* Intel docs list this in the VMOVD entry for some reason. */
sewardj6be43242012-05-22 23:12:13 +000025754 if (have66noF2noF3(pfx)
sewardj82096922012-06-24 14:57:59 +000025755 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
sewardj6be43242012-05-22 23:12:13 +000025756 UChar modrm = getUChar(delta);
25757 UInt rG = gregOfRexRM(pfx,modrm);
sewardj82096922012-06-24 14:57:59 +000025758 if (epartIsReg(modrm)) {
25759 UInt rE = eregOfRexRM(pfx,modrm);
25760 DIP("vmovq %s,%s\n", nameXMMReg(rG), nameIReg64(rE));
25761 putIReg64(rE, getXMMRegLane64(rG, 0));
25762 delta += 1;
25763 } else {
25764 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25765 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0) );
25766 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
25767 delta += alen;
25768 }
sewardj6be43242012-05-22 23:12:13 +000025769 goto decode_success;
25770 }
sewardj6faf7cc2012-05-25 15:53:01 +000025771 /* VMOVD xmm1, m32/r32 = VEX.128.66.0F.W0 7E /r (reg and mem cases) */
sewardj72df0682012-05-23 23:54:30 +000025772 /* Moves from G to E, so is a store-form insn */
25773 if (have66noF2noF3(pfx)
sewardj251b59e2012-05-25 13:51:07 +000025774 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
sewardj72df0682012-05-23 23:54:30 +000025775 UChar modrm = getUChar(delta);
25776 UInt rG = gregOfRexRM(pfx,modrm);
sewardj251b59e2012-05-25 13:51:07 +000025777 if (epartIsReg(modrm)) {
25778 UInt rE = eregOfRexRM(pfx,modrm);
25779 DIP("vmovd %s,%s\n", nameXMMReg(rG), nameIReg32(rE));
25780 putIReg32(rE, getXMMRegLane32(rG, 0));
25781 delta += 1;
25782 } else {
25783 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25784 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0) );
25785 DIP("vmovd %s,%s\n", dis_buf, nameXMMReg(rG));
25786 delta += alen;
25787 }
sewardj72df0682012-05-23 23:54:30 +000025788 goto decode_success;
25789 }
sewardjc4530ae2012-05-21 10:18:49 +000025790 break;
25791
25792 case 0x7F:
25793 /* VMOVDQA ymm1, ymm2/m256 = VEX.256.66.0F.WIG 7F */
sewardj66becf32012-06-18 23:15:16 +000025794 /* VMOVDQU ymm1, ymm2/m256 = VEX.256.F3.0F.WIG 7F */
25795 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
25796 && 1==getVexL(pfx)/*256*/) {
sewardjc4530ae2012-05-21 10:18:49 +000025797 UChar modrm = getUChar(delta);
25798 UInt rS = gregOfRexRM(pfx, modrm);
25799 IRTemp tS = newTemp(Ity_V256);
sewardj66becf32012-06-18 23:15:16 +000025800 Bool isA = have66noF2noF3(pfx);
florian5df8ab02012-10-13 19:34:19 +000025801 HChar ch = isA ? 'a' : 'u';
sewardjc4530ae2012-05-21 10:18:49 +000025802 assign(tS, getYMMReg(rS));
25803 if (epartIsReg(modrm)) {
25804 UInt rD = eregOfRexRM(pfx, modrm);
25805 delta += 1;
25806 putYMMReg(rD, mkexpr(tS));
sewardj66becf32012-06-18 23:15:16 +000025807 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
sewardjc4530ae2012-05-21 10:18:49 +000025808 } else {
25809 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25810 delta += alen;
sewardj66becf32012-06-18 23:15:16 +000025811 if (isA)
25812 gen_SEGV_if_not_32_aligned(addr);
sewardjc4530ae2012-05-21 10:18:49 +000025813 storeLE(mkexpr(addr), mkexpr(tS));
sewardj66becf32012-06-18 23:15:16 +000025814 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), dis_buf);
sewardjc4530ae2012-05-21 10:18:49 +000025815 }
25816 goto decode_success;
25817 }
25818 /* VMOVDQA xmm1, xmm2/m128 = VEX.128.66.0F.WIG 7F */
25819 /* VMOVDQU xmm1, xmm2/m128 = VEX.128.F3.0F.WIG 7F */
25820 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
25821 && 0==getVexL(pfx)/*128*/) {
25822 UChar modrm = getUChar(delta);
25823 UInt rS = gregOfRexRM(pfx, modrm);
25824 IRTemp tS = newTemp(Ity_V128);
25825 Bool isA = have66noF2noF3(pfx);
florian5df8ab02012-10-13 19:34:19 +000025826 HChar ch = isA ? 'a' : 'u';
sewardjc4530ae2012-05-21 10:18:49 +000025827 assign(tS, getXMMReg(rS));
25828 if (epartIsReg(modrm)) {
25829 UInt rD = eregOfRexRM(pfx, modrm);
25830 delta += 1;
25831 putYMMRegLoAndZU(rD, mkexpr(tS));
25832 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
25833 } else {
25834 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25835 delta += alen;
25836 if (isA)
25837 gen_SEGV_if_not_16_aligned(addr);
25838 storeLE(mkexpr(addr), mkexpr(tS));
25839 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), dis_buf);
25840 }
25841 goto decode_success;
25842 }
25843 break;
25844
sewardjfe0c5e72012-06-15 15:48:07 +000025845 case 0xAE:
25846 /* VSTMXCSR m32 = VEX.LZ.0F.WIG AE /3 */
25847 if (haveNo66noF2noF3(pfx)
25848 && 0==getVexL(pfx)/*LZ*/
25849 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
25850 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
25851 && sz == 4) {
25852 delta = dis_STMXCSR(vbi, pfx, delta, True/*isAvx*/);
25853 goto decode_success;
25854 }
25855 /* VLDMXCSR m32 = VEX.LZ.0F.WIG AE /2 */
25856 if (haveNo66noF2noF3(pfx)
25857 && 0==getVexL(pfx)/*LZ*/
25858 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
25859 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
25860 && sz == 4) {
25861 delta = dis_LDMXCSR(vbi, pfx, delta, True/*isAvx*/);
25862 goto decode_success;
25863 }
25864 break;
25865
sewardjc4530ae2012-05-21 10:18:49 +000025866 case 0xC2:
25867 /* VCMPSD xmm3/m64(E=argL), xmm2(V=argR), xmm1(G) */
25868 /* = VEX.NDS.LIG.F2.0F.WIG C2 /r ib */
25869 if (haveF2no66noF3(pfx)) {
25870 Long delta0 = delta;
25871 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
25872 "vcmpsd", False/*!all_lanes*/,
25873 8/*sz*/);
25874 if (delta > delta0) goto decode_success;
25875 /* else fall through -- decoding has failed */
25876 }
25877 /* VCMPSS xmm3/m32(E=argL), xmm2(V=argR), xmm1(G) */
25878 /* = VEX.NDS.LIG.F3.0F.WIG C2 /r ib */
25879 if (haveF3no66noF2(pfx)) {
25880 Long delta0 = delta;
25881 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
25882 "vcmpss", False/*!all_lanes*/,
25883 4/*sz*/);
25884 if (delta > delta0) goto decode_success;
25885 /* else fall through -- decoding has failed */
25886 }
sewardj89378162012-06-24 12:12:20 +000025887 /* VCMPPD xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
sewardj4b1cc832012-06-13 11:10:20 +000025888 /* = VEX.NDS.128.66.0F.WIG C2 /r ib */
25889 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25890 Long delta0 = delta;
25891 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
25892 "vcmppd", True/*all_lanes*/,
25893 8/*sz*/);
25894 if (delta > delta0) goto decode_success;
25895 /* else fall through -- decoding has failed */
25896 }
sewardj89378162012-06-24 12:12:20 +000025897 /* VCMPPD ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
25898 /* = VEX.NDS.256.66.0F.WIG C2 /r ib */
25899 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25900 Long delta0 = delta;
25901 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
25902 "vcmppd", 8/*sz*/);
25903 if (delta > delta0) goto decode_success;
25904 /* else fall through -- decoding has failed */
25905 }
25906 /* VCMPPS xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
25907 /* = VEX.NDS.128.0F.WIG C2 /r ib */
25908 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25909 Long delta0 = delta;
25910 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
25911 "vcmpps", True/*all_lanes*/,
25912 4/*sz*/);
25913 if (delta > delta0) goto decode_success;
25914 /* else fall through -- decoding has failed */
25915 }
25916 /* VCMPPS ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
25917 /* = VEX.NDS.256.0F.WIG C2 /r ib */
25918 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25919 Long delta0 = delta;
25920 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
25921 "vcmpps", 4/*sz*/);
25922 if (delta > delta0) goto decode_success;
25923 /* else fall through -- decoding has failed */
25924 }
sewardjc4530ae2012-05-21 10:18:49 +000025925 break;
25926
sewardj21459cb2012-06-18 14:05:52 +000025927 case 0xC4:
25928 /* VPINSRW r32/m16, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG C4 /r ib */
25929 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25930 UChar modrm = getUChar(delta);
25931 UInt rG = gregOfRexRM(pfx, modrm);
25932 UInt rV = getVexNvvvv(pfx);
25933 Int imm8;
sewardj4ed05e02012-06-18 15:01:30 +000025934 IRTemp new16 = newTemp(Ity_I16);
sewardj21459cb2012-06-18 14:05:52 +000025935
25936 if ( epartIsReg( modrm ) ) {
25937 imm8 = (Int)(getUChar(delta+1) & 7);
sewardj4ed05e02012-06-18 15:01:30 +000025938 assign( new16, unop(Iop_32to16,
25939 getIReg32(eregOfRexRM(pfx,modrm))) );
sewardj21459cb2012-06-18 14:05:52 +000025940 delta += 1+1;
25941 DIP( "vpinsrw $%d,%s,%s\n", imm8,
25942 nameIReg32( eregOfRexRM(pfx, modrm) ), nameXMMReg(rG) );
25943 } else {
25944 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25945 imm8 = (Int)(getUChar(delta+alen) & 7);
sewardj4ed05e02012-06-18 15:01:30 +000025946 assign( new16, loadLE( Ity_I16, mkexpr(addr) ));
sewardj21459cb2012-06-18 14:05:52 +000025947 delta += alen+1;
25948 DIP( "vpinsrw $%d,%s,%s\n",
25949 imm8, dis_buf, nameXMMReg(rG) );
25950 }
25951
sewardj4ed05e02012-06-18 15:01:30 +000025952 IRTemp src_vec = newTemp(Ity_V128);
25953 assign(src_vec, getXMMReg( rV ));
25954 IRTemp res_vec = math_PINSRW_128( src_vec, new16, imm8 );
25955 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
sewardj21459cb2012-06-18 14:05:52 +000025956 *uses_vvvv = True;
25957 goto decode_success;
25958 }
sewardj4ed05e02012-06-18 15:01:30 +000025959 break;
sewardj21459cb2012-06-18 14:05:52 +000025960
sewardje8a7eb72012-06-12 14:59:17 +000025961 case 0xC5:
25962 /* VPEXTRW imm8, xmm1, reg32 = VEX.128.66.0F.W0 C5 /r ib */
25963 if (have66noF2noF3(pfx)
25964 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
25965 Long delta0 = delta;
25966 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
25967 True/*isAvx*/ );
25968 if (delta > delta0) goto decode_success;
25969 /* else fall through -- decoding has failed */
25970 }
25971 break;
25972
sewardj251b59e2012-05-25 13:51:07 +000025973 case 0xC6:
25974 /* VSHUFPS imm8, xmm3/m128, xmm2, xmm1 */
25975 /* = VEX.NDS.128.0F.WIG C6 /r ib */
25976 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25977 Int imm8 = 0;
25978 IRTemp eV = newTemp(Ity_V128);
25979 IRTemp vV = newTemp(Ity_V128);
25980 UInt modrm = getUChar(delta);
25981 UInt rG = gregOfRexRM(pfx,modrm);
25982 UInt rV = getVexNvvvv(pfx);
25983 assign( vV, getXMMReg(rV) );
25984 if (epartIsReg(modrm)) {
25985 UInt rE = eregOfRexRM(pfx,modrm);
25986 assign( eV, getXMMReg(rE) );
25987 imm8 = (Int)getUChar(delta+1);
25988 delta += 1+1;
25989 DIP("vshufps $%d,%s,%s,%s\n",
25990 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
25991 } else {
25992 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
25993 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
25994 imm8 = (Int)getUChar(delta+alen);
25995 delta += 1+alen;
25996 DIP("vshufps $%d,%s,%s,%s\n",
25997 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
25998 }
sewardj4b1cc832012-06-13 11:10:20 +000025999 IRTemp res = math_SHUFPS_128( eV, vV, imm8 );
26000 putYMMRegLoAndZU( rG, mkexpr(res) );
26001 *uses_vvvv = True;
26002 goto decode_success;
26003 }
26004 /* VSHUFPS imm8, ymm3/m256, ymm2, ymm1 */
26005 /* = VEX.NDS.256.0F.WIG C6 /r ib */
26006 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26007 Int imm8 = 0;
26008 IRTemp eV = newTemp(Ity_V256);
26009 IRTemp vV = newTemp(Ity_V256);
26010 UInt modrm = getUChar(delta);
26011 UInt rG = gregOfRexRM(pfx,modrm);
26012 UInt rV = getVexNvvvv(pfx);
26013 assign( vV, getYMMReg(rV) );
26014 if (epartIsReg(modrm)) {
26015 UInt rE = eregOfRexRM(pfx,modrm);
26016 assign( eV, getYMMReg(rE) );
26017 imm8 = (Int)getUChar(delta+1);
26018 delta += 1+1;
26019 DIP("vshufps $%d,%s,%s,%s\n",
26020 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
26021 } else {
26022 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26023 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
26024 imm8 = (Int)getUChar(delta+alen);
26025 delta += 1+alen;
26026 DIP("vshufps $%d,%s,%s,%s\n",
26027 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
26028 }
26029 IRTemp res = math_SHUFPS_256( eV, vV, imm8 );
26030 putYMMReg( rG, mkexpr(res) );
sewardj251b59e2012-05-25 13:51:07 +000026031 *uses_vvvv = True;
26032 goto decode_success;
26033 }
sewardj21459cb2012-06-18 14:05:52 +000026034 /* VSHUFPD imm8, xmm3/m128, xmm2, xmm1 */
26035 /* = VEX.NDS.128.66.0F.WIG C6 /r ib */
26036 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26037 Int imm8 = 0;
26038 IRTemp eV = newTemp(Ity_V128);
26039 IRTemp vV = newTemp(Ity_V128);
26040 UInt modrm = getUChar(delta);
26041 UInt rG = gregOfRexRM(pfx,modrm);
26042 UInt rV = getVexNvvvv(pfx);
26043 assign( vV, getXMMReg(rV) );
26044 if (epartIsReg(modrm)) {
26045 UInt rE = eregOfRexRM(pfx,modrm);
26046 assign( eV, getXMMReg(rE) );
sewardj47933bc2012-06-18 22:09:33 +000026047 imm8 = (Int)getUChar(delta+1);
sewardj21459cb2012-06-18 14:05:52 +000026048 delta += 1+1;
26049 DIP("vshufpd $%d,%s,%s,%s\n",
26050 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
26051 } else {
26052 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26053 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
sewardj47933bc2012-06-18 22:09:33 +000026054 imm8 = (Int)getUChar(delta+alen);
sewardj21459cb2012-06-18 14:05:52 +000026055 delta += 1+alen;
26056 DIP("vshufpd $%d,%s,%s,%s\n",
26057 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
26058 }
26059 IRTemp res = math_SHUFPD_128( eV, vV, imm8 );
26060 putYMMRegLoAndZU( rG, mkexpr(res) );
26061 *uses_vvvv = True;
26062 goto decode_success;
26063 }
26064 /* VSHUFPD imm8, ymm3/m256, ymm2, ymm1 */
26065 /* = VEX.NDS.256.66.0F.WIG C6 /r ib */
26066 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26067 Int imm8 = 0;
26068 IRTemp eV = newTemp(Ity_V256);
26069 IRTemp vV = newTemp(Ity_V256);
26070 UInt modrm = getUChar(delta);
26071 UInt rG = gregOfRexRM(pfx,modrm);
26072 UInt rV = getVexNvvvv(pfx);
26073 assign( vV, getYMMReg(rV) );
26074 if (epartIsReg(modrm)) {
26075 UInt rE = eregOfRexRM(pfx,modrm);
26076 assign( eV, getYMMReg(rE) );
sewardj47933bc2012-06-18 22:09:33 +000026077 imm8 = (Int)getUChar(delta+1);
sewardj21459cb2012-06-18 14:05:52 +000026078 delta += 1+1;
26079 DIP("vshufpd $%d,%s,%s,%s\n",
26080 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
26081 } else {
26082 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26083 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
sewardj47933bc2012-06-18 22:09:33 +000026084 imm8 = (Int)getUChar(delta+alen);
sewardj21459cb2012-06-18 14:05:52 +000026085 delta += 1+alen;
26086 DIP("vshufpd $%d,%s,%s,%s\n",
26087 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
26088 }
26089 IRTemp res = math_SHUFPD_256( eV, vV, imm8 );
26090 putYMMReg( rG, mkexpr(res) );
26091 *uses_vvvv = True;
26092 goto decode_success;
26093 }
sewardj251b59e2012-05-25 13:51:07 +000026094 break;
26095
sewardj89378162012-06-24 12:12:20 +000026096 case 0xD0:
26097 /* VADDSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D0 /r */
26098 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26099 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26100 uses_vvvv, vbi, pfx, delta,
26101 "vaddsubpd", math_ADDSUBPD_128 );
26102 goto decode_success;
26103 }
26104 /* VADDSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D0 /r */
26105 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26106 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26107 uses_vvvv, vbi, pfx, delta,
26108 "vaddsubpd", math_ADDSUBPD_256 );
26109 goto decode_success;
26110 }
26111 /* VADDSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG D0 /r */
26112 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26113 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26114 uses_vvvv, vbi, pfx, delta,
26115 "vaddsubps", math_ADDSUBPS_128 );
26116 goto decode_success;
26117 }
26118 /* VADDSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG D0 /r */
26119 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26120 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26121 uses_vvvv, vbi, pfx, delta,
26122 "vaddsubps", math_ADDSUBPS_256 );
26123 goto decode_success;
26124 }
26125 break;
26126
sewardj4c0a7ac2012-06-21 09:08:19 +000026127 case 0xD1:
26128 /* VPSRLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D1 /r */
26129 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26130 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26131 "vpsrlw", Iop_ShrN16x8 );
26132 *uses_vvvv = True;
26133 goto decode_success;
26134
26135 }
sewardjcc3d2192013-03-27 11:37:33 +000026136 /* VPSRLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D1 /r */
26137 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26138 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26139 "vpsrlw", Iop_ShrN16x16 );
26140 *uses_vvvv = True;
26141 goto decode_success;
26142
26143 }
sewardj4c0a7ac2012-06-21 09:08:19 +000026144 break;
26145
26146 case 0xD2:
26147 /* VPSRLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D2 /r */
26148 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26149 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26150 "vpsrld", Iop_ShrN32x4 );
26151 *uses_vvvv = True;
26152 goto decode_success;
26153 }
sewardjcc3d2192013-03-27 11:37:33 +000026154 /* VPSRLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D2 /r */
26155 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26156 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26157 "vpsrld", Iop_ShrN32x8 );
26158 *uses_vvvv = True;
26159 goto decode_success;
26160 }
sewardj4c0a7ac2012-06-21 09:08:19 +000026161 break;
26162
26163 case 0xD3:
26164 /* VPSRLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D3 /r */
26165 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26166 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26167 "vpsrlq", Iop_ShrN64x2 );
26168 *uses_vvvv = True;
26169 goto decode_success;
26170 }
sewardjcc3d2192013-03-27 11:37:33 +000026171 /* VPSRLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D3 /r */
26172 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26173 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26174 "vpsrlq", Iop_ShrN64x4 );
26175 *uses_vvvv = True;
26176 goto decode_success;
26177 }
sewardj4c0a7ac2012-06-21 09:08:19 +000026178 break;
26179
sewardj98d02cc2012-06-02 11:55:25 +000026180 case 0xD4:
26181 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
26182 /* VPADDQ = VEX.NDS.128.66.0F.WIG D4 /r */
26183 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26184 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26185 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x2 );
26186 goto decode_success;
26187 }
sewardjcc3d2192013-03-27 11:37:33 +000026188 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
26189 /* VPADDQ = VEX.NDS.256.66.0F.WIG D4 /r */
26190 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26191 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26192 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x4 );
26193 goto decode_success;
26194 }
sewardj98d02cc2012-06-02 11:55:25 +000026195 break;
26196
sewardj251b59e2012-05-25 13:51:07 +000026197 case 0xD5:
26198 /* VPMULLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D5 /r */
26199 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26200 delta = dis_AVX128_E_V_to_G(
26201 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x8 );
26202 goto decode_success;
26203 }
sewardjcc3d2192013-03-27 11:37:33 +000026204 /* VPMULLW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D5 /r */
26205 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26206 delta = dis_AVX256_E_V_to_G(
26207 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x16 );
26208 goto decode_success;
26209 }
sewardj251b59e2012-05-25 13:51:07 +000026210 break;
26211
sewardjc4530ae2012-05-21 10:18:49 +000026212 case 0xD6:
26213 /* I can't even find any Intel docs for this one. */
26214 /* Basically: 66 0F D6 = MOVQ -- move 64 bits from G (lo half
26215 xmm) to E (mem or lo half xmm). Looks like L==0(128), W==0
26216 (WIG, maybe?) */
sewardj6eaf00c2012-05-23 11:33:56 +000026217 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
sewardjc4530ae2012-05-21 10:18:49 +000026218 && 0==getRexW(pfx)/*this might be redundant, dunno*/) {
26219 UChar modrm = getUChar(delta);
26220 UInt rG = gregOfRexRM(pfx,modrm);
26221 if (epartIsReg(modrm)) {
26222 /* fall through, awaiting test case */
26223 /* dst: lo half copied, hi half zeroed */
26224 } else {
26225 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26226 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0 ));
26227 DIP("vmovq %s,%s\n", nameXMMReg(rG), dis_buf );
26228 delta += alen;
26229 goto decode_success;
26230 }
26231 }
26232 break;
26233
sewardj8ef22422012-05-24 16:29:18 +000026234 case 0xD7:
26235 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB xmm1, r32 */
26236 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26237 delta = dis_PMOVMSKB_128( vbi, pfx, delta, True/*isAvx*/ );
26238 goto decode_success;
26239 }
sewardjcc3d2192013-03-27 11:37:33 +000026240 /* VEX.256.66.0F.WIG D7 /r = VPMOVMSKB ymm1, r32 */
26241 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26242 delta = dis_PMOVMSKB_256( vbi, pfx, delta );
26243 goto decode_success;
26244 }
sewardj8ef22422012-05-24 16:29:18 +000026245 break;
26246
sewardj251b59e2012-05-25 13:51:07 +000026247 case 0xD8:
26248 /* VPSUBUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D8 /r */
26249 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26250 delta = dis_AVX128_E_V_to_G(
26251 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux16 );
26252 goto decode_success;
26253 }
sewardjcc3d2192013-03-27 11:37:33 +000026254 /* VPSUBUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D8 /r */
26255 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26256 delta = dis_AVX256_E_V_to_G(
26257 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux32 );
26258 goto decode_success;
26259 }
26260 break;
sewardj251b59e2012-05-25 13:51:07 +000026261
sewardj6fcd43e2012-06-14 08:51:35 +000026262 case 0xD9:
26263 /* VPSUBUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D9 /r */
26264 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26265 delta = dis_AVX128_E_V_to_G(
26266 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux8 );
26267 goto decode_success;
26268 }
sewardjcc3d2192013-03-27 11:37:33 +000026269 /* VPSUBUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D9 /r */
26270 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26271 delta = dis_AVX256_E_V_to_G(
26272 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux16 );
26273 goto decode_success;
26274 }
sewardj6fcd43e2012-06-14 08:51:35 +000026275 break;
26276
sewardje8a7eb72012-06-12 14:59:17 +000026277 case 0xDA:
26278 /* VPMINUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DA /r */
26279 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26280 delta = dis_AVX128_E_V_to_G(
26281 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux16 );
26282 goto decode_success;
26283 }
sewardjcc3d2192013-03-27 11:37:33 +000026284 /* VPMINUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DA /r */
26285 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26286 delta = dis_AVX256_E_V_to_G(
26287 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux32 );
26288 goto decode_success;
26289 }
sewardje8a7eb72012-06-12 14:59:17 +000026290 break;
26291
sewardj8ef22422012-05-24 16:29:18 +000026292 case 0xDB:
sewardj251b59e2012-05-25 13:51:07 +000026293 /* VPAND r/m, rV, r ::: r = rV & r/m */
sewardj8ef22422012-05-24 16:29:18 +000026294 /* VEX.NDS.128.66.0F.WIG DB /r = VPAND xmm3/m128, xmm2, xmm1 */
26295 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26296 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26297 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV128 );
26298 goto decode_success;
26299 }
sewardjcc3d2192013-03-27 11:37:33 +000026300 /* VPAND r/m, rV, r ::: r = rV & r/m */
26301 /* VEX.NDS.256.66.0F.WIG DB /r = VPAND ymm3/m256, ymm2, ymm1 */
26302 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26303 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26304 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV256 );
26305 goto decode_success;
26306 }
sewardj8ef22422012-05-24 16:29:18 +000026307 break;
26308
sewardj251b59e2012-05-25 13:51:07 +000026309 case 0xDC:
26310 /* VPADDUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DC /r */
26311 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26312 delta = dis_AVX128_E_V_to_G(
26313 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux16 );
26314 goto decode_success;
26315 }
sewardjcc3d2192013-03-27 11:37:33 +000026316 /* VPADDUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DC /r */
26317 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26318 delta = dis_AVX256_E_V_to_G(
26319 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux32 );
26320 goto decode_success;
26321 }
sewardj251b59e2012-05-25 13:51:07 +000026322 break;
26323
26324 case 0xDD:
26325 /* VPADDUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DD /r */
26326 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26327 delta = dis_AVX128_E_V_to_G(
26328 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux8 );
26329 goto decode_success;
26330 }
sewardjcc3d2192013-03-27 11:37:33 +000026331 /* VPADDUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DD /r */
26332 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26333 delta = dis_AVX256_E_V_to_G(
26334 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux16 );
26335 goto decode_success;
26336 }
sewardj251b59e2012-05-25 13:51:07 +000026337 break;
26338
sewardje8a7eb72012-06-12 14:59:17 +000026339 case 0xDE:
26340 /* VPMAXUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DE /r */
26341 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26342 delta = dis_AVX128_E_V_to_G(
26343 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux16 );
26344 goto decode_success;
26345 }
sewardjcc3d2192013-03-27 11:37:33 +000026346 /* VPMAXUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DE /r */
26347 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26348 delta = dis_AVX256_E_V_to_G(
26349 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux32 );
26350 goto decode_success;
26351 }
sewardje8a7eb72012-06-12 14:59:17 +000026352 break;
26353
26354 case 0xDF:
26355 /* VPANDN r/m, rV, r ::: r = ~rV & r/m (the NOT applies to rV) */
26356 /* VEX.NDS.128.66.0F.WIG DF /r = VPANDN xmm3/m128, xmm2, xmm1 */
26357 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26358 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
26359 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV128,
26360 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
26361 goto decode_success;
26362 }
sewardjcc3d2192013-03-27 11:37:33 +000026363 /* VPANDN r/m, rV, r ::: r = ~rV & r/m (the NOT applies to rV) */
26364 /* VEX.NDS.256.66.0F.WIG DF /r = VPANDN ymm3/m256, ymm2, ymm1 */
26365 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26366 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
26367 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV256,
26368 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
26369 goto decode_success;
26370 }
sewardje8a7eb72012-06-12 14:59:17 +000026371 break;
26372
sewardj8516a1f2012-06-24 14:26:30 +000026373 case 0xE0:
26374 /* VPAVGB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E0 /r */
26375 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26376 delta = dis_AVX128_E_V_to_G(
26377 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux16 );
26378 goto decode_success;
26379 }
sewardjcc3d2192013-03-27 11:37:33 +000026380 /* VPAVGB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E0 /r */
26381 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26382 delta = dis_AVX256_E_V_to_G(
26383 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux32 );
26384 goto decode_success;
26385 }
sewardj8516a1f2012-06-24 14:26:30 +000026386 break;
26387
sewardj4c0a7ac2012-06-21 09:08:19 +000026388 case 0xE1:
26389 /* VPSRAW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E1 /r */
26390 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26391 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26392 "vpsraw", Iop_SarN16x8 );
26393 *uses_vvvv = True;
26394 goto decode_success;
26395 }
sewardjcc3d2192013-03-27 11:37:33 +000026396 /* VPSRAW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E1 /r */
26397 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26398 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26399 "vpsraw", Iop_SarN16x16 );
26400 *uses_vvvv = True;
26401 goto decode_success;
26402 }
sewardj4c0a7ac2012-06-21 09:08:19 +000026403 break;
26404
26405 case 0xE2:
26406 /* VPSRAD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E2 /r */
26407 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26408 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26409 "vpsrad", Iop_SarN32x4 );
26410 *uses_vvvv = True;
26411 goto decode_success;
26412 }
sewardjcc3d2192013-03-27 11:37:33 +000026413 /* VPSRAD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E2 /r */
26414 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26415 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26416 "vpsrad", Iop_SarN32x8 );
26417 *uses_vvvv = True;
26418 goto decode_success;
26419 }
sewardj4c0a7ac2012-06-21 09:08:19 +000026420 break;
26421
sewardj8516a1f2012-06-24 14:26:30 +000026422 case 0xE3:
26423 /* VPAVGW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E3 /r */
26424 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26425 delta = dis_AVX128_E_V_to_G(
26426 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux8 );
26427 goto decode_success;
26428 }
sewardjcc3d2192013-03-27 11:37:33 +000026429 /* VPAVGW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E3 /r */
26430 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26431 delta = dis_AVX256_E_V_to_G(
26432 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux16 );
26433 goto decode_success;
26434 }
sewardj8516a1f2012-06-24 14:26:30 +000026435 break;
26436
sewardj251b59e2012-05-25 13:51:07 +000026437 case 0xE4:
26438 /* VPMULHUW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E4 /r */
26439 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26440 delta = dis_AVX128_E_V_to_G(
26441 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux8 );
26442 goto decode_success;
26443 }
sewardjcc3d2192013-03-27 11:37:33 +000026444 /* VPMULHUW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E4 /r */
26445 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26446 delta = dis_AVX256_E_V_to_G(
26447 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux16 );
26448 goto decode_success;
26449 }
sewardj251b59e2012-05-25 13:51:07 +000026450 break;
26451
sewardjfe0c5e72012-06-15 15:48:07 +000026452 case 0xE5:
26453 /* VPMULHW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E5 /r */
26454 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26455 delta = dis_AVX128_E_V_to_G(
26456 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx8 );
26457 goto decode_success;
26458 }
sewardjcc3d2192013-03-27 11:37:33 +000026459 /* VPMULHW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E5 /r */
26460 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26461 delta = dis_AVX256_E_V_to_G(
26462 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx16 );
26463 goto decode_success;
26464 }
sewardjfe0c5e72012-06-15 15:48:07 +000026465 break;
26466
sewardj4b1cc832012-06-13 11:10:20 +000026467 case 0xE6:
26468 /* VCVTDQ2PD xmm2/m64, xmm1 = VEX.128.F3.0F.WIG E6 /r */
26469 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
26470 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, True/*isAvx*/);
26471 goto decode_success;
26472 }
sewardj6fcd43e2012-06-14 08:51:35 +000026473 /* VCVTDQ2PD xmm2/m128, ymm1 = VEX.256.F3.0F.WIG E6 /r */
26474 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
26475 delta = dis_CVTDQ2PD_256(vbi, pfx, delta);
26476 goto decode_success;
26477 }
sewardj66becf32012-06-18 23:15:16 +000026478 /* VCVTTPD2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG E6 /r */
26479 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26480 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
26481 True/*r2zero*/);
26482 goto decode_success;
26483 }
26484 /* VCVTTPD2DQ ymm2/m256, xmm1 = VEX.256.66.0F.WIG E6 /r */
26485 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26486 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, True/*r2zero*/);
26487 goto decode_success;
26488 }
26489 /* VCVTPD2DQ xmm2/m128, xmm1 = VEX.128.F2.0F.WIG E6 /r */
26490 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26491 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
26492 False/*!r2zero*/);
26493 goto decode_success;
26494 }
26495 /* VCVTPD2DQ ymm2/m256, xmm1 = VEX.256.F2.0F.WIG E6 /r */
26496 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26497 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, False/*!r2zero*/);
26498 goto decode_success;
26499 }
sewardj4b1cc832012-06-13 11:10:20 +000026500 break;
26501
sewardj6eaf00c2012-05-23 11:33:56 +000026502 case 0xE7:
sewardj8eb7ae82012-06-24 14:00:27 +000026503 /* VMOVNTDQ xmm1, m128 = VEX.128.66.0F.WIG E7 /r */
sewardj6eaf00c2012-05-23 11:33:56 +000026504 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26505 UChar modrm = getUChar(delta);
26506 UInt rG = gregOfRexRM(pfx,modrm);
26507 if (!epartIsReg(modrm)) {
26508 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26509 gen_SEGV_if_not_16_aligned( addr );
26510 storeLE( mkexpr(addr), getXMMReg(rG) );
26511 DIP("vmovntdq %s,%s\n", dis_buf, nameXMMReg(rG));
26512 delta += alen;
26513 goto decode_success;
26514 }
26515 /* else fall through */
26516 }
sewardj8eb7ae82012-06-24 14:00:27 +000026517 /* VMOVNTDQ ymm1, m256 = VEX.256.66.0F.WIG E7 /r */
26518 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26519 UChar modrm = getUChar(delta);
26520 UInt rG = gregOfRexRM(pfx,modrm);
26521 if (!epartIsReg(modrm)) {
26522 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26523 gen_SEGV_if_not_32_aligned( addr );
26524 storeLE( mkexpr(addr), getYMMReg(rG) );
26525 DIP("vmovntdq %s,%s\n", dis_buf, nameYMMReg(rG));
26526 delta += alen;
26527 goto decode_success;
26528 }
26529 /* else fall through */
26530 }
sewardj6eaf00c2012-05-23 11:33:56 +000026531 break;
26532
sewardj4f228902012-06-21 09:17:58 +000026533 case 0xE8:
26534 /* VPSUBSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E8 /r */
26535 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26536 delta = dis_AVX128_E_V_to_G(
26537 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx16 );
26538 goto decode_success;
26539 }
sewardjcc3d2192013-03-27 11:37:33 +000026540 /* VPSUBSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E8 /r */
26541 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26542 delta = dis_AVX256_E_V_to_G(
26543 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx32 );
26544 goto decode_success;
26545 }
sewardj8516a1f2012-06-24 14:26:30 +000026546 break;
sewardj4f228902012-06-21 09:17:58 +000026547
26548 case 0xE9:
26549 /* VPSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E9 /r */
26550 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26551 delta = dis_AVX128_E_V_to_G(
26552 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx8 );
26553 goto decode_success;
26554 }
sewardjcc3d2192013-03-27 11:37:33 +000026555 /* VPSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E9 /r */
26556 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26557 delta = dis_AVX256_E_V_to_G(
26558 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx16 );
26559 goto decode_success;
26560 }
sewardj4f228902012-06-21 09:17:58 +000026561 break;
26562
sewardje8a7eb72012-06-12 14:59:17 +000026563 case 0xEA:
26564 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
26565 /* VPMINSW = VEX.NDS.128.66.0F.WIG EA /r */
26566 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26567 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26568 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx8 );
26569 goto decode_success;
26570 }
sewardjcc3d2192013-03-27 11:37:33 +000026571 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
26572 /* VPMINSW = VEX.NDS.256.66.0F.WIG EA /r */
26573 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26574 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26575 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx16 );
26576 goto decode_success;
26577 }
sewardje8a7eb72012-06-12 14:59:17 +000026578 break;
26579
sewardjc4530ae2012-05-21 10:18:49 +000026580 case 0xEB:
sewardj251b59e2012-05-25 13:51:07 +000026581 /* VPOR r/m, rV, r ::: r = rV | r/m */
sewardjc4530ae2012-05-21 10:18:49 +000026582 /* VPOR = VEX.NDS.128.66.0F.WIG EB /r */
26583 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26584 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26585 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV128 );
26586 goto decode_success;
26587 }
sewardjcc3d2192013-03-27 11:37:33 +000026588 /* VPOR r/m, rV, r ::: r = rV | r/m */
26589 /* VPOR = VEX.NDS.256.66.0F.WIG EB /r */
26590 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26591 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26592 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV256 );
26593 goto decode_success;
26594 }
sewardjc4530ae2012-05-21 10:18:49 +000026595 break;
26596
sewardj8516a1f2012-06-24 14:26:30 +000026597 case 0xEC:
26598 /* VPADDSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG EC /r */
26599 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26600 delta = dis_AVX128_E_V_to_G(
26601 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx16 );
26602 goto decode_success;
26603 }
sewardjcc3d2192013-03-27 11:37:33 +000026604 /* VPADDSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG EC /r */
26605 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26606 delta = dis_AVX256_E_V_to_G(
26607 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx32 );
26608 goto decode_success;
26609 }
sewardj8516a1f2012-06-24 14:26:30 +000026610 break;
26611
26612 case 0xED:
26613 /* VPADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG ED /r */
26614 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26615 delta = dis_AVX128_E_V_to_G(
26616 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx8 );
26617 goto decode_success;
26618 }
sewardjcc3d2192013-03-27 11:37:33 +000026619 /* VPADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG ED /r */
26620 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26621 delta = dis_AVX256_E_V_to_G(
26622 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx16 );
26623 goto decode_success;
26624 }
sewardj8516a1f2012-06-24 14:26:30 +000026625 break;
26626
sewardje8a7eb72012-06-12 14:59:17 +000026627 case 0xEE:
26628 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
26629 /* VPMAXSW = VEX.NDS.128.66.0F.WIG EE /r */
26630 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26631 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26632 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx8 );
26633 goto decode_success;
26634 }
sewardjcc3d2192013-03-27 11:37:33 +000026635 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
26636 /* VPMAXSW = VEX.NDS.256.66.0F.WIG EE /r */
26637 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26638 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26639 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx16 );
26640 goto decode_success;
26641 }
sewardje8a7eb72012-06-12 14:59:17 +000026642 break;
26643
sewardjc4530ae2012-05-21 10:18:49 +000026644 case 0xEF:
sewardj251b59e2012-05-25 13:51:07 +000026645 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
sewardjc4530ae2012-05-21 10:18:49 +000026646 /* VPXOR = VEX.NDS.128.66.0F.WIG EF /r */
26647 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26648 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26649 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV128 );
26650 goto decode_success;
26651 }
sewardjcc3d2192013-03-27 11:37:33 +000026652 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
26653 /* VPXOR = VEX.NDS.256.66.0F.WIG EF /r */
26654 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26655 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26656 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV256 );
26657 goto decode_success;
26658 }
sewardjc4530ae2012-05-21 10:18:49 +000026659 break;
26660
sewardjadf357c2012-06-24 13:44:17 +000026661 case 0xF0:
26662 /* VLDDQU m256, ymm1 = VEX.256.F2.0F.WIG F0 /r */
26663 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26664 UChar modrm = getUChar(delta);
26665 UInt rD = gregOfRexRM(pfx, modrm);
26666 IRTemp tD = newTemp(Ity_V256);
26667 if (epartIsReg(modrm)) break;
26668 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26669 delta += alen;
26670 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
26671 DIP("vlddqu %s,%s\n", dis_buf, nameYMMReg(rD));
26672 putYMMReg(rD, mkexpr(tD));
26673 goto decode_success;
26674 }
26675 /* VLDDQU m128, xmm1 = VEX.128.F2.0F.WIG F0 /r */
26676 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26677 UChar modrm = getUChar(delta);
26678 UInt rD = gregOfRexRM(pfx, modrm);
26679 IRTemp tD = newTemp(Ity_V128);
26680 if (epartIsReg(modrm)) break;
26681 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26682 delta += alen;
26683 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
26684 DIP("vlddqu %s,%s\n", dis_buf, nameXMMReg(rD));
26685 putYMMRegLoAndZU(rD, mkexpr(tD));
26686 goto decode_success;
26687 }
26688 break;
26689
sewardj4c0a7ac2012-06-21 09:08:19 +000026690 case 0xF1:
26691 /* VPSLLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F1 /r */
26692 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26693 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26694 "vpsllw", Iop_ShlN16x8 );
26695 *uses_vvvv = True;
26696 goto decode_success;
26697
26698 }
sewardjcc3d2192013-03-27 11:37:33 +000026699 /* VPSLLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F1 /r */
26700 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26701 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26702 "vpsllw", Iop_ShlN16x16 );
26703 *uses_vvvv = True;
26704 goto decode_success;
26705
26706 }
sewardj4c0a7ac2012-06-21 09:08:19 +000026707 break;
26708
26709 case 0xF2:
26710 /* VPSLLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F2 /r */
26711 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26712 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26713 "vpslld", Iop_ShlN32x4 );
26714 *uses_vvvv = True;
26715 goto decode_success;
26716 }
sewardjcc3d2192013-03-27 11:37:33 +000026717 /* VPSLLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F2 /r */
26718 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26719 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26720 "vpslld", Iop_ShlN32x8 );
26721 *uses_vvvv = True;
26722 goto decode_success;
26723 }
sewardj4c0a7ac2012-06-21 09:08:19 +000026724 break;
26725
26726 case 0xF3:
26727 /* VPSLLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F3 /r */
26728 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26729 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26730 "vpsllq", Iop_ShlN64x2 );
26731 *uses_vvvv = True;
26732 goto decode_success;
26733 }
sewardjcc3d2192013-03-27 11:37:33 +000026734 /* VPSLLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F3 /r */
26735 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26736 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26737 "vpsllq", Iop_ShlN64x4 );
26738 *uses_vvvv = True;
26739 goto decode_success;
26740 }
sewardj4c0a7ac2012-06-21 09:08:19 +000026741 break;
26742
sewardje8a7eb72012-06-12 14:59:17 +000026743 case 0xF4:
26744 /* VPMULUDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F4 /r */
26745 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26746 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26747 uses_vvvv, vbi, pfx, delta,
26748 "vpmuludq", math_PMULUDQ_128 );
sewardj89378162012-06-24 12:12:20 +000026749 goto decode_success;
26750 }
sewardjcc3d2192013-03-27 11:37:33 +000026751 /* VPMULUDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F4 /r */
26752 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26753 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26754 uses_vvvv, vbi, pfx, delta,
26755 "vpmuludq", math_PMULUDQ_256 );
26756 goto decode_success;
26757 }
sewardj89378162012-06-24 12:12:20 +000026758 break;
26759
26760 case 0xF5:
26761 /* VPMADDWD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F5 /r */
26762 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26763 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26764 uses_vvvv, vbi, pfx, delta,
26765 "vpmaddwd", math_PMADDWD_128 );
26766 goto decode_success;
sewardje8a7eb72012-06-12 14:59:17 +000026767 }
sewardjcc3d2192013-03-27 11:37:33 +000026768 /* VPMADDWD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F5 /r */
26769 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26770 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26771 uses_vvvv, vbi, pfx, delta,
26772 "vpmaddwd", math_PMADDWD_256 );
26773 goto decode_success;
26774 }
sewardje8a7eb72012-06-12 14:59:17 +000026775 break;
26776
sewardj82096922012-06-24 14:57:59 +000026777 case 0xF6:
26778 /* VPSADBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F6 /r */
26779 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26780 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26781 uses_vvvv, vbi, pfx, delta,
26782 "vpsadbw", math_PSADBW_128 );
26783 goto decode_success;
26784 }
sewardjcc3d2192013-03-27 11:37:33 +000026785 /* VPSADBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F6 /r */
26786 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26787 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26788 uses_vvvv, vbi, pfx, delta,
26789 "vpsadbw", math_PSADBW_256 );
26790 goto decode_success;
26791 }
sewardj82096922012-06-24 14:57:59 +000026792 break;
26793
sewardj8eb7ae82012-06-24 14:00:27 +000026794 case 0xF7:
26795 /* VMASKMOVDQU xmm2, xmm1 = VEX.128.66.0F.WIG F7 /r */
26796 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
26797 && epartIsReg(getUChar(delta))) {
26798 delta = dis_MASKMOVDQU( vbi, pfx, delta, True/*isAvx*/ );
26799 goto decode_success;
26800 }
26801 break;
26802
sewardjc4530ae2012-05-21 10:18:49 +000026803 case 0xF8:
sewardj251b59e2012-05-25 13:51:07 +000026804 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
sewardj98d02cc2012-06-02 11:55:25 +000026805 /* VPSUBB = VEX.NDS.128.66.0F.WIG F8 /r */
sewardjc4530ae2012-05-21 10:18:49 +000026806 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26807 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26808 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x16 );
26809 goto decode_success;
26810 }
sewardjcc3d2192013-03-27 11:37:33 +000026811 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
26812 /* VPSUBB = VEX.NDS.256.66.0F.WIG F8 /r */
26813 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26814 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26815 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x32 );
26816 goto decode_success;
26817 }
sewardjc4530ae2012-05-21 10:18:49 +000026818 break;
26819
sewardj98d02cc2012-06-02 11:55:25 +000026820 case 0xF9:
26821 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
26822 /* VPSUBW = VEX.NDS.128.66.0F.WIG F9 /r */
26823 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26824 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26825 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x8 );
26826 goto decode_success;
26827 }
sewardjcc3d2192013-03-27 11:37:33 +000026828 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
26829 /* VPSUBW = VEX.NDS.256.66.0F.WIG F9 /r */
26830 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26831 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26832 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x16 );
26833 goto decode_success;
26834 }
sewardj98d02cc2012-06-02 11:55:25 +000026835 break;
26836
sewardjc4530ae2012-05-21 10:18:49 +000026837 case 0xFA:
sewardj251b59e2012-05-25 13:51:07 +000026838 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
sewardj98d02cc2012-06-02 11:55:25 +000026839 /* VPSUBD = VEX.NDS.128.66.0F.WIG FA /r */
sewardjc4530ae2012-05-21 10:18:49 +000026840 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26841 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26842 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x4 );
26843 goto decode_success;
26844 }
sewardjcc3d2192013-03-27 11:37:33 +000026845 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
26846 /* VPSUBD = VEX.NDS.256.66.0F.WIG FA /r */
26847 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26848 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26849 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x8 );
26850 goto decode_success;
26851 }
sewardjc4530ae2012-05-21 10:18:49 +000026852 break;
26853
sewardj98d02cc2012-06-02 11:55:25 +000026854 case 0xFB:
26855 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
26856 /* VPSUBQ = VEX.NDS.128.66.0F.WIG FB /r */
26857 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26858 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26859 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x2 );
26860 goto decode_success;
26861 }
sewardjcc3d2192013-03-27 11:37:33 +000026862 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
26863 /* VPSUBQ = VEX.NDS.256.66.0F.WIG FB /r */
26864 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26865 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26866 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x4 );
26867 goto decode_success;
26868 }
sewardj98d02cc2012-06-02 11:55:25 +000026869 break;
26870
sewardj6fcd43e2012-06-14 08:51:35 +000026871 case 0xFC:
26872 /* VPADDB r/m, rV, r ::: r = rV + r/m */
26873 /* VPADDB = VEX.NDS.128.66.0F.WIG FC /r */
26874 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26875 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26876 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x16 );
26877 goto decode_success;
26878 }
sewardjcc3d2192013-03-27 11:37:33 +000026879 /* VPADDB r/m, rV, r ::: r = rV + r/m */
26880 /* VPADDB = VEX.NDS.256.66.0F.WIG FC /r */
26881 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26882 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26883 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x32 );
26884 goto decode_success;
26885 }
sewardj6fcd43e2012-06-14 08:51:35 +000026886 break;
26887
sewardj6faf7cc2012-05-25 15:53:01 +000026888 case 0xFD:
26889 /* VPADDW r/m, rV, r ::: r = rV + r/m */
26890 /* VPADDW = VEX.NDS.128.66.0F.WIG FD /r */
26891 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26892 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26893 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x8 );
26894 goto decode_success;
26895 }
sewardjcc3d2192013-03-27 11:37:33 +000026896 /* VPADDW r/m, rV, r ::: r = rV + r/m */
26897 /* VPADDW = VEX.NDS.256.66.0F.WIG FD /r */
26898 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26899 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26900 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x16 );
26901 goto decode_success;
26902 }
sewardj6faf7cc2012-05-25 15:53:01 +000026903 break;
26904
sewardjc4530ae2012-05-21 10:18:49 +000026905 case 0xFE:
sewardj251b59e2012-05-25 13:51:07 +000026906 /* VPADDD r/m, rV, r ::: r = rV + r/m */
sewardjc4530ae2012-05-21 10:18:49 +000026907 /* VPADDD = VEX.NDS.128.66.0F.WIG FE /r */
26908 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26909 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26910 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x4 );
26911 goto decode_success;
26912 }
sewardjcc3d2192013-03-27 11:37:33 +000026913 /* VPADDD r/m, rV, r ::: r = rV + r/m */
26914 /* VPADDD = VEX.NDS.256.66.0F.WIG FE /r */
26915 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26916 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26917 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x8 );
26918 goto decode_success;
26919 }
sewardjc4530ae2012-05-21 10:18:49 +000026920 break;
26921
26922 default:
26923 break;
26924
26925 }
26926
26927 //decode_failure:
26928 return deltaIN;
26929
26930 decode_success:
26931 return delta;
26932}
26933
26934
26935/*------------------------------------------------------------*/
26936/*--- ---*/
26937/*--- Top-level post-escape decoders: dis_ESC_0F38__VEX ---*/
26938/*--- ---*/
26939/*------------------------------------------------------------*/
26940
sewardjd8bca7e2012-06-20 11:46:19 +000026941static IRTemp math_PERMILPS_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
26942{
26943 /* In the control vector, zero out all but the bottom two bits of
26944 each 32-bit lane. */
26945 IRExpr* cv1 = binop(Iop_ShrN32x4,
26946 binop(Iop_ShlN32x4, mkexpr(ctrlV), mkU8(30)),
26947 mkU8(30));
26948 /* And use the resulting cleaned-up control vector as steering
26949 in a Perm operation. */
26950 IRTemp res = newTemp(Ity_V128);
26951 assign(res, binop(Iop_Perm32x4, mkexpr(dataV), cv1));
26952 return res;
26953}
26954
26955static IRTemp math_PERMILPS_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
26956{
26957 IRTemp dHi, dLo, cHi, cLo;
26958 dHi = dLo = cHi = cLo = IRTemp_INVALID;
26959 breakupV256toV128s( dataV, &dHi, &dLo );
26960 breakupV256toV128s( ctrlV, &cHi, &cLo );
26961 IRTemp rHi = math_PERMILPS_VAR_128( dHi, cHi );
26962 IRTemp rLo = math_PERMILPS_VAR_128( dLo, cLo );
26963 IRTemp res = newTemp(Ity_V256);
26964 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
26965 return res;
26966}
26967
26968static IRTemp math_PERMILPD_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
26969{
26970 /* No cleverness here .. */
26971 IRTemp dHi, dLo, cHi, cLo;
26972 dHi = dLo = cHi = cLo = IRTemp_INVALID;
26973 breakupV128to64s( dataV, &dHi, &dLo );
26974 breakupV128to64s( ctrlV, &cHi, &cLo );
26975 IRExpr* rHi
florian99dd03e2013-01-29 03:56:06 +000026976 = IRExpr_ITE( unop(Iop_64to1,
26977 binop(Iop_Shr64, mkexpr(cHi), mkU8(1))),
26978 mkexpr(dHi), mkexpr(dLo) );
sewardjd8bca7e2012-06-20 11:46:19 +000026979 IRExpr* rLo
florian99dd03e2013-01-29 03:56:06 +000026980 = IRExpr_ITE( unop(Iop_64to1,
26981 binop(Iop_Shr64, mkexpr(cLo), mkU8(1))),
26982 mkexpr(dHi), mkexpr(dLo) );
sewardjd8bca7e2012-06-20 11:46:19 +000026983 IRTemp res = newTemp(Ity_V128);
26984 assign(res, binop(Iop_64HLtoV128, rHi, rLo));
26985 return res;
26986}
26987
26988static IRTemp math_PERMILPD_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
26989{
26990 IRTemp dHi, dLo, cHi, cLo;
26991 dHi = dLo = cHi = cLo = IRTemp_INVALID;
26992 breakupV256toV128s( dataV, &dHi, &dLo );
26993 breakupV256toV128s( ctrlV, &cHi, &cLo );
26994 IRTemp rHi = math_PERMILPD_VAR_128( dHi, cHi );
26995 IRTemp rLo = math_PERMILPD_VAR_128( dLo, cLo );
26996 IRTemp res = newTemp(Ity_V256);
26997 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
26998 return res;
26999}
27000
sewardjcc3d2192013-03-27 11:37:33 +000027001static IRTemp math_VPERMD ( IRTemp ctrlV, IRTemp dataV )
27002{
27003 /* In the control vector, zero out all but the bottom three bits of
27004 each 32-bit lane. */
27005 IRExpr* cv1 = binop(Iop_ShrN32x8,
27006 binop(Iop_ShlN32x8, mkexpr(ctrlV), mkU8(29)),
27007 mkU8(29));
27008 /* And use the resulting cleaned-up control vector as steering
27009 in a Perm operation. */
27010 IRTemp res = newTemp(Ity_V256);
27011 assign(res, binop(Iop_Perm32x8, mkexpr(dataV), cv1));
27012 return res;
27013}
27014
27015static Long dis_SHIFTX ( /*OUT*/Bool* uses_vvvv,
floriancacba8e2014-12-15 18:58:07 +000027016 const VexAbiInfo* vbi, Prefix pfx, Long delta,
sewardjcc3d2192013-03-27 11:37:33 +000027017 const HChar* opname, IROp op8 )
27018{
27019 HChar dis_buf[50];
27020 Int alen;
27021 Int size = getRexW(pfx) ? 8 : 4;
27022 IRType ty = szToITy(size);
27023 IRTemp src = newTemp(ty);
27024 IRTemp amt = newTemp(ty);
27025 UChar rm = getUChar(delta);
27026
27027 assign( amt, getIRegV(size,pfx) );
27028 if (epartIsReg(rm)) {
27029 assign( src, getIRegE(size,pfx,rm) );
27030 DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx),
27031 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
27032 delta++;
27033 } else {
27034 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27035 assign( src, loadLE(ty, mkexpr(addr)) );
27036 DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx), dis_buf,
27037 nameIRegG(size,pfx,rm));
27038 delta += alen;
27039 }
27040
27041 putIRegG( size, pfx, rm,
27042 binop(mkSizedOp(ty,op8), mkexpr(src),
27043 narrowTo(Ity_I8, binop(mkSizedOp(ty,Iop_And8), mkexpr(amt),
27044 mkU(ty,8*size-1)))) );
27045 /* Flags aren't modified. */
27046 *uses_vvvv = True;
27047 return delta;
27048}
27049
27050
floriancacba8e2014-12-15 18:58:07 +000027051static Long dis_FMA ( const VexAbiInfo* vbi, Prefix pfx, Long delta, UChar opc )
sewardjcc3d2192013-03-27 11:37:33 +000027052{
27053 UChar modrm = getUChar(delta);
27054 UInt rG = gregOfRexRM(pfx, modrm);
27055 UInt rV = getVexNvvvv(pfx);
27056 Bool scalar = (opc & 0xF) > 7 && (opc & 1);
27057 IRType ty = getRexW(pfx) ? Ity_F64 : Ity_F32;
27058 IRType vty = scalar ? ty : getVexL(pfx) ? Ity_V256 : Ity_V128;
27059 IRTemp vX = newTemp(vty);
27060 IRTemp vY = newTemp(vty);
27061 IRTemp vZ = newTemp(vty);
27062 IRExpr *x[8], *y[8], *z[8];
27063 IRTemp addr = IRTemp_INVALID;
27064 HChar dis_buf[50];
27065 Int alen = 0;
27066 const HChar *name;
27067 const HChar *suffix;
27068 const HChar *order;
27069 Bool negateRes = False;
27070 Bool negateZeven = False;
27071 Bool negateZodd = False;
27072 Int i, j;
27073 Int count;
27074 static IROp ops[] = { Iop_V256to64_0, Iop_V256to64_1,
27075 Iop_V256to64_2, Iop_V256to64_3,
27076 Iop_V128to64, Iop_V128HIto64 };
27077
27078 switch (opc & 0xF) {
27079 case 0x6:
27080 name = "addsub";
27081 negateZeven = True;
27082 break;
27083 case 0x7:
27084 name = "subadd";
27085 negateZodd = True;
27086 break;
27087 case 0x8:
27088 case 0x9:
27089 name = "add";
27090 break;
27091 case 0xA:
27092 case 0xB:
27093 name = "sub";
27094 negateZeven = True;
27095 negateZodd = True;
27096 break;
27097 case 0xC:
27098 case 0xD:
27099 name = "add";
27100 negateRes = True;
27101 negateZeven = True;
27102 negateZodd = True;
27103 break;
27104 case 0xE:
27105 case 0xF:
27106 name = "sub";
27107 negateRes = True;
27108 break;
27109 default:
27110 vpanic("dis_FMA(amd64)");
27111 break;
27112 }
27113 switch (opc & 0xF0) {
27114 case 0x90: order = "132"; break;
27115 case 0xA0: order = "213"; break;
27116 case 0xB0: order = "231"; break;
27117 default: vpanic("dis_FMA(amd64)"); break;
27118 }
27119 if (scalar)
27120 suffix = ty == Ity_F64 ? "sd" : "ss";
27121 else
27122 suffix = ty == Ity_F64 ? "pd" : "ps";
27123
27124 if (scalar) {
27125 assign( vX, ty == Ity_F64
27126 ? getXMMRegLane64F(rG, 0) : getXMMRegLane32F(rG, 0) );
27127 assign( vZ, ty == Ity_F64
27128 ? getXMMRegLane64F(rV, 0) : getXMMRegLane32F(rV, 0) );
27129 } else {
27130 assign( vX, vty == Ity_V256 ? getYMMReg(rG) : getXMMReg(rG) );
27131 assign( vZ, vty == Ity_V256 ? getYMMReg(rV) : getXMMReg(rV) );
27132 }
27133
27134 if (epartIsReg(modrm)) {
27135 UInt rE = eregOfRexRM(pfx, modrm);
27136 delta += 1;
27137 if (scalar)
27138 assign( vY, ty == Ity_F64
27139 ? getXMMRegLane64F(rE, 0) : getXMMRegLane32F(rE, 0) );
27140 else
27141 assign( vY, vty == Ity_V256 ? getYMMReg(rE) : getXMMReg(rE) );
27142 if (vty == Ity_V256) {
27143 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27144 name, order, suffix, nameYMMReg(rE), nameYMMReg(rV),
27145 nameYMMReg(rG));
27146 } else {
27147 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27148 name, order, suffix, nameXMMReg(rE), nameXMMReg(rV),
27149 nameXMMReg(rG));
27150 }
27151 } else {
27152 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27153 delta += alen;
27154 assign(vY, loadLE(vty, mkexpr(addr)));
27155 if (vty == Ity_V256) {
27156 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27157 name, order, suffix, dis_buf, nameYMMReg(rV),
27158 nameYMMReg(rG));
27159 } else {
27160 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27161 name, order, suffix, dis_buf, nameXMMReg(rV),
27162 nameXMMReg(rG));
27163 }
27164 }
27165
27166 /* vX/vY/vZ now in 132 order. If it is different order, swap the
27167 arguments. */
27168 if ((opc & 0xF0) != 0x90) {
27169 IRTemp tem = vX;
27170 if ((opc & 0xF0) == 0xA0) {
27171 vX = vZ;
27172 vZ = vY;
27173 vY = tem;
27174 } else {
27175 vX = vZ;
27176 vZ = tem;
27177 }
27178 }
27179
27180 if (scalar) {
27181 count = 1;
27182 x[0] = mkexpr(vX);
27183 y[0] = mkexpr(vY);
27184 z[0] = mkexpr(vZ);
27185 } else if (ty == Ity_F32) {
27186 count = vty == Ity_V256 ? 8 : 4;
27187 j = vty == Ity_V256 ? 0 : 4;
27188 for (i = 0; i < count; i += 2) {
27189 IRTemp tem = newTemp(Ity_I64);
27190 assign(tem, unop(ops[i / 2 + j], mkexpr(vX)));
27191 x[i] = unop(Iop_64to32, mkexpr(tem));
27192 x[i + 1] = unop(Iop_64HIto32, mkexpr(tem));
27193 tem = newTemp(Ity_I64);
27194 assign(tem, unop(ops[i / 2 + j], mkexpr(vY)));
27195 y[i] = unop(Iop_64to32, mkexpr(tem));
27196 y[i + 1] = unop(Iop_64HIto32, mkexpr(tem));
27197 tem = newTemp(Ity_I64);
27198 assign(tem, unop(ops[i / 2 + j], mkexpr(vZ)));
27199 z[i] = unop(Iop_64to32, mkexpr(tem));
27200 z[i + 1] = unop(Iop_64HIto32, mkexpr(tem));
27201 }
27202 } else {
27203 count = vty == Ity_V256 ? 4 : 2;
27204 j = vty == Ity_V256 ? 0 : 4;
27205 for (i = 0; i < count; i++) {
27206 x[i] = unop(ops[i + j], mkexpr(vX));
27207 y[i] = unop(ops[i + j], mkexpr(vY));
27208 z[i] = unop(ops[i + j], mkexpr(vZ));
27209 }
27210 }
27211 if (!scalar)
27212 for (i = 0; i < count; i++) {
27213 IROp op = ty == Ity_F64
27214 ? Iop_ReinterpI64asF64 : Iop_ReinterpI32asF32;
27215 x[i] = unop(op, x[i]);
27216 y[i] = unop(op, y[i]);
27217 z[i] = unop(op, z[i]);
27218 }
27219 for (i = 0; i < count; i++) {
27220 if ((i & 1) ? negateZodd : negateZeven)
27221 z[i] = unop(ty == Ity_F64 ? Iop_NegF64 : Iop_NegF32, z[i]);
27222 x[i] = IRExpr_Qop(ty == Ity_F64 ? Iop_MAddF64 : Iop_MAddF32,
27223 get_FAKE_roundingmode(), x[i], y[i], z[i]);
27224 if (negateRes)
27225 x[i] = unop(ty == Ity_F64 ? Iop_NegF64 : Iop_NegF32, x[i]);
27226 if (ty == Ity_F64)
27227 putYMMRegLane64F( rG, i, x[i] );
27228 else
27229 putYMMRegLane32F( rG, i, x[i] );
27230 }
27231 if (vty != Ity_V256)
27232 putYMMRegLane128( rG, 1, mkV128(0) );
27233
27234 return delta;
27235}
27236
27237
27238/* Masked load. */
floriancacba8e2014-12-15 18:58:07 +000027239static ULong dis_VMASKMOV_load ( Bool *uses_vvvv, const VexAbiInfo* vbi,
sewardjcc3d2192013-03-27 11:37:33 +000027240 Prefix pfx, Long delta,
27241 const HChar* opname, Bool isYMM, IRType ty )
27242{
27243 HChar dis_buf[50];
27244 Int alen, i;
27245 IRTemp addr;
27246 UChar modrm = getUChar(delta);
27247 UInt rG = gregOfRexRM(pfx,modrm);
27248 UInt rV = getVexNvvvv(pfx);
27249 IRTemp res[8], cond;
27250 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27251 if (isYMM) {
27252 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
27253 } else {
27254 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
27255 }
27256 delta += alen;
27257
27258 for (i = 0; i < 2 * (isYMM ? 2 : 1) * (ty == Ity_I32 ? 2 : 1); i++) {
27259 res[i] = newTemp(ty);
27260 cond = newTemp(Ity_I1);
27261 assign( cond,
27262 binop(ty == Ity_I32 ? Iop_CmpLT32S : Iop_CmpLT64S,
27263 ty == Ity_I32 ? getYMMRegLane32( rV, i )
27264 : getYMMRegLane64( rV, i ),
27265 mkU(ty, 0) ));
27266 assign( res[i],
27267 IRExpr_ITE(
27268 mkexpr(cond),
27269 loadLE(ty, IRExpr_ITE(
27270 mkexpr(cond),
27271 binop(Iop_Add64, mkexpr(addr),
27272 mkU64(i*(ty == Ity_I32 ? 4 : 8))),
27273 getIReg64(R_RSP)
27274 )
27275 ),
27276 mkU(ty, 0)
27277 )
27278 );
27279 }
27280 switch (ty) {
27281 case Ity_I32:
27282 for (i = 0; i < 8; i++)
27283 putYMMRegLane32( rG, i, (i < 4 || isYMM)
27284 ? mkexpr(res[i]) : mkU32(0) );
27285 break;
27286 case Ity_I64:
27287 for (i = 0; i < 4; i++)
27288 putYMMRegLane64( rG, i, (i < 2 || isYMM)
27289 ? mkexpr(res[i]) : mkU64(0) );
27290 break;
27291 default: vassert(0);
27292 }
27293
27294 *uses_vvvv = True;
27295 return delta;
27296}
27297
27298
27299/* Gather. */
floriancacba8e2014-12-15 18:58:07 +000027300static ULong dis_VGATHER ( Bool *uses_vvvv, const VexAbiInfo* vbi,
sewardjcc3d2192013-03-27 11:37:33 +000027301 Prefix pfx, Long delta,
27302 const HChar* opname, Bool isYMM,
27303 Bool isVM64x, IRType ty )
27304{
27305 HChar dis_buf[50];
27306 Int alen, i, vscale, count1, count2;
27307 IRTemp addr;
27308 UChar modrm = getUChar(delta);
27309 UInt rG = gregOfRexRM(pfx,modrm);
27310 UInt rV = getVexNvvvv(pfx);
27311 UInt rI;
27312 IRType dstTy = (isYMM && (ty == Ity_I64 || !isVM64x)) ? Ity_V256 : Ity_V128;
27313 IRType idxTy = (isYMM && (ty == Ity_I32 || isVM64x)) ? Ity_V256 : Ity_V128;
27314 IRTemp cond;
27315 addr = disAVSIBMode ( &alen, vbi, pfx, delta, dis_buf, &rI,
27316 idxTy, &vscale );
27317 if (addr == IRTemp_INVALID || rI == rG || rI == rV || rG == rV)
27318 return delta;
27319 if (dstTy == Ity_V256) {
27320 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rV), dis_buf, nameYMMReg(rG) );
27321 } else {
27322 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rV), dis_buf, nameXMMReg(rG) );
27323 }
27324 delta += alen;
27325
27326 if (ty == Ity_I32) {
27327 count1 = isYMM ? 8 : 4;
27328 count2 = isVM64x ? count1 / 2 : count1;
27329 } else {
27330 count1 = count2 = isYMM ? 4 : 2;
27331 }
27332
27333 /* First update the mask register to copies of the sign bit. */
27334 if (ty == Ity_I32) {
27335 if (isYMM)
27336 putYMMReg( rV, binop(Iop_SarN32x8, getYMMReg( rV ), mkU8(31)) );
27337 else
27338 putYMMRegLoAndZU( rV, binop(Iop_SarN32x4, getXMMReg( rV ), mkU8(31)) );
27339 } else {
27340 for (i = 0; i < count1; i++) {
27341 putYMMRegLane64( rV, i, binop(Iop_Sar64, getYMMRegLane64( rV, i ),
27342 mkU8(63)) );
27343 }
27344 }
27345
27346 /* Next gather the individual elements. If any fault occurs, the
27347 corresponding mask element will be set and the loop stops. */
27348 for (i = 0; i < count2; i++) {
27349 IRExpr *expr, *addr_expr;
27350 cond = newTemp(Ity_I1);
27351 assign( cond,
27352 binop(ty == Ity_I32 ? Iop_CmpLT32S : Iop_CmpLT64S,
27353 ty == Ity_I32 ? getYMMRegLane32( rV, i )
27354 : getYMMRegLane64( rV, i ),
27355 mkU(ty, 0)) );
27356 expr = ty == Ity_I32 ? getYMMRegLane32( rG, i )
27357 : getYMMRegLane64( rG, i );
27358 addr_expr = isVM64x ? getYMMRegLane64( rI, i )
27359 : unop(Iop_32Sto64, getYMMRegLane32( rI, i ));
27360 switch (vscale) {
27361 case 2: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(1)); break;
27362 case 4: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(2)); break;
27363 case 8: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(3)); break;
27364 default: break;
27365 }
27366 addr_expr = binop(Iop_Add64, mkexpr(addr), addr_expr);
27367 addr_expr = handleAddrOverrides(vbi, pfx, addr_expr);
27368 addr_expr = IRExpr_ITE(mkexpr(cond), addr_expr, getIReg64(R_RSP));
27369 expr = IRExpr_ITE(mkexpr(cond), loadLE(ty, addr_expr), expr);
27370 if (ty == Ity_I32) {
27371 putYMMRegLane32( rG, i, expr );
27372 putYMMRegLane32( rV, i, mkU32(0) );
27373 } else {
27374 putYMMRegLane64( rG, i, expr);
27375 putYMMRegLane64( rV, i, mkU64(0) );
27376 }
27377 }
27378
27379 if (!isYMM || (ty == Ity_I32 && isVM64x)) {
27380 if (ty == Ity_I64 || isYMM)
27381 putYMMRegLane128( rV, 1, mkV128(0) );
27382 else if (ty == Ity_I32 && count2 == 2) {
27383 putYMMRegLane64( rV, 1, mkU64(0) );
27384 putYMMRegLane64( rG, 1, mkU64(0) );
27385 }
27386 putYMMRegLane128( rG, 1, mkV128(0) );
27387 }
27388
27389 *uses_vvvv = True;
27390 return delta;
27391}
27392
27393
sewardjc4530ae2012-05-21 10:18:49 +000027394__attribute__((noinline))
27395static
27396Long dis_ESC_0F38__VEX (
27397 /*MB_OUT*/DisResult* dres,
27398 /*OUT*/ Bool* uses_vvvv,
florianbeac5302014-12-31 12:09:38 +000027399 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
sewardjc4530ae2012-05-21 10:18:49 +000027400 Bool resteerCisOk,
27401 void* callback_opaque,
floriancacba8e2014-12-15 18:58:07 +000027402 const VexArchInfo* archinfo,
27403 const VexAbiInfo* vbi,
sewardjc4530ae2012-05-21 10:18:49 +000027404 Prefix pfx, Int sz, Long deltaIN
27405 )
27406{
sewardj4b1cc832012-06-13 11:10:20 +000027407 IRTemp addr = IRTemp_INVALID;
27408 Int alen = 0;
27409 HChar dis_buf[50];
sewardjc4530ae2012-05-21 10:18:49 +000027410 Long delta = deltaIN;
27411 UChar opc = getUChar(delta);
27412 delta++;
27413 *uses_vvvv = False;
27414
27415 switch (opc) {
27416
27417 case 0x00:
sewardj251b59e2012-05-25 13:51:07 +000027418 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
sewardjc4530ae2012-05-21 10:18:49 +000027419 /* VPSHUFB = VEX.NDS.128.66.0F38.WIG 00 /r */
27420 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27421 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27422 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_XMM );
27423 goto decode_success;
27424 }
sewardjcc3d2192013-03-27 11:37:33 +000027425 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
27426 /* VPSHUFB = VEX.NDS.256.66.0F38.WIG 00 /r */
27427 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27428 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27429 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_YMM );
27430 goto decode_success;
27431 }
sewardjc4530ae2012-05-21 10:18:49 +000027432 break;
27433
sewardj8516a1f2012-06-24 14:26:30 +000027434 case 0x01:
27435 case 0x02:
27436 case 0x03:
27437 /* VPHADDW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 01 /r */
27438 /* VPHADDD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 02 /r */
27439 /* VPHADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 03 /r */
27440 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27441 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
27442 *uses_vvvv = True;
27443 goto decode_success;
27444 }
sewardjcc3d2192013-03-27 11:37:33 +000027445 /* VPHADDW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 01 /r */
27446 /* VPHADDD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 02 /r */
27447 /* VPHADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 03 /r */
27448 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27449 delta = dis_PHADD_256( vbi, pfx, delta, opc );
27450 *uses_vvvv = True;
27451 goto decode_success;
27452 }
sewardj8516a1f2012-06-24 14:26:30 +000027453 break;
27454
27455 case 0x04:
27456 /* VPMADDUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 04 /r */
27457 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27458 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27459 uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
27460 math_PMADDUBSW_128 );
27461 goto decode_success;
27462 }
sewardjcc3d2192013-03-27 11:37:33 +000027463 /* VPMADDUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 04 /r */
27464 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27465 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27466 uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
27467 math_PMADDUBSW_256 );
27468 goto decode_success;
27469 }
sewardj8516a1f2012-06-24 14:26:30 +000027470 break;
27471
27472 case 0x05:
27473 case 0x06:
27474 case 0x07:
27475 /* VPHSUBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 05 /r */
27476 /* VPHSUBD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 06 /r */
27477 /* VPHSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 07 /r */
27478 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27479 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
27480 *uses_vvvv = True;
27481 goto decode_success;
27482 }
sewardjcc3d2192013-03-27 11:37:33 +000027483 /* VPHSUBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 05 /r */
27484 /* VPHSUBD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 06 /r */
27485 /* VPHSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 07 /r */
27486 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27487 delta = dis_PHADD_256( vbi, pfx, delta, opc );
27488 *uses_vvvv = True;
27489 goto decode_success;
27490 }
sewardj8516a1f2012-06-24 14:26:30 +000027491 break;
27492
sewardj82096922012-06-24 14:57:59 +000027493 case 0x08:
27494 case 0x09:
27495 case 0x0A:
27496 /* VPSIGNB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 08 /r */
27497 /* VPSIGNW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 09 /r */
27498 /* VPSIGND xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0A /r */
27499 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27500 IRTemp sV = newTemp(Ity_V128);
27501 IRTemp dV = newTemp(Ity_V128);
27502 IRTemp sHi, sLo, dHi, dLo;
27503 sHi = sLo = dHi = dLo = IRTemp_INVALID;
florian5df8ab02012-10-13 19:34:19 +000027504 HChar ch = '?';
sewardj82096922012-06-24 14:57:59 +000027505 Int laneszB = 0;
27506 UChar modrm = getUChar(delta);
27507 UInt rG = gregOfRexRM(pfx,modrm);
27508 UInt rV = getVexNvvvv(pfx);
27509
27510 switch (opc) {
27511 case 0x08: laneszB = 1; ch = 'b'; break;
27512 case 0x09: laneszB = 2; ch = 'w'; break;
27513 case 0x0A: laneszB = 4; ch = 'd'; break;
27514 default: vassert(0);
27515 }
27516
27517 assign( dV, getXMMReg(rV) );
27518
27519 if (epartIsReg(modrm)) {
27520 UInt rE = eregOfRexRM(pfx,modrm);
27521 assign( sV, getXMMReg(rE) );
27522 delta += 1;
27523 DIP("vpsign%c %s,%s,%s\n", ch, nameXMMReg(rE),
27524 nameXMMReg(rV), nameXMMReg(rG));
27525 } else {
27526 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27527 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
27528 delta += alen;
27529 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
27530 nameXMMReg(rV), nameXMMReg(rG));
27531 }
27532
27533 breakupV128to64s( dV, &dHi, &dLo );
27534 breakupV128to64s( sV, &sHi, &sLo );
27535
27536 putYMMRegLoAndZU(
27537 rG,
27538 binop(Iop_64HLtoV128,
27539 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
27540 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
27541 )
27542 );
27543 *uses_vvvv = True;
27544 goto decode_success;
27545 }
sewardjcc3d2192013-03-27 11:37:33 +000027546 /* VPSIGNB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 08 /r */
27547 /* VPSIGNW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 09 /r */
27548 /* VPSIGND ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0A /r */
27549 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27550 IRTemp sV = newTemp(Ity_V256);
27551 IRTemp dV = newTemp(Ity_V256);
27552 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
27553 s3 = s2 = s1 = s0 = IRTemp_INVALID;
27554 d3 = d2 = d1 = d0 = IRTemp_INVALID;
27555 UChar ch = '?';
27556 Int laneszB = 0;
27557 UChar modrm = getUChar(delta);
27558 UInt rG = gregOfRexRM(pfx,modrm);
27559 UInt rV = getVexNvvvv(pfx);
27560
27561 switch (opc) {
27562 case 0x08: laneszB = 1; ch = 'b'; break;
27563 case 0x09: laneszB = 2; ch = 'w'; break;
27564 case 0x0A: laneszB = 4; ch = 'd'; break;
27565 default: vassert(0);
27566 }
27567
27568 assign( dV, getYMMReg(rV) );
27569
27570 if (epartIsReg(modrm)) {
27571 UInt rE = eregOfRexRM(pfx,modrm);
27572 assign( sV, getYMMReg(rE) );
27573 delta += 1;
27574 DIP("vpsign%c %s,%s,%s\n", ch, nameYMMReg(rE),
27575 nameYMMReg(rV), nameYMMReg(rG));
27576 } else {
27577 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27578 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
27579 delta += alen;
27580 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
27581 nameYMMReg(rV), nameYMMReg(rG));
27582 }
27583
27584 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
27585 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
27586
27587 putYMMReg(
27588 rG,
27589 binop( Iop_V128HLtoV256,
27590 binop(Iop_64HLtoV128,
27591 dis_PSIGN_helper( mkexpr(s3), mkexpr(d3), laneszB ),
27592 dis_PSIGN_helper( mkexpr(s2), mkexpr(d2), laneszB )
27593 ),
27594 binop(Iop_64HLtoV128,
27595 dis_PSIGN_helper( mkexpr(s1), mkexpr(d1), laneszB ),
27596 dis_PSIGN_helper( mkexpr(s0), mkexpr(d0), laneszB )
27597 )
27598 )
27599 );
27600 *uses_vvvv = True;
27601 goto decode_success;
27602 }
sewardj82096922012-06-24 14:57:59 +000027603 break;
27604
27605 case 0x0B:
27606 /* VPMULHRSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0B /r */
27607 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27608 IRTemp sV = newTemp(Ity_V128);
27609 IRTemp dV = newTemp(Ity_V128);
27610 IRTemp sHi, sLo, dHi, dLo;
27611 sHi = sLo = dHi = dLo = IRTemp_INVALID;
27612 UChar modrm = getUChar(delta);
27613 UInt rG = gregOfRexRM(pfx,modrm);
27614 UInt rV = getVexNvvvv(pfx);
27615
27616 assign( dV, getXMMReg(rV) );
27617
27618 if (epartIsReg(modrm)) {
27619 UInt rE = eregOfRexRM(pfx,modrm);
27620 assign( sV, getXMMReg(rE) );
27621 delta += 1;
27622 DIP("vpmulhrsw %s,%s,%s\n", nameXMMReg(rE),
27623 nameXMMReg(rV), nameXMMReg(rG));
27624 } else {
27625 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27626 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
27627 delta += alen;
27628 DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
27629 nameXMMReg(rV), nameXMMReg(rG));
27630 }
27631
27632 breakupV128to64s( dV, &dHi, &dLo );
27633 breakupV128to64s( sV, &sHi, &sLo );
27634
27635 putYMMRegLoAndZU(
27636 rG,
27637 binop(Iop_64HLtoV128,
27638 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
27639 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
27640 )
27641 );
27642 *uses_vvvv = True;
27643 goto decode_success;
27644 }
sewardjcc3d2192013-03-27 11:37:33 +000027645 /* VPMULHRSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0B /r */
27646 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27647 IRTemp sV = newTemp(Ity_V256);
27648 IRTemp dV = newTemp(Ity_V256);
27649 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
27650 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
27651 UChar modrm = getUChar(delta);
27652 UInt rG = gregOfRexRM(pfx,modrm);
27653 UInt rV = getVexNvvvv(pfx);
27654
27655 assign( dV, getYMMReg(rV) );
27656
27657 if (epartIsReg(modrm)) {
27658 UInt rE = eregOfRexRM(pfx,modrm);
27659 assign( sV, getYMMReg(rE) );
27660 delta += 1;
27661 DIP("vpmulhrsw %s,%s,%s\n", nameYMMReg(rE),
27662 nameYMMReg(rV), nameYMMReg(rG));
27663 } else {
27664 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27665 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
27666 delta += alen;
27667 DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
27668 nameYMMReg(rV), nameYMMReg(rG));
27669 }
27670
27671 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
27672 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
27673
27674 putYMMReg(
27675 rG,
27676 binop(Iop_V128HLtoV256,
27677 binop(Iop_64HLtoV128,
27678 dis_PMULHRSW_helper( mkexpr(s3), mkexpr(d3) ),
27679 dis_PMULHRSW_helper( mkexpr(s2), mkexpr(d2) ) ),
27680 binop(Iop_64HLtoV128,
27681 dis_PMULHRSW_helper( mkexpr(s1), mkexpr(d1) ),
27682 dis_PMULHRSW_helper( mkexpr(s0), mkexpr(d0) ) )
27683 )
27684 );
27685 *uses_vvvv = True;
27686 goto decode_success;
27687 }
sewardj82096922012-06-24 14:57:59 +000027688 break;
27689
sewardjd8bca7e2012-06-20 11:46:19 +000027690 case 0x0C:
27691 /* VPERMILPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0C /r */
27692 if (have66noF2noF3(pfx)
27693 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
27694 UChar modrm = getUChar(delta);
27695 UInt rG = gregOfRexRM(pfx, modrm);
27696 UInt rV = getVexNvvvv(pfx);
27697 IRTemp ctrlV = newTemp(Ity_V128);
27698 if (epartIsReg(modrm)) {
27699 UInt rE = eregOfRexRM(pfx, modrm);
27700 delta += 1;
27701 DIP("vpermilps %s,%s,%s\n",
27702 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
27703 assign(ctrlV, getXMMReg(rE));
27704 } else {
sewardj148e5942012-06-21 08:34:19 +000027705 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardjd8bca7e2012-06-20 11:46:19 +000027706 delta += alen;
27707 DIP("vpermilps %s,%s,%s\n",
27708 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
27709 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
27710 }
27711 IRTemp dataV = newTemp(Ity_V128);
27712 assign(dataV, getXMMReg(rV));
27713 IRTemp resV = math_PERMILPS_VAR_128(dataV, ctrlV);
27714 putYMMRegLoAndZU(rG, mkexpr(resV));
27715 *uses_vvvv = True;
27716 goto decode_success;
27717 }
27718 /* VPERMILPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0C /r */
27719 if (have66noF2noF3(pfx)
27720 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
27721 UChar modrm = getUChar(delta);
27722 UInt rG = gregOfRexRM(pfx, modrm);
27723 UInt rV = getVexNvvvv(pfx);
27724 IRTemp ctrlV = newTemp(Ity_V256);
27725 if (epartIsReg(modrm)) {
27726 UInt rE = eregOfRexRM(pfx, modrm);
27727 delta += 1;
27728 DIP("vpermilps %s,%s,%s\n",
27729 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
27730 assign(ctrlV, getYMMReg(rE));
27731 } else {
sewardj148e5942012-06-21 08:34:19 +000027732 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardjd8bca7e2012-06-20 11:46:19 +000027733 delta += alen;
27734 DIP("vpermilps %s,%s,%s\n",
27735 dis_buf, nameYMMReg(rV), nameYMMReg(rG));
27736 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
27737 }
27738 IRTemp dataV = newTemp(Ity_V256);
27739 assign(dataV, getYMMReg(rV));
27740 IRTemp resV = math_PERMILPS_VAR_256(dataV, ctrlV);
27741 putYMMReg(rG, mkexpr(resV));
27742 *uses_vvvv = True;
27743 goto decode_success;
27744 }
27745 break;
27746
27747 case 0x0D:
27748 /* VPERMILPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0D /r */
27749 if (have66noF2noF3(pfx)
27750 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
27751 UChar modrm = getUChar(delta);
27752 UInt rG = gregOfRexRM(pfx, modrm);
27753 UInt rV = getVexNvvvv(pfx);
27754 IRTemp ctrlV = newTemp(Ity_V128);
27755 if (epartIsReg(modrm)) {
27756 UInt rE = eregOfRexRM(pfx, modrm);
27757 delta += 1;
27758 DIP("vpermilpd %s,%s,%s\n",
27759 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
27760 assign(ctrlV, getXMMReg(rE));
27761 } else {
sewardj148e5942012-06-21 08:34:19 +000027762 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardjd8bca7e2012-06-20 11:46:19 +000027763 delta += alen;
27764 DIP("vpermilpd %s,%s,%s\n",
27765 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
27766 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
27767 }
27768 IRTemp dataV = newTemp(Ity_V128);
27769 assign(dataV, getXMMReg(rV));
27770 IRTemp resV = math_PERMILPD_VAR_128(dataV, ctrlV);
27771 putYMMRegLoAndZU(rG, mkexpr(resV));
27772 *uses_vvvv = True;
27773 goto decode_success;
27774 }
27775 /* VPERMILPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0D /r */
27776 if (have66noF2noF3(pfx)
27777 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
27778 UChar modrm = getUChar(delta);
27779 UInt rG = gregOfRexRM(pfx, modrm);
27780 UInt rV = getVexNvvvv(pfx);
27781 IRTemp ctrlV = newTemp(Ity_V256);
27782 if (epartIsReg(modrm)) {
27783 UInt rE = eregOfRexRM(pfx, modrm);
27784 delta += 1;
27785 DIP("vpermilpd %s,%s,%s\n",
27786 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
27787 assign(ctrlV, getYMMReg(rE));
27788 } else {
sewardj148e5942012-06-21 08:34:19 +000027789 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardjd8bca7e2012-06-20 11:46:19 +000027790 delta += alen;
27791 DIP("vpermilpd %s,%s,%s\n",
27792 dis_buf, nameYMMReg(rV), nameYMMReg(rG));
27793 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
27794 }
27795 IRTemp dataV = newTemp(Ity_V256);
27796 assign(dataV, getYMMReg(rV));
27797 IRTemp resV = math_PERMILPD_VAR_256(dataV, ctrlV);
27798 putYMMReg(rG, mkexpr(resV));
27799 *uses_vvvv = True;
27800 goto decode_success;
27801 }
27802 break;
27803
sewardjed1884d2012-06-21 08:53:48 +000027804 case 0x0E:
27805 /* VTESTPS xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0E /r */
27806 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27807 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 32 );
27808 goto decode_success;
27809 }
27810 /* VTESTPS ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0E /r */
27811 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27812 delta = dis_xTESTy_256( vbi, pfx, delta, 32 );
27813 goto decode_success;
27814 }
27815 break;
27816
27817 case 0x0F:
27818 /* VTESTPD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0F /r */
27819 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27820 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 64 );
27821 goto decode_success;
27822 }
27823 /* VTESTPD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0F /r */
27824 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27825 delta = dis_xTESTy_256( vbi, pfx, delta, 64 );
27826 goto decode_success;
27827 }
27828 break;
27829
sewardjcc3d2192013-03-27 11:37:33 +000027830 case 0x16:
27831 /* VPERMPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 16 /r */
27832 if (have66noF2noF3(pfx)
27833 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
27834 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27835 uses_vvvv, vbi, pfx, delta, "vpermps", math_VPERMD );
27836 goto decode_success;
27837 }
27838 break;
27839
sewardjed1884d2012-06-21 08:53:48 +000027840 case 0x17:
27841 /* VPTEST xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 17 /r */
27842 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27843 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 0 );
27844 goto decode_success;
27845 }
27846 /* VPTEST ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 17 /r */
27847 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27848 delta = dis_xTESTy_256( vbi, pfx, delta, 0 );
27849 goto decode_success;
27850 }
27851 break;
27852
sewardj6fcd43e2012-06-14 08:51:35 +000027853 case 0x18:
sewardj151cd3e2012-06-18 13:56:55 +000027854 /* VBROADCASTSS m32, xmm1 = VEX.128.66.0F38.WIG 18 /r */
sewardj6fcd43e2012-06-14 08:51:35 +000027855 if (have66noF2noF3(pfx)
sewardj151cd3e2012-06-18 13:56:55 +000027856 && 0==getVexL(pfx)/*128*/
sewardj6fcd43e2012-06-14 08:51:35 +000027857 && !epartIsReg(getUChar(delta))) {
27858 UChar modrm = getUChar(delta);
27859 UInt rG = gregOfRexRM(pfx, modrm);
27860 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27861 delta += alen;
27862 DIP("vbroadcastss %s,%s\n", dis_buf, nameXMMReg(rG));
27863 IRTemp t32 = newTemp(Ity_I32);
27864 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
27865 IRTemp t64 = newTemp(Ity_I64);
27866 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
27867 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
27868 putYMMRegLoAndZU(rG, res);
27869 goto decode_success;
sewardj82096922012-06-24 14:57:59 +000027870 }
sewardj151cd3e2012-06-18 13:56:55 +000027871 /* VBROADCASTSS m32, ymm1 = VEX.256.66.0F38.WIG 18 /r */
27872 if (have66noF2noF3(pfx)
27873 && 1==getVexL(pfx)/*256*/
27874 && !epartIsReg(getUChar(delta))) {
27875 UChar modrm = getUChar(delta);
27876 UInt rG = gregOfRexRM(pfx, modrm);
27877 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27878 delta += alen;
27879 DIP("vbroadcastss %s,%s\n", dis_buf, nameYMMReg(rG));
27880 IRTemp t32 = newTemp(Ity_I32);
27881 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
27882 IRTemp t64 = newTemp(Ity_I64);
27883 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
27884 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
27885 mkexpr(t64), mkexpr(t64));
27886 putYMMReg(rG, res);
27887 goto decode_success;
sewardj82096922012-06-24 14:57:59 +000027888 }
sewardjcc3d2192013-03-27 11:37:33 +000027889 /* VBROADCASTSS xmm2, xmm1 = VEX.128.66.0F38.WIG 18 /r */
27890 if (have66noF2noF3(pfx)
27891 && 0==getVexL(pfx)/*128*/
27892 && epartIsReg(getUChar(delta))) {
27893 UChar modrm = getUChar(delta);
27894 UInt rG = gregOfRexRM(pfx, modrm);
27895 UInt rE = eregOfRexRM(pfx, modrm);
27896 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
27897 IRTemp t32 = newTemp(Ity_I32);
27898 assign(t32, getXMMRegLane32(rE, 0));
27899 IRTemp t64 = newTemp(Ity_I64);
27900 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
27901 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
27902 putYMMRegLoAndZU(rG, res);
27903 delta++;
27904 goto decode_success;
27905 }
27906 /* VBROADCASTSS xmm2, ymm1 = VEX.256.66.0F38.WIG 18 /r */
27907 if (have66noF2noF3(pfx)
27908 && 1==getVexL(pfx)/*256*/
27909 && epartIsReg(getUChar(delta))) {
27910 UChar modrm = getUChar(delta);
27911 UInt rG = gregOfRexRM(pfx, modrm);
27912 UInt rE = eregOfRexRM(pfx, modrm);
27913 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
27914 IRTemp t32 = newTemp(Ity_I32);
27915 assign(t32, getXMMRegLane32(rE, 0));
27916 IRTemp t64 = newTemp(Ity_I64);
27917 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
27918 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
27919 mkexpr(t64), mkexpr(t64));
27920 putYMMReg(rG, res);
27921 delta++;
27922 goto decode_success;
27923 }
sewardj82096922012-06-24 14:57:59 +000027924 break;
sewardj6fcd43e2012-06-14 08:51:35 +000027925
sewardj4b1cc832012-06-13 11:10:20 +000027926 case 0x19:
sewardj151cd3e2012-06-18 13:56:55 +000027927 /* VBROADCASTSD m64, ymm1 = VEX.256.66.0F38.WIG 19 /r */
sewardj4b1cc832012-06-13 11:10:20 +000027928 if (have66noF2noF3(pfx)
sewardj151cd3e2012-06-18 13:56:55 +000027929 && 1==getVexL(pfx)/*256*/
sewardj4b1cc832012-06-13 11:10:20 +000027930 && !epartIsReg(getUChar(delta))) {
27931 UChar modrm = getUChar(delta);
27932 UInt rG = gregOfRexRM(pfx, modrm);
27933 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27934 delta += alen;
27935 DIP("vbroadcastsd %s,%s\n", dis_buf, nameYMMReg(rG));
27936 IRTemp t64 = newTemp(Ity_I64);
27937 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
27938 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
27939 mkexpr(t64), mkexpr(t64));
27940 putYMMReg(rG, res);
27941 goto decode_success;
sewardj82096922012-06-24 14:57:59 +000027942 }
sewardjcc3d2192013-03-27 11:37:33 +000027943 /* VBROADCASTSD xmm2, ymm1 = VEX.256.66.0F38.WIG 19 /r */
27944 if (have66noF2noF3(pfx)
27945 && 1==getVexL(pfx)/*256*/
27946 && epartIsReg(getUChar(delta))) {
27947 UChar modrm = getUChar(delta);
27948 UInt rG = gregOfRexRM(pfx, modrm);
27949 UInt rE = eregOfRexRM(pfx, modrm);
27950 DIP("vbroadcastsd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
27951 IRTemp t64 = newTemp(Ity_I64);
27952 assign(t64, getXMMRegLane64(rE, 0));
27953 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
27954 mkexpr(t64), mkexpr(t64));
27955 putYMMReg(rG, res);
27956 delta++;
27957 goto decode_success;
27958 }
sewardj82096922012-06-24 14:57:59 +000027959 break;
27960
27961 case 0x1A:
27962 /* VBROADCASTF128 m128, ymm1 = VEX.256.66.0F38.WIG 1A /r */
27963 if (have66noF2noF3(pfx)
27964 && 1==getVexL(pfx)/*256*/
27965 && !epartIsReg(getUChar(delta))) {
27966 UChar modrm = getUChar(delta);
27967 UInt rG = gregOfRexRM(pfx, modrm);
27968 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27969 delta += alen;
27970 DIP("vbroadcastf128 %s,%s\n", dis_buf, nameYMMReg(rG));
27971 IRTemp t128 = newTemp(Ity_V128);
27972 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
27973 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
27974 goto decode_success;
27975 }
27976 break;
sewardj4b1cc832012-06-13 11:10:20 +000027977
sewardj8516a1f2012-06-24 14:26:30 +000027978 case 0x1C:
27979 /* VPABSB xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1C /r */
27980 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27981 delta = dis_AVX128_E_to_G_unary(
27982 uses_vvvv, vbi, pfx, delta,
27983 "vpabsb", math_PABS_XMM_pap1 );
27984 goto decode_success;
27985 }
sewardjcc3d2192013-03-27 11:37:33 +000027986 /* VPABSB ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1C /r */
27987 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27988 delta = dis_AVX256_E_to_G_unary(
27989 uses_vvvv, vbi, pfx, delta,
27990 "vpabsb", math_PABS_YMM_pap1 );
27991 goto decode_success;
27992 }
sewardj8516a1f2012-06-24 14:26:30 +000027993 break;
27994
27995 case 0x1D:
27996 /* VPABSW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1D /r */
27997 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27998 delta = dis_AVX128_E_to_G_unary(
27999 uses_vvvv, vbi, pfx, delta,
28000 "vpabsw", math_PABS_XMM_pap2 );
28001 goto decode_success;
28002 }
sewardjcc3d2192013-03-27 11:37:33 +000028003 /* VPABSW ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1D /r */
28004 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28005 delta = dis_AVX256_E_to_G_unary(
28006 uses_vvvv, vbi, pfx, delta,
28007 "vpabsw", math_PABS_YMM_pap2 );
28008 goto decode_success;
28009 }
sewardj8516a1f2012-06-24 14:26:30 +000028010 break;
28011
sewardj97f72452012-05-23 05:56:53 +000028012 case 0x1E:
28013 /* VPABSD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1E /r */
28014 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28015 delta = dis_AVX128_E_to_G_unary(
28016 uses_vvvv, vbi, pfx, delta,
28017 "vpabsd", math_PABS_XMM_pap4 );
28018 goto decode_success;
28019 }
sewardjcc3d2192013-03-27 11:37:33 +000028020 /* VPABSD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1E /r */
28021 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28022 delta = dis_AVX256_E_to_G_unary(
28023 uses_vvvv, vbi, pfx, delta,
28024 "vpabsd", math_PABS_YMM_pap4 );
28025 goto decode_success;
28026 }
sewardj97f72452012-05-23 05:56:53 +000028027 break;
28028
sewardj6fcd43e2012-06-14 08:51:35 +000028029 case 0x20:
28030 /* VPMOVSXBW xmm2/m64, xmm1 */
28031 /* VPMOVSXBW = VEX.128.66.0F38.WIG 20 /r */
28032 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28033 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
28034 True/*isAvx*/, False/*!xIsZ*/ );
28035 goto decode_success;
28036 }
sewardjcc3d2192013-03-27 11:37:33 +000028037 /* VPMOVSXBW xmm2/m128, ymm1 */
28038 /* VPMOVSXBW = VEX.256.66.0F38.WIG 20 /r */
28039 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28040 delta = dis_PMOVxXBW_256( vbi, pfx, delta, False/*!xIsZ*/ );
28041 goto decode_success;
28042 }
sewardj6fcd43e2012-06-14 08:51:35 +000028043 break;
28044
sewardj4ed05e02012-06-18 15:01:30 +000028045 case 0x21:
28046 /* VPMOVSXBD xmm2/m32, xmm1 */
28047 /* VPMOVSXBD = VEX.128.66.0F38.WIG 21 /r */
28048 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28049 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
28050 True/*isAvx*/, False/*!xIsZ*/ );
28051 goto decode_success;
28052 }
sewardjcc3d2192013-03-27 11:37:33 +000028053 /* VPMOVSXBD xmm2/m64, ymm1 */
28054 /* VPMOVSXBD = VEX.256.66.0F38.WIG 21 /r */
28055 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28056 delta = dis_PMOVxXBD_256( vbi, pfx, delta, False/*!xIsZ*/ );
28057 goto decode_success;
28058 }
sewardj4ed05e02012-06-18 15:01:30 +000028059 break;
28060
sewardj8516a1f2012-06-24 14:26:30 +000028061 case 0x22:
28062 /* VPMOVSXBQ xmm2/m16, xmm1 */
28063 /* VPMOVSXBQ = VEX.128.66.0F38.WIG 22 /r */
28064 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28065 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, True/*isAvx*/ );
28066 goto decode_success;
28067 }
sewardjcc3d2192013-03-27 11:37:33 +000028068 /* VPMOVSXBQ xmm2/m32, ymm1 */
28069 /* VPMOVSXBQ = VEX.256.66.0F38.WIG 22 /r */
28070 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28071 delta = dis_PMOVSXBQ_256( vbi, pfx, delta );
28072 goto decode_success;
28073 }
sewardj8516a1f2012-06-24 14:26:30 +000028074 break;
28075
sewardj6fcd43e2012-06-14 08:51:35 +000028076 case 0x23:
28077 /* VPMOVSXWD xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 23 /r */
28078 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28079 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
28080 True/*isAvx*/, False/*!xIsZ*/ );
28081 goto decode_success;
28082 }
sewardjcc3d2192013-03-27 11:37:33 +000028083 /* VPMOVSXWD xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 23 /r */
28084 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28085 delta = dis_PMOVxXWD_256( vbi, pfx, delta, False/*!xIsZ*/ );
28086 goto decode_success;
28087 }
sewardj6fcd43e2012-06-14 08:51:35 +000028088 break;
28089
sewardj8516a1f2012-06-24 14:26:30 +000028090 case 0x24:
28091 /* VPMOVSXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 24 /r */
28092 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28093 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, True/*isAvx*/ );
28094 goto decode_success;
28095 }
sewardjcc3d2192013-03-27 11:37:33 +000028096 /* VPMOVSXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 24 /r */
28097 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28098 delta = dis_PMOVSXWQ_256( vbi, pfx, delta );
28099 goto decode_success;
28100 }
sewardj8516a1f2012-06-24 14:26:30 +000028101 break;
28102
sewardj6fcd43e2012-06-14 08:51:35 +000028103 case 0x25:
28104 /* VPMOVSXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 25 /r */
28105 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28106 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
28107 True/*isAvx*/, False/*!xIsZ*/ );
28108 goto decode_success;
28109 }
sewardjcc3d2192013-03-27 11:37:33 +000028110 /* VPMOVSXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 25 /r */
28111 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28112 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, False/*!xIsZ*/ );
28113 goto decode_success;
28114 }
sewardj6fcd43e2012-06-14 08:51:35 +000028115 break;
28116
sewardj89378162012-06-24 12:12:20 +000028117 case 0x28:
28118 /* VPMULDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 28 /r */
28119 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28120 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28121 uses_vvvv, vbi, pfx, delta,
28122 "vpmuldq", math_PMULDQ_128 );
28123 goto decode_success;
28124 }
sewardjcc3d2192013-03-27 11:37:33 +000028125 /* VPMULDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 28 /r */
28126 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28127 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28128 uses_vvvv, vbi, pfx, delta,
28129 "vpmuldq", math_PMULDQ_256 );
28130 goto decode_success;
28131 }
sewardj89378162012-06-24 12:12:20 +000028132 break;
28133
sewardj56c30312012-06-12 08:45:39 +000028134 case 0x29:
28135 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28136 /* VPCMPEQQ = VEX.NDS.128.66.0F38.WIG 29 /r */
28137 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28138 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28139 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x2 );
28140 goto decode_success;
28141 }
sewardjcc3d2192013-03-27 11:37:33 +000028142 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28143 /* VPCMPEQQ = VEX.NDS.256.66.0F38.WIG 29 /r */
28144 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28145 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28146 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x4 );
28147 goto decode_success;
28148 }
sewardj56c30312012-06-12 08:45:39 +000028149 break;
28150
sewardj8eb7ae82012-06-24 14:00:27 +000028151 case 0x2A:
28152 /* VMOVNTDQA m128, xmm1 = VEX.128.66.0F38.WIG 2A /r */
28153 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28154 && !epartIsReg(getUChar(delta))) {
28155 UChar modrm = getUChar(delta);
28156 UInt rD = gregOfRexRM(pfx, modrm);
28157 IRTemp tD = newTemp(Ity_V128);
28158 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28159 delta += alen;
28160 gen_SEGV_if_not_16_aligned(addr);
28161 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
28162 DIP("vmovntdqa %s,%s\n", dis_buf, nameXMMReg(rD));
28163 putYMMRegLoAndZU(rD, mkexpr(tD));
28164 goto decode_success;
28165 }
sewardjcc3d2192013-03-27 11:37:33 +000028166 /* VMOVNTDQA m256, ymm1 = VEX.256.66.0F38.WIG 2A /r */
28167 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28168 && !epartIsReg(getUChar(delta))) {
28169 UChar modrm = getUChar(delta);
28170 UInt rD = gregOfRexRM(pfx, modrm);
28171 IRTemp tD = newTemp(Ity_V256);
28172 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28173 delta += alen;
28174 gen_SEGV_if_not_32_aligned(addr);
28175 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
28176 DIP("vmovntdqa %s,%s\n", dis_buf, nameYMMReg(rD));
28177 putYMMReg(rD, mkexpr(tD));
28178 goto decode_success;
28179 }
sewardj8eb7ae82012-06-24 14:00:27 +000028180 break;
28181
sewardj8516a1f2012-06-24 14:26:30 +000028182 case 0x2B:
28183 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28184 /* VPACKUSDW = VEX.NDS.128.66.0F38.WIG 2B /r */
28185 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28186 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
28187 uses_vvvv, vbi, pfx, delta, "vpackusdw",
28188 Iop_QNarrowBin32Sto16Ux8, NULL,
28189 False/*!invertLeftArg*/, True/*swapArgs*/ );
28190 goto decode_success;
28191 }
sewardjcc3d2192013-03-27 11:37:33 +000028192 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28193 /* VPACKUSDW = VEX.NDS.256.66.0F38.WIG 2B /r */
28194 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28195 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28196 uses_vvvv, vbi, pfx, delta, "vpackusdw",
28197 math_VPACKUSDW_YMM );
28198 goto decode_success;
28199 }
28200 break;
28201
28202 case 0x2C:
28203 /* VMASKMOVPS m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 2C /r */
28204 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28205 && !epartIsReg(getUChar(delta))) {
28206 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
28207 /*!isYMM*/False, Ity_I32 );
28208 goto decode_success;
28209 }
28210 /* VMASKMOVPS m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 2C /r */
28211 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28212 && !epartIsReg(getUChar(delta))) {
28213 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
28214 /*isYMM*/True, Ity_I32 );
28215 goto decode_success;
28216 }
28217 break;
28218
28219 case 0x2D:
28220 /* VMASKMOVPD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 2D /r */
28221 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28222 && !epartIsReg(getUChar(delta))) {
28223 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
28224 /*!isYMM*/False, Ity_I64 );
28225 goto decode_success;
28226 }
28227 /* VMASKMOVPD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 2D /r */
28228 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28229 && !epartIsReg(getUChar(delta))) {
28230 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
28231 /*isYMM*/True, Ity_I64 );
28232 goto decode_success;
28233 }
sewardj8516a1f2012-06-24 14:26:30 +000028234 break;
28235
sewardjc4530ae2012-05-21 10:18:49 +000028236 case 0x30:
28237 /* VPMOVZXBW xmm2/m64, xmm1 */
28238 /* VPMOVZXBW = VEX.128.66.0F38.WIG 30 /r */
28239 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
sewardj6fcd43e2012-06-14 08:51:35 +000028240 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
28241 True/*isAvx*/, True/*xIsZ*/ );
sewardjc4530ae2012-05-21 10:18:49 +000028242 goto decode_success;
28243 }
sewardjcc3d2192013-03-27 11:37:33 +000028244 /* VPMOVZXBW xmm2/m128, ymm1 */
28245 /* VPMOVZXBW = VEX.256.66.0F38.WIG 30 /r */
28246 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28247 delta = dis_PMOVxXBW_256( vbi, pfx, delta, True/*xIsZ*/ );
28248 goto decode_success;
28249 }
sewardjc4530ae2012-05-21 10:18:49 +000028250 break;
28251
sewardj4ed05e02012-06-18 15:01:30 +000028252 case 0x31:
28253 /* VPMOVZXBD xmm2/m32, xmm1 */
28254 /* VPMOVZXBD = VEX.128.66.0F38.WIG 31 /r */
28255 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28256 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
28257 True/*isAvx*/, True/*xIsZ*/ );
28258 goto decode_success;
28259 }
sewardjcc3d2192013-03-27 11:37:33 +000028260 /* VPMOVZXBD xmm2/m64, ymm1 */
28261 /* VPMOVZXBD = VEX.256.66.0F38.WIG 31 /r */
28262 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28263 delta = dis_PMOVxXBD_256( vbi, pfx, delta, True/*xIsZ*/ );
28264 goto decode_success;
28265 }
sewardj4ed05e02012-06-18 15:01:30 +000028266 break;
28267
sewardj8516a1f2012-06-24 14:26:30 +000028268 case 0x32:
28269 /* VPMOVZXBQ xmm2/m16, xmm1 */
28270 /* VPMOVZXBQ = VEX.128.66.0F38.WIG 32 /r */
28271 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28272 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, True/*isAvx*/ );
28273 goto decode_success;
28274 }
sewardjcc3d2192013-03-27 11:37:33 +000028275 /* VPMOVZXBQ xmm2/m32, ymm1 */
28276 /* VPMOVZXBQ = VEX.256.66.0F38.WIG 32 /r */
28277 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28278 delta = dis_PMOVZXBQ_256( vbi, pfx, delta );
28279 goto decode_success;
28280 }
sewardj8516a1f2012-06-24 14:26:30 +000028281 break;
28282
sewardjc4530ae2012-05-21 10:18:49 +000028283 case 0x33:
28284 /* VPMOVZXWD xmm2/m64, xmm1 */
28285 /* VPMOVZXWD = VEX.128.66.0F38.WIG 33 /r */
28286 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
sewardj6fcd43e2012-06-14 08:51:35 +000028287 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
28288 True/*isAvx*/, True/*xIsZ*/ );
sewardjc4530ae2012-05-21 10:18:49 +000028289 goto decode_success;
28290 }
sewardjcc3d2192013-03-27 11:37:33 +000028291 /* VPMOVZXWD xmm2/m128, ymm1 */
28292 /* VPMOVZXWD = VEX.256.66.0F38.WIG 33 /r */
28293 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28294 delta = dis_PMOVxXWD_256( vbi, pfx, delta, True/*xIsZ*/ );
28295 goto decode_success;
28296 }
sewardjc4530ae2012-05-21 10:18:49 +000028297 break;
28298
sewardj8516a1f2012-06-24 14:26:30 +000028299 case 0x34:
28300 /* VPMOVZXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 34 /r */
28301 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28302 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, True/*isAvx*/ );
28303 goto decode_success;
28304 }
sewardjcc3d2192013-03-27 11:37:33 +000028305 /* VPMOVZXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 34 /r */
28306 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28307 delta = dis_PMOVZXWQ_256( vbi, pfx, delta );
28308 goto decode_success;
28309 }
sewardj8516a1f2012-06-24 14:26:30 +000028310 break;
28311
28312 case 0x35:
28313 /* VPMOVZXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 35 /r */
28314 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28315 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
28316 True/*isAvx*/, True/*xIsZ*/ );
28317 goto decode_success;
28318 }
sewardjcc3d2192013-03-27 11:37:33 +000028319 /* VPMOVZXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 35 /r */
28320 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28321 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, True/*xIsZ*/ );
28322 goto decode_success;
28323 }
28324 break;
28325
28326 case 0x36:
28327 /* VPERMD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 36 /r */
28328 if (have66noF2noF3(pfx)
28329 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
28330 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28331 uses_vvvv, vbi, pfx, delta, "vpermd", math_VPERMD );
28332 goto decode_success;
28333 }
sewardj8516a1f2012-06-24 14:26:30 +000028334 break;
28335
sewardj56c30312012-06-12 08:45:39 +000028336 case 0x37:
28337 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
28338 /* VPCMPGTQ = VEX.NDS.128.66.0F38.WIG 37 /r */
28339 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28340 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28341 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx2 );
28342 goto decode_success;
28343 }
sewardjcc3d2192013-03-27 11:37:33 +000028344 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
28345 /* VPCMPGTQ = VEX.NDS.256.66.0F38.WIG 37 /r */
28346 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28347 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28348 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx4 );
28349 goto decode_success;
28350 }
sewardj56c30312012-06-12 08:45:39 +000028351 break;
28352
sewardje8a7eb72012-06-12 14:59:17 +000028353 case 0x38:
28354 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
28355 /* VPMINSB = VEX.NDS.128.66.0F38.WIG 38 /r */
28356 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28357 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28358 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx16 );
28359 goto decode_success;
28360 }
sewardjcc3d2192013-03-27 11:37:33 +000028361 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
28362 /* VPMINSB = VEX.NDS.256.66.0F38.WIG 38 /r */
28363 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28364 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28365 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx32 );
28366 goto decode_success;
28367 }
sewardje8a7eb72012-06-12 14:59:17 +000028368 break;
28369
sewardjc4530ae2012-05-21 10:18:49 +000028370 case 0x39:
sewardj251b59e2012-05-25 13:51:07 +000028371 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
sewardjc4530ae2012-05-21 10:18:49 +000028372 /* VPMINSD = VEX.NDS.128.66.0F38.WIG 39 /r */
28373 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28374 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28375 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx4 );
28376 goto decode_success;
28377 }
sewardjcc3d2192013-03-27 11:37:33 +000028378 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
28379 /* VPMINSD = VEX.NDS.256.66.0F38.WIG 39 /r */
28380 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28381 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28382 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx8 );
28383 goto decode_success;
28384 }
sewardjc4530ae2012-05-21 10:18:49 +000028385 break;
28386
sewardje8a7eb72012-06-12 14:59:17 +000028387 case 0x3A:
28388 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
28389 /* VPMINUW = VEX.NDS.128.66.0F38.WIG 3A /r */
28390 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28391 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28392 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux8 );
28393 goto decode_success;
28394 }
sewardjcc3d2192013-03-27 11:37:33 +000028395 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
28396 /* VPMINUW = VEX.NDS.256.66.0F38.WIG 3A /r */
28397 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28398 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28399 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux16 );
28400 goto decode_success;
28401 }
sewardje8a7eb72012-06-12 14:59:17 +000028402 break;
28403
28404 case 0x3B:
28405 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
28406 /* VPMINUD = VEX.NDS.128.66.0F38.WIG 3B /r */
28407 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28408 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28409 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux4 );
28410 goto decode_success;
28411 }
sewardjcc3d2192013-03-27 11:37:33 +000028412 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
28413 /* VPMINUD = VEX.NDS.256.66.0F38.WIG 3B /r */
28414 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28415 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28416 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux8 );
28417 goto decode_success;
28418 }
sewardje8a7eb72012-06-12 14:59:17 +000028419 break;
28420
28421 case 0x3C:
28422 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
28423 /* VPMAXSB = VEX.NDS.128.66.0F38.WIG 3C /r */
28424 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28425 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28426 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx16 );
28427 goto decode_success;
28428 }
sewardjcc3d2192013-03-27 11:37:33 +000028429 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
28430 /* VPMAXSB = VEX.NDS.256.66.0F38.WIG 3C /r */
28431 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28432 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28433 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx32 );
28434 goto decode_success;
28435 }
sewardje8a7eb72012-06-12 14:59:17 +000028436 break;
28437
sewardjc4530ae2012-05-21 10:18:49 +000028438 case 0x3D:
sewardj251b59e2012-05-25 13:51:07 +000028439 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
sewardjc4530ae2012-05-21 10:18:49 +000028440 /* VPMAXSD = VEX.NDS.128.66.0F38.WIG 3D /r */
28441 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28442 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28443 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx4 );
28444 goto decode_success;
28445 }
sewardjcc3d2192013-03-27 11:37:33 +000028446 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
28447 /* VPMAXSD = VEX.NDS.256.66.0F38.WIG 3D /r */
28448 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28449 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28450 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx8 );
28451 goto decode_success;
28452 }
sewardjc4530ae2012-05-21 10:18:49 +000028453 break;
28454
sewardje8a7eb72012-06-12 14:59:17 +000028455 case 0x3E:
28456 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
28457 /* VPMAXUW = VEX.NDS.128.66.0F38.WIG 3E /r */
28458 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28459 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28460 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux8 );
28461 goto decode_success;
28462 }
sewardjcc3d2192013-03-27 11:37:33 +000028463 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
28464 /* VPMAXUW = VEX.NDS.256.66.0F38.WIG 3E /r */
28465 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28466 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28467 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux16 );
28468 goto decode_success;
28469 }
sewardje8a7eb72012-06-12 14:59:17 +000028470 break;
28471
28472 case 0x3F:
28473 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
28474 /* VPMAXUD = VEX.NDS.128.66.0F38.WIG 3F /r */
28475 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28476 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28477 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux4 );
28478 goto decode_success;
28479 }
sewardjcc3d2192013-03-27 11:37:33 +000028480 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
28481 /* VPMAXUD = VEX.NDS.256.66.0F38.WIG 3F /r */
28482 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28483 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28484 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux8 );
28485 goto decode_success;
28486 }
sewardje8a7eb72012-06-12 14:59:17 +000028487 break;
28488
28489 case 0x40:
28490 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
28491 /* VPMULLD = VEX.NDS.128.66.0F38.WIG 40 /r */
28492 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28493 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28494 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x4 );
28495 goto decode_success;
28496 }
sewardjcc3d2192013-03-27 11:37:33 +000028497 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
28498 /* VPMULLD = VEX.NDS.256.66.0F38.WIG 40 /r */
28499 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28500 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28501 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x8 );
28502 goto decode_success;
28503 }
sewardje8a7eb72012-06-12 14:59:17 +000028504 break;
28505
28506 case 0x41:
28507 /* VPHMINPOSUW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 41 /r */
28508 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28509 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, True/*isAvx*/ );
28510 goto decode_success;
28511 }
28512 break;
28513
sewardjcc3d2192013-03-27 11:37:33 +000028514 case 0x45:
28515 /* VPSRLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 45 /r */
28516 /* VPSRLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 45 /r */
28517 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
28518 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvd",
28519 Iop_Shr32, 1==getVexL(pfx) );
28520 *uses_vvvv = True;
28521 goto decode_success;
28522 }
28523 /* VPSRLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 45 /r */
28524 /* VPSRLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 45 /r */
28525 if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
28526 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvq",
28527 Iop_Shr64, 1==getVexL(pfx) );
28528 *uses_vvvv = True;
28529 goto decode_success;
28530 }
28531 break;
28532
28533 case 0x46:
28534 /* VPSRAVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 46 /r */
28535 /* VPSRAVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 46 /r */
28536 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
28537 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsravd",
28538 Iop_Sar32, 1==getVexL(pfx) );
28539 *uses_vvvv = True;
28540 goto decode_success;
28541 }
28542 break;
28543
28544 case 0x47:
28545 /* VPSLLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 47 /r */
28546 /* VPSLLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 47 /r */
28547 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
28548 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvd",
28549 Iop_Shl32, 1==getVexL(pfx) );
28550 *uses_vvvv = True;
28551 goto decode_success;
28552 }
28553 /* VPSLLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 47 /r */
28554 /* VPSLLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 47 /r */
28555 if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
28556 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvq",
28557 Iop_Shl64, 1==getVexL(pfx) );
28558 *uses_vvvv = True;
28559 goto decode_success;
28560 }
28561 break;
28562
28563 case 0x58:
28564 /* VPBROADCASTD xmm2/m32, xmm1 = VEX.128.66.0F38.W0 58 /r */
28565 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28566 && 0==getRexW(pfx)/*W0*/) {
28567 UChar modrm = getUChar(delta);
28568 UInt rG = gregOfRexRM(pfx, modrm);
28569 IRTemp t32 = newTemp(Ity_I32);
28570 if (epartIsReg(modrm)) {
28571 UInt rE = eregOfRexRM(pfx, modrm);
28572 delta++;
28573 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
28574 assign(t32, getXMMRegLane32(rE, 0));
28575 } else {
28576 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28577 delta += alen;
28578 DIP("vpbroadcastd %s,%s\n", dis_buf, nameXMMReg(rG));
28579 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
28580 }
28581 IRTemp t64 = newTemp(Ity_I64);
28582 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28583 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
28584 putYMMRegLoAndZU(rG, res);
28585 goto decode_success;
28586 }
28587 /* VPBROADCASTD xmm2/m32, ymm1 = VEX.256.66.0F38.W0 58 /r */
28588 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28589 && 0==getRexW(pfx)/*W0*/) {
28590 UChar modrm = getUChar(delta);
28591 UInt rG = gregOfRexRM(pfx, modrm);
28592 IRTemp t32 = newTemp(Ity_I32);
28593 if (epartIsReg(modrm)) {
28594 UInt rE = eregOfRexRM(pfx, modrm);
28595 delta++;
28596 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
28597 assign(t32, getXMMRegLane32(rE, 0));
28598 } else {
28599 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28600 delta += alen;
28601 DIP("vpbroadcastd %s,%s\n", dis_buf, nameYMMReg(rG));
28602 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
28603 }
28604 IRTemp t64 = newTemp(Ity_I64);
28605 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28606 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28607 mkexpr(t64), mkexpr(t64));
28608 putYMMReg(rG, res);
28609 goto decode_success;
28610 }
28611 break;
28612
28613 case 0x59:
28614 /* VPBROADCASTQ xmm2/m64, xmm1 = VEX.128.66.0F38.W0 59 /r */
28615 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28616 && 0==getRexW(pfx)/*W0*/) {
28617 UChar modrm = getUChar(delta);
28618 UInt rG = gregOfRexRM(pfx, modrm);
28619 IRTemp t64 = newTemp(Ity_I64);
28620 if (epartIsReg(modrm)) {
28621 UInt rE = eregOfRexRM(pfx, modrm);
28622 delta++;
28623 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
28624 assign(t64, getXMMRegLane64(rE, 0));
28625 } else {
28626 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28627 delta += alen;
28628 DIP("vpbroadcastq %s,%s\n", dis_buf, nameXMMReg(rG));
28629 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
28630 }
28631 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
28632 putYMMRegLoAndZU(rG, res);
28633 goto decode_success;
28634 }
28635 /* VPBROADCASTQ xmm2/m64, ymm1 = VEX.256.66.0F38.W0 59 /r */
28636 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28637 && 0==getRexW(pfx)/*W0*/) {
28638 UChar modrm = getUChar(delta);
28639 UInt rG = gregOfRexRM(pfx, modrm);
28640 IRTemp t64 = newTemp(Ity_I64);
28641 if (epartIsReg(modrm)) {
28642 UInt rE = eregOfRexRM(pfx, modrm);
28643 delta++;
28644 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
28645 assign(t64, getXMMRegLane64(rE, 0));
28646 } else {
28647 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28648 delta += alen;
28649 DIP("vpbroadcastq %s,%s\n", dis_buf, nameYMMReg(rG));
28650 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
28651 }
28652 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28653 mkexpr(t64), mkexpr(t64));
28654 putYMMReg(rG, res);
28655 goto decode_success;
28656 }
28657 break;
28658
28659 case 0x5A:
28660 /* VBROADCASTI128 m128, ymm1 = VEX.256.66.0F38.WIG 5A /r */
28661 if (have66noF2noF3(pfx)
28662 && 1==getVexL(pfx)/*256*/
28663 && !epartIsReg(getUChar(delta))) {
28664 UChar modrm = getUChar(delta);
28665 UInt rG = gregOfRexRM(pfx, modrm);
28666 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28667 delta += alen;
28668 DIP("vbroadcasti128 %s,%s\n", dis_buf, nameYMMReg(rG));
28669 IRTemp t128 = newTemp(Ity_V128);
28670 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
28671 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
28672 goto decode_success;
28673 }
28674 break;
28675
28676 case 0x78:
28677 /* VPBROADCASTB xmm2/m8, xmm1 = VEX.128.66.0F38.W0 78 /r */
28678 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28679 && 0==getRexW(pfx)/*W0*/) {
28680 UChar modrm = getUChar(delta);
28681 UInt rG = gregOfRexRM(pfx, modrm);
28682 IRTemp t8 = newTemp(Ity_I8);
28683 if (epartIsReg(modrm)) {
28684 UInt rE = eregOfRexRM(pfx, modrm);
sewardj07ab40d2014-11-07 10:08:21 +000028685 delta++;
sewardjcc3d2192013-03-27 11:37:33 +000028686 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
28687 assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
28688 } else {
28689 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28690 delta += alen;
28691 DIP("vpbroadcastb %s,%s\n", dis_buf, nameXMMReg(rG));
28692 assign(t8, loadLE(Ity_I8, mkexpr(addr)));
28693 }
28694 IRTemp t16 = newTemp(Ity_I16);
28695 assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
28696 IRTemp t32 = newTemp(Ity_I32);
28697 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
28698 IRTemp t64 = newTemp(Ity_I64);
28699 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28700 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
28701 putYMMRegLoAndZU(rG, res);
28702 goto decode_success;
28703 }
28704 /* VPBROADCASTB xmm2/m8, ymm1 = VEX.256.66.0F38.W0 78 /r */
28705 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28706 && 0==getRexW(pfx)/*W0*/) {
28707 UChar modrm = getUChar(delta);
28708 UInt rG = gregOfRexRM(pfx, modrm);
28709 IRTemp t8 = newTemp(Ity_I8);
28710 if (epartIsReg(modrm)) {
28711 UInt rE = eregOfRexRM(pfx, modrm);
sewardj07ab40d2014-11-07 10:08:21 +000028712 delta++;
sewardjcc3d2192013-03-27 11:37:33 +000028713 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
28714 assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
28715 } else {
28716 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28717 delta += alen;
28718 DIP("vpbroadcastb %s,%s\n", dis_buf, nameYMMReg(rG));
28719 assign(t8, loadLE(Ity_I8, mkexpr(addr)));
28720 }
28721 IRTemp t16 = newTemp(Ity_I16);
28722 assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
28723 IRTemp t32 = newTemp(Ity_I32);
28724 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
28725 IRTemp t64 = newTemp(Ity_I64);
28726 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28727 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28728 mkexpr(t64), mkexpr(t64));
28729 putYMMReg(rG, res);
28730 goto decode_success;
28731 }
28732 break;
28733
28734 case 0x79:
28735 /* VPBROADCASTW xmm2/m16, xmm1 = VEX.128.66.0F38.W0 79 /r */
28736 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28737 && 0==getRexW(pfx)/*W0*/) {
28738 UChar modrm = getUChar(delta);
28739 UInt rG = gregOfRexRM(pfx, modrm);
28740 IRTemp t16 = newTemp(Ity_I16);
28741 if (epartIsReg(modrm)) {
28742 UInt rE = eregOfRexRM(pfx, modrm);
sewardj07ab40d2014-11-07 10:08:21 +000028743 delta++;
sewardjcc3d2192013-03-27 11:37:33 +000028744 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
28745 assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
28746 } else {
28747 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28748 delta += alen;
28749 DIP("vpbroadcastw %s,%s\n", dis_buf, nameXMMReg(rG));
28750 assign(t16, loadLE(Ity_I16, mkexpr(addr)));
28751 }
28752 IRTemp t32 = newTemp(Ity_I32);
28753 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
28754 IRTemp t64 = newTemp(Ity_I64);
28755 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28756 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
28757 putYMMRegLoAndZU(rG, res);
28758 goto decode_success;
28759 }
28760 /* VPBROADCASTW xmm2/m16, ymm1 = VEX.256.66.0F38.W0 79 /r */
28761 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28762 && 0==getRexW(pfx)/*W0*/) {
28763 UChar modrm = getUChar(delta);
28764 UInt rG = gregOfRexRM(pfx, modrm);
28765 IRTemp t16 = newTemp(Ity_I16);
28766 if (epartIsReg(modrm)) {
28767 UInt rE = eregOfRexRM(pfx, modrm);
sewardj07ab40d2014-11-07 10:08:21 +000028768 delta++;
sewardjcc3d2192013-03-27 11:37:33 +000028769 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
28770 assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
28771 } else {
28772 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28773 delta += alen;
28774 DIP("vpbroadcastw %s,%s\n", dis_buf, nameYMMReg(rG));
28775 assign(t16, loadLE(Ity_I16, mkexpr(addr)));
28776 }
28777 IRTemp t32 = newTemp(Ity_I32);
28778 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
28779 IRTemp t64 = newTemp(Ity_I64);
28780 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28781 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28782 mkexpr(t64), mkexpr(t64));
28783 putYMMReg(rG, res);
28784 goto decode_success;
28785 }
28786 break;
28787
28788 case 0x8C:
28789 /* VPMASKMOVD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 8C /r */
28790 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28791 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28792 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
28793 /*!isYMM*/False, Ity_I32 );
28794 goto decode_success;
28795 }
28796 /* VPMASKMOVD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 8C /r */
28797 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28798 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28799 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
28800 /*isYMM*/True, Ity_I32 );
28801 goto decode_success;
28802 }
28803 /* VPMASKMOVQ m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 8C /r */
28804 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28805 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28806 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
28807 /*!isYMM*/False, Ity_I64 );
28808 goto decode_success;
28809 }
28810 /* VPMASKMOVQ m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 8C /r */
28811 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28812 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28813 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
28814 /*isYMM*/True, Ity_I64 );
28815 goto decode_success;
28816 }
28817 break;
28818
28819 case 0x90:
28820 /* VPGATHERDD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 90 /r */
28821 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28822 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28823 Long delta0 = delta;
28824 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd",
28825 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 );
28826 if (delta != delta0)
28827 goto decode_success;
28828 }
28829 /* VPGATHERDD ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 90 /r */
28830 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28831 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28832 Long delta0 = delta;
28833 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd",
28834 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 );
28835 if (delta != delta0)
28836 goto decode_success;
28837 }
28838 /* VPGATHERDQ xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 90 /r */
28839 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28840 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28841 Long delta0 = delta;
28842 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq",
28843 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 );
28844 if (delta != delta0)
28845 goto decode_success;
28846 }
28847 /* VPGATHERDQ ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 90 /r */
28848 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28849 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28850 Long delta0 = delta;
28851 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq",
28852 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 );
28853 if (delta != delta0)
28854 goto decode_success;
28855 }
28856 break;
28857
28858 case 0x91:
28859 /* VPGATHERQD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 91 /r */
28860 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28861 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28862 Long delta0 = delta;
28863 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd",
28864 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 );
28865 if (delta != delta0)
28866 goto decode_success;
28867 }
28868 /* VPGATHERQD xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 91 /r */
28869 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28870 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28871 Long delta0 = delta;
28872 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd",
28873 /*isYMM*/True, /*isVM64x*/True, Ity_I32 );
28874 if (delta != delta0)
28875 goto decode_success;
28876 }
28877 /* VPGATHERQQ xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 91 /r */
28878 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28879 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28880 Long delta0 = delta;
28881 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq",
28882 /*!isYMM*/False, /*isVM64x*/True, Ity_I64 );
28883 if (delta != delta0)
28884 goto decode_success;
28885 }
28886 /* VPGATHERQQ ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 91 /r */
28887 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28888 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28889 Long delta0 = delta;
28890 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq",
28891 /*isYMM*/True, /*isVM64x*/True, Ity_I64 );
28892 if (delta != delta0)
28893 goto decode_success;
28894 }
28895 break;
28896
28897 case 0x92:
28898 /* VGATHERDPS xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 92 /r */
28899 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28900 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28901 Long delta0 = delta;
28902 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps",
28903 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 );
28904 if (delta != delta0)
28905 goto decode_success;
28906 }
28907 /* VGATHERDPS ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 92 /r */
28908 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28909 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28910 Long delta0 = delta;
28911 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps",
28912 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 );
28913 if (delta != delta0)
28914 goto decode_success;
28915 }
28916 /* VGATHERDPD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 92 /r */
28917 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28918 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28919 Long delta0 = delta;
28920 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd",
28921 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 );
28922 if (delta != delta0)
28923 goto decode_success;
28924 }
28925 /* VGATHERDPD ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 92 /r */
28926 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28927 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28928 Long delta0 = delta;
28929 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd",
28930 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 );
28931 if (delta != delta0)
28932 goto decode_success;
28933 }
28934 break;
28935
28936 case 0x93:
28937 /* VGATHERQPS xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 93 /r */
28938 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28939 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28940 Long delta0 = delta;
28941 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps",
28942 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 );
28943 if (delta != delta0)
28944 goto decode_success;
28945 }
28946 /* VGATHERQPS xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 93 /r */
28947 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28948 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28949 Long delta0 = delta;
28950 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps",
28951 /*isYMM*/True, /*isVM64x*/True, Ity_I32 );
28952 if (delta != delta0)
28953 goto decode_success;
28954 }
28955 /* VGATHERQPD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 93 /r */
28956 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28957 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28958 Long delta0 = delta;
28959 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd",
28960 /*!isYMM*/False, /*isVM64x*/True, Ity_I64 );
28961 if (delta != delta0)
28962 goto decode_success;
28963 }
28964 /* VGATHERQPD ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 93 /r */
28965 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28966 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28967 Long delta0 = delta;
28968 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd",
28969 /*isYMM*/True, /*isVM64x*/True, Ity_I64 );
28970 if (delta != delta0)
28971 goto decode_success;
28972 }
28973 break;
28974
28975 case 0x96 ... 0x9F:
28976 case 0xA6 ... 0xAF:
28977 case 0xB6 ... 0xBF:
28978 /* VFMADDSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 96 /r */
28979 /* VFMADDSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 96 /r */
28980 /* VFMADDSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 96 /r */
28981 /* VFMADDSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 96 /r */
28982 /* VFMSUBADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 97 /r */
28983 /* VFMSUBADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 97 /r */
28984 /* VFMSUBADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 97 /r */
28985 /* VFMSUBADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 97 /r */
28986 /* VFMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 98 /r */
28987 /* VFMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 98 /r */
28988 /* VFMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 98 /r */
28989 /* VFMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 98 /r */
28990 /* VFMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 99 /r */
28991 /* VFMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 99 /r */
28992 /* VFMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9A /r */
28993 /* VFMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9A /r */
28994 /* VFMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9A /r */
28995 /* VFMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9A /r */
28996 /* VFMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9B /r */
28997 /* VFMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9B /r */
28998 /* VFNMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9C /r */
28999 /* VFNMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9C /r */
29000 /* VFNMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9C /r */
29001 /* VFNMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9C /r */
29002 /* VFNMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9D /r */
29003 /* VFNMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9D /r */
29004 /* VFNMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9E /r */
29005 /* VFNMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9E /r */
29006 /* VFNMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9E /r */
29007 /* VFNMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9E /r */
29008 /* VFNMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9F /r */
29009 /* VFNMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9F /r */
29010 /* VFMADDSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A6 /r */
29011 /* VFMADDSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A6 /r */
29012 /* VFMADDSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A6 /r */
29013 /* VFMADDSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A6 /r */
29014 /* VFMSUBADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A7 /r */
29015 /* VFMSUBADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A7 /r */
29016 /* VFMSUBADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A7 /r */
29017 /* VFMSUBADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A7 /r */
29018 /* VFMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A8 /r */
29019 /* VFMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A8 /r */
29020 /* VFMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A8 /r */
29021 /* VFMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A8 /r */
29022 /* VFMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 A9 /r */
29023 /* VFMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 A9 /r */
29024 /* VFMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AA /r */
29025 /* VFMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AA /r */
29026 /* VFMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AA /r */
29027 /* VFMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AA /r */
29028 /* VFMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AB /r */
29029 /* VFMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AB /r */
29030 /* VFNMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AC /r */
29031 /* VFNMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AC /r */
29032 /* VFNMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AC /r */
29033 /* VFNMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AC /r */
29034 /* VFNMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AD /r */
29035 /* VFNMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AD /r */
29036 /* VFNMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AE /r */
29037 /* VFNMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AE /r */
29038 /* VFNMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AE /r */
29039 /* VFNMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AE /r */
29040 /* VFNMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AF /r */
29041 /* VFNMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AF /r */
29042 /* VFMADDSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B6 /r */
29043 /* VFMADDSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B6 /r */
29044 /* VFMADDSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B6 /r */
29045 /* VFMADDSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B6 /r */
29046 /* VFMSUBADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B7 /r */
29047 /* VFMSUBADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B7 /r */
29048 /* VFMSUBADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B7 /r */
29049 /* VFMSUBADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B7 /r */
29050 /* VFMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B8 /r */
29051 /* VFMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B8 /r */
29052 /* VFMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B8 /r */
29053 /* VFMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B8 /r */
29054 /* VFMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 B9 /r */
29055 /* VFMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 B9 /r */
29056 /* VFMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BA /r */
29057 /* VFMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BA /r */
29058 /* VFMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BA /r */
29059 /* VFMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BA /r */
29060 /* VFMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BB /r */
29061 /* VFMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BB /r */
29062 /* VFNMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BC /r */
29063 /* VFNMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BC /r */
29064 /* VFNMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BC /r */
29065 /* VFNMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BC /r */
29066 /* VFNMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BD /r */
29067 /* VFNMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BD /r */
29068 /* VFNMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BE /r */
29069 /* VFNMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BE /r */
29070 /* VFNMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BE /r */
29071 /* VFNMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BE /r */
29072 /* VFNMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BF /r */
29073 /* VFNMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BF /r */
29074 if (have66noF2noF3(pfx)) {
29075 delta = dis_FMA( vbi, pfx, delta, opc );
29076 *uses_vvvv = True;
29077 goto decode_success;
29078 }
29079 break;
29080
sewardj1407a362012-06-24 15:11:38 +000029081 case 0xDB:
29082 case 0xDC:
29083 case 0xDD:
29084 case 0xDE:
29085 case 0xDF:
29086 /* VAESIMC xmm2/m128, xmm1 = VEX.128.66.0F38.WIG DB /r */
29087 /* VAESENC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DC /r */
29088 /* VAESENCLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DD /r */
29089 /* VAESDEC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DE /r */
29090 /* VAESDECLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DF /r */
29091 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29092 delta = dis_AESx( vbi, pfx, delta, True/*isAvx*/, opc );
29093 if (opc != 0xDB) *uses_vvvv = True;
29094 goto decode_success;
29095 }
29096 break;
29097
sewardjcc3d2192013-03-27 11:37:33 +000029098 case 0xF2:
29099 /* ANDN r/m32, r32b, r32a = VEX.NDS.LZ.0F38.W0 F2 /r */
29100 /* ANDN r/m64, r64b, r64a = VEX.NDS.LZ.0F38.W1 F2 /r */
29101 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29102 Int size = getRexW(pfx) ? 8 : 4;
29103 IRType ty = szToITy(size);
29104 IRTemp dst = newTemp(ty);
29105 IRTemp src1 = newTemp(ty);
29106 IRTemp src2 = newTemp(ty);
29107 UChar rm = getUChar(delta);
29108
29109 assign( src1, getIRegV(size,pfx) );
29110 if (epartIsReg(rm)) {
29111 assign( src2, getIRegE(size,pfx,rm) );
29112 DIP("andn %s,%s,%s\n", nameIRegE(size,pfx,rm),
29113 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
29114 delta++;
29115 } else {
29116 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29117 assign( src2, loadLE(ty, mkexpr(addr)) );
29118 DIP("andn %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
29119 nameIRegG(size,pfx,rm));
29120 delta += alen;
29121 }
29122
29123 assign( dst, binop( mkSizedOp(ty,Iop_And8),
29124 unop( mkSizedOp(ty,Iop_Not8), mkexpr(src1) ),
29125 mkexpr(src2) ) );
29126 putIRegG( size, pfx, rm, mkexpr(dst) );
29127 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29128 ? AMD64G_CC_OP_ANDN64
29129 : AMD64G_CC_OP_ANDN32)) );
29130 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29131 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
29132 *uses_vvvv = True;
29133 goto decode_success;
29134 }
29135 break;
29136
29137 case 0xF3:
29138 /* BLSI r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /3 */
29139 /* BLSI r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /3 */
29140 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
29141 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 3) {
29142 Int size = getRexW(pfx) ? 8 : 4;
29143 IRType ty = szToITy(size);
29144 IRTemp src = newTemp(ty);
29145 IRTemp dst = newTemp(ty);
29146 UChar rm = getUChar(delta);
29147
29148 if (epartIsReg(rm)) {
29149 assign( src, getIRegE(size,pfx,rm) );
29150 DIP("blsi %s,%s\n", nameIRegE(size,pfx,rm),
29151 nameIRegV(size,pfx));
29152 delta++;
29153 } else {
29154 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29155 assign( src, loadLE(ty, mkexpr(addr)) );
29156 DIP("blsi %s,%s\n", dis_buf, nameIRegV(size,pfx));
29157 delta += alen;
29158 }
29159
29160 assign( dst, binop(mkSizedOp(ty,Iop_And8),
29161 binop(mkSizedOp(ty,Iop_Sub8), mkU(ty, 0),
29162 mkexpr(src)), mkexpr(src)) );
29163 putIRegV( size, pfx, mkexpr(dst) );
29164 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29165 ? AMD64G_CC_OP_BLSI64
29166 : AMD64G_CC_OP_BLSI32)) );
29167 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29168 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
29169 *uses_vvvv = True;
29170 goto decode_success;
29171 }
29172 /* BLSMSK r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /2 */
29173 /* BLSMSK r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /2 */
29174 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
29175 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 2) {
29176 Int size = getRexW(pfx) ? 8 : 4;
29177 IRType ty = szToITy(size);
29178 IRTemp src = newTemp(ty);
29179 IRTemp dst = newTemp(ty);
29180 UChar rm = getUChar(delta);
29181
29182 if (epartIsReg(rm)) {
29183 assign( src, getIRegE(size,pfx,rm) );
29184 DIP("blsmsk %s,%s\n", nameIRegE(size,pfx,rm),
29185 nameIRegV(size,pfx));
29186 delta++;
29187 } else {
29188 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29189 assign( src, loadLE(ty, mkexpr(addr)) );
29190 DIP("blsmsk %s,%s\n", dis_buf, nameIRegV(size,pfx));
29191 delta += alen;
29192 }
29193
29194 assign( dst, binop(mkSizedOp(ty,Iop_Xor8),
29195 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src),
29196 mkU(ty, 1)), mkexpr(src)) );
29197 putIRegV( size, pfx, mkexpr(dst) );
29198 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29199 ? AMD64G_CC_OP_BLSMSK64
29200 : AMD64G_CC_OP_BLSMSK32)) );
29201 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29202 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
29203 *uses_vvvv = True;
29204 goto decode_success;
29205 }
29206 /* BLSR r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /1 */
29207 /* BLSR r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /1 */
29208 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
29209 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 1) {
29210 Int size = getRexW(pfx) ? 8 : 4;
29211 IRType ty = szToITy(size);
29212 IRTemp src = newTemp(ty);
29213 IRTemp dst = newTemp(ty);
29214 UChar rm = getUChar(delta);
29215
29216 if (epartIsReg(rm)) {
29217 assign( src, getIRegE(size,pfx,rm) );
29218 DIP("blsr %s,%s\n", nameIRegE(size,pfx,rm),
29219 nameIRegV(size,pfx));
29220 delta++;
29221 } else {
29222 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29223 assign( src, loadLE(ty, mkexpr(addr)) );
29224 DIP("blsr %s,%s\n", dis_buf, nameIRegV(size,pfx));
29225 delta += alen;
29226 }
29227
29228 assign( dst, binop(mkSizedOp(ty,Iop_And8),
29229 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src),
29230 mkU(ty, 1)), mkexpr(src)) );
29231 putIRegV( size, pfx, mkexpr(dst) );
29232 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29233 ? AMD64G_CC_OP_BLSR64
29234 : AMD64G_CC_OP_BLSR32)) );
29235 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29236 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
29237 *uses_vvvv = True;
29238 goto decode_success;
29239 }
29240 break;
29241
29242 case 0xF5:
29243 /* BZHI r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F5 /r */
29244 /* BZHI r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F5 /r */
29245 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29246 Int size = getRexW(pfx) ? 8 : 4;
29247 IRType ty = szToITy(size);
29248 IRTemp dst = newTemp(ty);
29249 IRTemp src1 = newTemp(ty);
29250 IRTemp src2 = newTemp(ty);
29251 IRTemp start = newTemp(Ity_I8);
sewardj656b8f42013-03-27 22:15:36 +000029252 IRTemp cond = newTemp(Ity_I1);
sewardjcc3d2192013-03-27 11:37:33 +000029253 UChar rm = getUChar(delta);
29254
29255 assign( src2, getIRegV(size,pfx) );
29256 if (epartIsReg(rm)) {
29257 assign( src1, getIRegE(size,pfx,rm) );
29258 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx),
29259 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
29260 delta++;
29261 } else {
29262 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29263 assign( src1, loadLE(ty, mkexpr(addr)) );
29264 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx), dis_buf,
29265 nameIRegG(size,pfx,rm));
29266 delta += alen;
29267 }
29268
29269 assign( start, narrowTo( Ity_I8, mkexpr(src2) ) );
29270 assign( cond, binop(Iop_CmpLT32U,
29271 unop(Iop_8Uto32, mkexpr(start)),
29272 mkU32(8*size)) );
29273 /* if (start < opsize) {
29274 if (start == 0)
29275 dst = 0;
29276 else
29277 dst = (src1 << (opsize-start)) u>> (opsize-start);
29278 } else {
29279 dst = src1;
29280 } */
29281 assign( dst,
29282 IRExpr_ITE(
29283 mkexpr(cond),
29284 IRExpr_ITE(
29285 binop(Iop_CmpEQ8, mkexpr(start), mkU8(0)),
29286 mkU(ty, 0),
29287 binop(
29288 mkSizedOp(ty,Iop_Shr8),
29289 binop(
29290 mkSizedOp(ty,Iop_Shl8),
29291 mkexpr(src1),
29292 binop(Iop_Sub8, mkU8(8*size), mkexpr(start))
29293 ),
29294 binop(Iop_Sub8, mkU8(8*size), mkexpr(start))
29295 )
29296 ),
29297 mkexpr(src1)
29298 )
29299 );
29300 putIRegG( size, pfx, rm, mkexpr(dst) );
29301 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29302 ? AMD64G_CC_OP_BLSR64
29303 : AMD64G_CC_OP_BLSR32)) );
29304 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29305 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(cond))) );
29306 *uses_vvvv = True;
29307 goto decode_success;
29308 }
29309 /* PDEP r/m32, r32b, r32a = VEX.NDS.LZ.F2.0F38.W0 F5 /r */
29310 /* PDEP r/m64, r64b, r64a = VEX.NDS.LZ.F2.0F38.W1 F5 /r */
29311 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29312 Int size = getRexW(pfx) ? 8 : 4;
29313 IRType ty = szToITy(size);
29314 IRTemp src = newTemp(ty);
29315 IRTemp mask = newTemp(ty);
29316 UChar rm = getUChar(delta);
29317
29318 assign( src, getIRegV(size,pfx) );
29319 if (epartIsReg(rm)) {
29320 assign( mask, getIRegE(size,pfx,rm) );
29321 DIP("pdep %s,%s,%s\n", nameIRegE(size,pfx,rm),
29322 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
29323 delta++;
29324 } else {
29325 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29326 assign( mask, loadLE(ty, mkexpr(addr)) );
29327 DIP("pdep %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
29328 nameIRegG(size,pfx,rm));
29329 delta += alen;
29330 }
29331
29332 IRExpr** args = mkIRExprVec_2( widenUto64(mkexpr(src)),
29333 widenUto64(mkexpr(mask)) );
29334 putIRegG( size, pfx, rm,
29335 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/,
29336 "amd64g_calculate_pdep",
29337 &amd64g_calculate_pdep, args)) );
29338 *uses_vvvv = True;
29339 /* Flags aren't modified. */
29340 goto decode_success;
29341 }
29342 /* PEXT r/m32, r32b, r32a = VEX.NDS.LZ.F3.0F38.W0 F5 /r */
29343 /* PEXT r/m64, r64b, r64a = VEX.NDS.LZ.F3.0F38.W1 F5 /r */
29344 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29345 Int size = getRexW(pfx) ? 8 : 4;
29346 IRType ty = szToITy(size);
29347 IRTemp src = newTemp(ty);
29348 IRTemp mask = newTemp(ty);
29349 UChar rm = getUChar(delta);
29350
29351 assign( src, getIRegV(size,pfx) );
29352 if (epartIsReg(rm)) {
29353 assign( mask, getIRegE(size,pfx,rm) );
29354 DIP("pext %s,%s,%s\n", nameIRegE(size,pfx,rm),
29355 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
29356 delta++;
29357 } else {
29358 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29359 assign( mask, loadLE(ty, mkexpr(addr)) );
29360 DIP("pext %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
29361 nameIRegG(size,pfx,rm));
29362 delta += alen;
29363 }
29364
29365 /* First mask off bits not set in mask, they are ignored
29366 and it should be fine if they contain undefined values. */
29367 IRExpr* masked = binop(mkSizedOp(ty,Iop_And8),
29368 mkexpr(src), mkexpr(mask));
29369 IRExpr** args = mkIRExprVec_2( widenUto64(masked),
29370 widenUto64(mkexpr(mask)) );
29371 putIRegG( size, pfx, rm,
29372 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/,
29373 "amd64g_calculate_pext",
29374 &amd64g_calculate_pext, args)) );
29375 *uses_vvvv = True;
29376 /* Flags aren't modified. */
29377 goto decode_success;
29378 }
29379 break;
29380
29381 case 0xF6:
29382 /* MULX r/m32, r32b, r32a = VEX.NDD.LZ.F2.0F38.W0 F6 /r */
29383 /* MULX r/m64, r64b, r64a = VEX.NDD.LZ.F2.0F38.W1 F6 /r */
29384 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29385 Int size = getRexW(pfx) ? 8 : 4;
29386 IRType ty = szToITy(size);
29387 IRTemp src1 = newTemp(ty);
29388 IRTemp src2 = newTemp(ty);
29389 IRTemp res = newTemp(size == 8 ? Ity_I128 : Ity_I64);
29390 UChar rm = getUChar(delta);
29391
29392 assign( src1, getIRegRDX(size) );
29393 if (epartIsReg(rm)) {
29394 assign( src2, getIRegE(size,pfx,rm) );
29395 DIP("mulx %s,%s,%s\n", nameIRegE(size,pfx,rm),
29396 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
29397 delta++;
29398 } else {
29399 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29400 assign( src2, loadLE(ty, mkexpr(addr)) );
29401 DIP("mulx %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
29402 nameIRegG(size,pfx,rm));
29403 delta += alen;
29404 }
29405
29406 assign( res, binop(size == 8 ? Iop_MullU64 : Iop_MullU32,
29407 mkexpr(src1), mkexpr(src2)) );
29408 putIRegV( size, pfx,
29409 unop(size == 8 ? Iop_128to64 : Iop_64to32, mkexpr(res)) );
29410 putIRegG( size, pfx, rm,
29411 unop(size == 8 ? Iop_128HIto64 : Iop_64HIto32,
29412 mkexpr(res)) );
29413 *uses_vvvv = True;
29414 /* Flags aren't modified. */
29415 goto decode_success;
29416 }
29417 break;
29418
29419 case 0xF7:
29420 /* SARX r32b, r/m32, r32a = VEX.NDS.LZ.F3.0F38.W0 F7 /r */
29421 /* SARX r64b, r/m64, r64a = VEX.NDS.LZ.F3.0F38.W1 F7 /r */
29422 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29423 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "sarx", Iop_Sar8 );
29424 goto decode_success;
29425 }
29426 /* SHLX r32b, r/m32, r32a = VEX.NDS.LZ.66.0F38.W0 F7 /r */
29427 /* SHLX r64b, r/m64, r64a = VEX.NDS.LZ.66.0F38.W1 F7 /r */
29428 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29429 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shlx", Iop_Shl8 );
29430 goto decode_success;
29431 }
29432 /* SHRX r32b, r/m32, r32a = VEX.NDS.LZ.F2.0F38.W0 F7 /r */
29433 /* SHRX r64b, r/m64, r64a = VEX.NDS.LZ.F2.0F38.W1 F7 /r */
29434 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29435 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shrx", Iop_Shr8 );
29436 goto decode_success;
29437 }
29438 /* BEXTR r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F7 /r */
29439 /* BEXTR r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F7 /r */
29440 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29441 Int size = getRexW(pfx) ? 8 : 4;
29442 IRType ty = szToITy(size);
29443 IRTemp dst = newTemp(ty);
29444 IRTemp src1 = newTemp(ty);
29445 IRTemp src2 = newTemp(ty);
29446 IRTemp stle = newTemp(Ity_I16);
29447 IRTemp start = newTemp(Ity_I8);
29448 IRTemp len = newTemp(Ity_I8);
29449 UChar rm = getUChar(delta);
29450
29451 assign( src2, getIRegV(size,pfx) );
29452 if (epartIsReg(rm)) {
29453 assign( src1, getIRegE(size,pfx,rm) );
29454 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx),
29455 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
29456 delta++;
29457 } else {
29458 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29459 assign( src1, loadLE(ty, mkexpr(addr)) );
29460 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx), dis_buf,
29461 nameIRegG(size,pfx,rm));
29462 delta += alen;
29463 }
29464
29465 assign( stle, narrowTo( Ity_I16, mkexpr(src2) ) );
29466 assign( start, unop( Iop_16to8, mkexpr(stle) ) );
29467 assign( len, unop( Iop_16HIto8, mkexpr(stle) ) );
29468 /* if (start+len < opsize) {
29469 if (len != 0)
29470 dst = (src1 << (opsize-start-len)) u>> (opsize-len);
29471 else
29472 dst = 0;
29473 } else {
29474 if (start < opsize)
29475 dst = src1 u>> start;
29476 else
29477 dst = 0;
29478 } */
29479 assign( dst,
29480 IRExpr_ITE(
29481 binop(Iop_CmpLT32U,
29482 binop(Iop_Add32,
29483 unop(Iop_8Uto32, mkexpr(start)),
29484 unop(Iop_8Uto32, mkexpr(len))),
29485 mkU32(8*size)),
29486 IRExpr_ITE(
29487 binop(Iop_CmpEQ8, mkexpr(len), mkU8(0)),
29488 mkU(ty, 0),
29489 binop(mkSizedOp(ty,Iop_Shr8),
29490 binop(mkSizedOp(ty,Iop_Shl8), mkexpr(src1),
29491 binop(Iop_Sub8,
29492 binop(Iop_Sub8, mkU8(8*size),
29493 mkexpr(start)),
29494 mkexpr(len))),
29495 binop(Iop_Sub8, mkU8(8*size),
29496 mkexpr(len)))
29497 ),
29498 IRExpr_ITE(
29499 binop(Iop_CmpLT32U,
29500 unop(Iop_8Uto32, mkexpr(start)),
29501 mkU32(8*size)),
29502 binop(mkSizedOp(ty,Iop_Shr8), mkexpr(src1),
29503 mkexpr(start)),
29504 mkU(ty, 0)
29505 )
29506 )
29507 );
29508 putIRegG( size, pfx, rm, mkexpr(dst) );
29509 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29510 ? AMD64G_CC_OP_ANDN64
29511 : AMD64G_CC_OP_ANDN32)) );
29512 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29513 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
29514 *uses_vvvv = True;
29515 goto decode_success;
29516 }
29517 break;
29518
sewardjc4530ae2012-05-21 10:18:49 +000029519 default:
29520 break;
29521
29522 }
29523
29524 //decode_failure:
29525 return deltaIN;
29526
29527 decode_success:
29528 return delta;
29529}
29530
29531
29532/*------------------------------------------------------------*/
29533/*--- ---*/
29534/*--- Top-level post-escape decoders: dis_ESC_0F3A__VEX ---*/
29535/*--- ---*/
29536/*------------------------------------------------------------*/
29537
sewardjfe0c5e72012-06-15 15:48:07 +000029538static IRTemp math_VPERMILPS_128 ( IRTemp sV, UInt imm8 )
29539{
29540 vassert(imm8 < 256);
29541 IRTemp s3, s2, s1, s0;
29542 s3 = s2 = s1 = s0 = IRTemp_INVALID;
29543 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
29544# define SEL(_nn) (((_nn)==0) ? s0 : ((_nn)==1) ? s1 \
29545 : ((_nn)==2) ? s2 : s3)
29546 IRTemp res = newTemp(Ity_V128);
29547 assign(res, mkV128from32s( SEL((imm8 >> 6) & 3),
29548 SEL((imm8 >> 4) & 3),
29549 SEL((imm8 >> 2) & 3),
29550 SEL((imm8 >> 0) & 3) ));
29551# undef SEL
29552 return res;
29553}
29554
sewardjc4530ae2012-05-21 10:18:49 +000029555__attribute__((noinline))
29556static
29557Long dis_ESC_0F3A__VEX (
29558 /*MB_OUT*/DisResult* dres,
29559 /*OUT*/ Bool* uses_vvvv,
florianbeac5302014-12-31 12:09:38 +000029560 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
sewardjc4530ae2012-05-21 10:18:49 +000029561 Bool resteerCisOk,
29562 void* callback_opaque,
floriancacba8e2014-12-15 18:58:07 +000029563 const VexArchInfo* archinfo,
29564 const VexAbiInfo* vbi,
sewardjc4530ae2012-05-21 10:18:49 +000029565 Prefix pfx, Int sz, Long deltaIN
29566 )
29567{
29568 IRTemp addr = IRTemp_INVALID;
29569 Int alen = 0;
29570 HChar dis_buf[50];
29571 Long delta = deltaIN;
29572 UChar opc = getUChar(delta);
29573 delta++;
29574 *uses_vvvv = False;
29575
29576 switch (opc) {
29577
sewardjcc3d2192013-03-27 11:37:33 +000029578 case 0x00:
29579 case 0x01:
29580 /* VPERMQ imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 00 /r ib */
29581 /* VPERMPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 01 /r ib */
29582 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29583 && 1==getRexW(pfx)/*W1*/) {
29584 UChar modrm = getUChar(delta);
29585 UInt imm8 = 0;
29586 UInt rG = gregOfRexRM(pfx, modrm);
29587 IRTemp sV = newTemp(Ity_V256);
29588 const HChar *name = opc == 0 ? "vpermq" : "vpermpd";
29589 if (epartIsReg(modrm)) {
29590 UInt rE = eregOfRexRM(pfx, modrm);
29591 delta += 1;
29592 imm8 = getUChar(delta);
29593 DIP("%s $%u,%s,%s\n",
29594 name, imm8, nameYMMReg(rE), nameYMMReg(rG));
29595 assign(sV, getYMMReg(rE));
29596 } else {
29597 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29598 delta += alen;
29599 imm8 = getUChar(delta);
29600 DIP("%s $%u,%s,%s\n",
29601 name, imm8, dis_buf, nameYMMReg(rG));
29602 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
29603 }
29604 delta++;
29605 IRTemp s[4];
29606 s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
29607 breakupV256to64s(sV, &s[3], &s[2], &s[1], &s[0]);
29608 IRTemp dV = newTemp(Ity_V256);
29609 assign(dV, IRExpr_Qop(Iop_64x4toV256,
29610 mkexpr(s[(imm8 >> 6) & 3]),
29611 mkexpr(s[(imm8 >> 4) & 3]),
29612 mkexpr(s[(imm8 >> 2) & 3]),
29613 mkexpr(s[(imm8 >> 0) & 3])));
29614 putYMMReg(rG, mkexpr(dV));
29615 goto decode_success;
29616 }
29617 break;
29618
29619 case 0x02:
29620 /* VPBLENDD imm8, xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 02 /r ib */
29621 if (have66noF2noF3(pfx)
29622 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
29623 UChar modrm = getUChar(delta);
29624 UInt imm8 = 0;
29625 UInt rG = gregOfRexRM(pfx, modrm);
29626 UInt rV = getVexNvvvv(pfx);
29627 IRTemp sV = newTemp(Ity_V128);
29628 IRTemp dV = newTemp(Ity_V128);
29629 UInt i;
29630 IRTemp s[4], d[4];
29631 assign(sV, getXMMReg(rV));
29632 if (epartIsReg(modrm)) {
29633 UInt rE = eregOfRexRM(pfx, modrm);
29634 delta += 1;
29635 imm8 = getUChar(delta);
29636 DIP("vpblendd $%u,%s,%s,%s\n",
29637 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
29638 assign(dV, getXMMReg(rE));
29639 } else {
29640 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29641 delta += alen;
29642 imm8 = getUChar(delta);
29643 DIP("vpblendd $%u,%s,%s,%s\n",
29644 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
29645 assign(dV, loadLE(Ity_V128, mkexpr(addr)));
29646 }
29647 delta++;
29648 for (i = 0; i < 4; i++) {
29649 s[i] = IRTemp_INVALID;
29650 d[i] = IRTemp_INVALID;
29651 }
29652 breakupV128to32s( sV, &s[3], &s[2], &s[1], &s[0] );
29653 breakupV128to32s( dV, &d[3], &d[2], &d[1], &d[0] );
29654 for (i = 0; i < 4; i++)
29655 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i]));
29656 putYMMRegLane128(rG, 1, mkV128(0));
29657 *uses_vvvv = True;
29658 goto decode_success;
29659 }
29660 /* VPBLENDD imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 02 /r ib */
29661 if (have66noF2noF3(pfx)
29662 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
29663 UChar modrm = getUChar(delta);
29664 UInt imm8 = 0;
29665 UInt rG = gregOfRexRM(pfx, modrm);
29666 UInt rV = getVexNvvvv(pfx);
29667 IRTemp sV = newTemp(Ity_V256);
29668 IRTemp dV = newTemp(Ity_V256);
29669 UInt i;
29670 IRTemp s[8], d[8];
29671 assign(sV, getYMMReg(rV));
29672 if (epartIsReg(modrm)) {
29673 UInt rE = eregOfRexRM(pfx, modrm);
29674 delta += 1;
29675 imm8 = getUChar(delta);
29676 DIP("vpblendd $%u,%s,%s,%s\n",
29677 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
29678 assign(dV, getYMMReg(rE));
29679 } else {
29680 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29681 delta += alen;
29682 imm8 = getUChar(delta);
29683 DIP("vpblendd $%u,%s,%s,%s\n",
29684 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
29685 assign(dV, loadLE(Ity_V256, mkexpr(addr)));
29686 }
29687 delta++;
29688 for (i = 0; i < 8; i++) {
29689 s[i] = IRTemp_INVALID;
29690 d[i] = IRTemp_INVALID;
29691 }
29692 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
29693 &s[3], &s[2], &s[1], &s[0] );
29694 breakupV256to32s( dV, &d[7], &d[6], &d[5], &d[4],
29695 &d[3], &d[2], &d[1], &d[0] );
29696 for (i = 0; i < 8; i++)
29697 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i]));
29698 *uses_vvvv = True;
29699 goto decode_success;
29700 }
29701 break;
29702
sewardjfe0c5e72012-06-15 15:48:07 +000029703 case 0x04:
sewardj21459cb2012-06-18 14:05:52 +000029704 /* VPERMILPS imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 04 /r ib */
29705 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
sewardjfe0c5e72012-06-15 15:48:07 +000029706 UChar modrm = getUChar(delta);
29707 UInt imm8 = 0;
29708 UInt rG = gregOfRexRM(pfx, modrm);
29709 IRTemp sV = newTemp(Ity_V256);
29710 if (epartIsReg(modrm)) {
29711 UInt rE = eregOfRexRM(pfx, modrm);
29712 delta += 1;
29713 imm8 = getUChar(delta);
29714 DIP("vpermilps $%u,%s,%s\n",
29715 imm8, nameYMMReg(rE), nameYMMReg(rG));
29716 assign(sV, getYMMReg(rE));
29717 } else {
29718 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29719 delta += alen;
29720 imm8 = getUChar(delta);
29721 DIP("vpermilps $%u,%s,%s\n",
29722 imm8, dis_buf, nameYMMReg(rG));
29723 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
29724 }
29725 delta++;
29726 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
29727 breakupV256toV128s( sV, &sVhi, &sVlo );
29728 IRTemp dVhi = math_VPERMILPS_128( sVhi, imm8 );
29729 IRTemp dVlo = math_VPERMILPS_128( sVlo, imm8 );
29730 IRExpr* res = binop(Iop_V128HLtoV256, mkexpr(dVhi), mkexpr(dVlo));
29731 putYMMReg(rG, res);
29732 goto decode_success;
29733 }
sewardj21459cb2012-06-18 14:05:52 +000029734 /* VPERMILPS imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 04 /r ib */
29735 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29736 UChar modrm = getUChar(delta);
29737 UInt imm8 = 0;
29738 UInt rG = gregOfRexRM(pfx, modrm);
29739 IRTemp sV = newTemp(Ity_V128);
29740 if (epartIsReg(modrm)) {
29741 UInt rE = eregOfRexRM(pfx, modrm);
29742 delta += 1;
29743 imm8 = getUChar(delta);
29744 DIP("vpermilps $%u,%s,%s\n",
29745 imm8, nameXMMReg(rE), nameXMMReg(rG));
29746 assign(sV, getXMMReg(rE));
29747 } else {
29748 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29749 delta += alen;
29750 imm8 = getUChar(delta);
29751 DIP("vpermilps $%u,%s,%s\n",
29752 imm8, dis_buf, nameXMMReg(rG));
29753 assign(sV, loadLE(Ity_V128, mkexpr(addr)));
29754 }
29755 delta++;
29756 putYMMRegLoAndZU(rG, mkexpr ( math_VPERMILPS_128 ( sV, imm8 ) ) );
29757 goto decode_success;
29758 }
sewardjfe0c5e72012-06-15 15:48:07 +000029759 break;
29760
sewardje8a7eb72012-06-12 14:59:17 +000029761 case 0x05:
sewardj21459cb2012-06-18 14:05:52 +000029762 /* VPERMILPD imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 05 /r ib */
29763 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
sewardjfe0c5e72012-06-15 15:48:07 +000029764 UChar modrm = getUChar(delta);
29765 UInt imm8 = 0;
29766 UInt rG = gregOfRexRM(pfx, modrm);
29767 IRTemp sV = newTemp(Ity_V128);
29768 if (epartIsReg(modrm)) {
29769 UInt rE = eregOfRexRM(pfx, modrm);
29770 delta += 1;
29771 imm8 = getUChar(delta);
29772 DIP("vpermilpd $%u,%s,%s\n",
29773 imm8, nameXMMReg(rE), nameXMMReg(rG));
29774 assign(sV, getXMMReg(rE));
29775 } else {
29776 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29777 delta += alen;
29778 imm8 = getUChar(delta);
29779 DIP("vpermilpd $%u,%s,%s\n",
29780 imm8, dis_buf, nameXMMReg(rG));
29781 assign(sV, loadLE(Ity_V128, mkexpr(addr)));
29782 }
29783 delta++;
29784 IRTemp s1 = newTemp(Ity_I64);
29785 IRTemp s0 = newTemp(Ity_I64);
29786 assign(s1, unop(Iop_V128HIto64, mkexpr(sV)));
29787 assign(s0, unop(Iop_V128to64, mkexpr(sV)));
29788 IRTemp dV = newTemp(Ity_V128);
29789 assign(dV, binop(Iop_64HLtoV128,
29790 mkexpr((imm8 & (1<<1)) ? s1 : s0),
29791 mkexpr((imm8 & (1<<0)) ? s1 : s0)));
29792 putYMMRegLoAndZU(rG, mkexpr(dV));
29793 goto decode_success;
29794 }
sewardj21459cb2012-06-18 14:05:52 +000029795 /* VPERMILPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 05 /r ib */
29796 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
sewardje8a7eb72012-06-12 14:59:17 +000029797 UChar modrm = getUChar(delta);
29798 UInt imm8 = 0;
29799 UInt rG = gregOfRexRM(pfx, modrm);
29800 IRTemp sV = newTemp(Ity_V256);
29801 if (epartIsReg(modrm)) {
29802 UInt rE = eregOfRexRM(pfx, modrm);
29803 delta += 1;
29804 imm8 = getUChar(delta);
29805 DIP("vpermilpd $%u,%s,%s\n",
29806 imm8, nameYMMReg(rE), nameYMMReg(rG));
29807 assign(sV, getYMMReg(rE));
29808 } else {
29809 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29810 delta += alen;
29811 imm8 = getUChar(delta);
29812 DIP("vpermilpd $%u,%s,%s\n",
29813 imm8, dis_buf, nameYMMReg(rG));
29814 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
29815 }
29816 delta++;
sewardj4b1cc832012-06-13 11:10:20 +000029817 IRTemp s3, s2, s1, s0;
29818 s3 = s2 = s1 = s0 = IRTemp_INVALID;
29819 breakupV256to64s(sV, &s3, &s2, &s1, &s0);
sewardje8a7eb72012-06-12 14:59:17 +000029820 IRTemp dV = newTemp(Ity_V256);
29821 assign(dV, IRExpr_Qop(Iop_64x4toV256,
29822 mkexpr((imm8 & (1<<3)) ? s3 : s2),
29823 mkexpr((imm8 & (1<<2)) ? s3 : s2),
29824 mkexpr((imm8 & (1<<1)) ? s1 : s0),
29825 mkexpr((imm8 & (1<<0)) ? s1 : s0)));
29826 putYMMReg(rG, mkexpr(dV));
29827 goto decode_success;
29828 }
sewardje8a7eb72012-06-12 14:59:17 +000029829 break;
29830
29831 case 0x06:
29832 /* VPERM2F128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 06 /r ib */
29833 if (have66noF2noF3(pfx)
29834 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
29835 UChar modrm = getUChar(delta);
29836 UInt imm8 = 0;
29837 UInt rG = gregOfRexRM(pfx, modrm);
29838 UInt rV = getVexNvvvv(pfx);
29839 IRTemp s00 = newTemp(Ity_V128);
29840 IRTemp s01 = newTemp(Ity_V128);
29841 IRTemp s10 = newTemp(Ity_V128);
29842 IRTemp s11 = newTemp(Ity_V128);
29843 assign(s00, getYMMRegLane128(rV, 0));
29844 assign(s01, getYMMRegLane128(rV, 1));
29845 if (epartIsReg(modrm)) {
29846 UInt rE = eregOfRexRM(pfx, modrm);
29847 delta += 1;
29848 imm8 = getUChar(delta);
29849 DIP("vperm2f128 $%u,%s,%s,%s\n",
29850 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
29851 assign(s10, getYMMRegLane128(rE, 0));
29852 assign(s11, getYMMRegLane128(rE, 1));
29853 } else {
29854 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29855 delta += alen;
29856 imm8 = getUChar(delta);
29857 DIP("vperm2f128 $%u,%s,%s,%s\n",
29858 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
29859 assign(s10, loadLE(Ity_V128, binop(Iop_Add64,
29860 mkexpr(addr), mkU64(0))));
29861 assign(s11, loadLE(Ity_V128, binop(Iop_Add64,
29862 mkexpr(addr), mkU64(16))));
29863 }
29864 delta++;
29865# define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
29866 : ((_nn)==2) ? s10 : s11)
29867 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3)));
29868 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3)));
29869# undef SEL
29870 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0));
29871 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0));
29872 *uses_vvvv = True;
29873 goto decode_success;
29874 }
29875 break;
29876
sewardj4f228902012-06-21 09:17:58 +000029877 case 0x08:
sewardja9651762012-06-24 11:09:37 +000029878 /* VROUNDPS imm8, xmm2/m128, xmm1 */
sewardj4f228902012-06-21 09:17:58 +000029879 /* VROUNDPS = VEX.NDS.128.66.0F3A.WIG 08 ib */
29880 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29881 UChar modrm = getUChar(delta);
29882 UInt rG = gregOfRexRM(pfx, modrm);
29883 IRTemp src = newTemp(Ity_V128);
29884 IRTemp s0 = IRTemp_INVALID;
29885 IRTemp s1 = IRTemp_INVALID;
29886 IRTemp s2 = IRTemp_INVALID;
29887 IRTemp s3 = IRTemp_INVALID;
29888 IRTemp rm = newTemp(Ity_I32);
29889 Int imm = 0;
29890
29891 modrm = getUChar(delta);
29892
29893 if (epartIsReg(modrm)) {
29894 UInt rE = eregOfRexRM(pfx, modrm);
29895 assign( src, getXMMReg( rE ) );
29896 imm = getUChar(delta+1);
29897 if (imm & ~15) break;
29898 delta += 1+1;
29899 DIP( "vroundps $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) );
29900 } else {
29901 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29902 assign( src, loadLE(Ity_V128, mkexpr(addr) ) );
29903 imm = getUChar(delta+alen);
29904 if (imm & ~15) break;
29905 delta += alen+1;
29906 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) );
29907 }
29908
29909 /* (imm & 3) contains an Intel-encoded rounding mode. Because
29910 that encoding is the same as the encoding for IRRoundingMode,
29911 we can use that value directly in the IR as a rounding
29912 mode. */
29913 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
29914
29915 breakupV128to32s( src, &s3, &s2, &s1, &s0 );
29916 putYMMRegLane128( rG, 1, mkV128(0) );
29917# define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
29918 unop(Iop_ReinterpI32asF32, mkexpr(s)))
29919 putYMMRegLane32F( rG, 3, CVT(s3) );
29920 putYMMRegLane32F( rG, 2, CVT(s2) );
29921 putYMMRegLane32F( rG, 1, CVT(s1) );
29922 putYMMRegLane32F( rG, 0, CVT(s0) );
29923# undef CVT
29924 goto decode_success;
29925 }
sewardja9651762012-06-24 11:09:37 +000029926 /* VROUNDPS imm8, ymm2/m256, ymm1 */
sewardj4f228902012-06-21 09:17:58 +000029927 /* VROUNDPS = VEX.NDS.256.66.0F3A.WIG 08 ib */
29928 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29929 UChar modrm = getUChar(delta);
29930 UInt rG = gregOfRexRM(pfx, modrm);
29931 IRTemp src = newTemp(Ity_V256);
29932 IRTemp s0 = IRTemp_INVALID;
29933 IRTemp s1 = IRTemp_INVALID;
29934 IRTemp s2 = IRTemp_INVALID;
29935 IRTemp s3 = IRTemp_INVALID;
29936 IRTemp s4 = IRTemp_INVALID;
29937 IRTemp s5 = IRTemp_INVALID;
29938 IRTemp s6 = IRTemp_INVALID;
29939 IRTemp s7 = IRTemp_INVALID;
29940 IRTemp rm = newTemp(Ity_I32);
29941 Int imm = 0;
29942
29943 modrm = getUChar(delta);
29944
29945 if (epartIsReg(modrm)) {
29946 UInt rE = eregOfRexRM(pfx, modrm);
29947 assign( src, getYMMReg( rE ) );
29948 imm = getUChar(delta+1);
29949 if (imm & ~15) break;
29950 delta += 1+1;
29951 DIP( "vroundps $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) );
29952 } else {
29953 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29954 assign( src, loadLE(Ity_V256, mkexpr(addr) ) );
29955 imm = getUChar(delta+alen);
29956 if (imm & ~15) break;
29957 delta += alen+1;
29958 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) );
29959 }
29960
29961 /* (imm & 3) contains an Intel-encoded rounding mode. Because
29962 that encoding is the same as the encoding for IRRoundingMode,
29963 we can use that value directly in the IR as a rounding
29964 mode. */
29965 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
29966
29967 breakupV256to32s( src, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
29968# define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
29969 unop(Iop_ReinterpI32asF32, mkexpr(s)))
29970 putYMMRegLane32F( rG, 7, CVT(s7) );
29971 putYMMRegLane32F( rG, 6, CVT(s6) );
29972 putYMMRegLane32F( rG, 5, CVT(s5) );
29973 putYMMRegLane32F( rG, 4, CVT(s4) );
29974 putYMMRegLane32F( rG, 3, CVT(s3) );
29975 putYMMRegLane32F( rG, 2, CVT(s2) );
29976 putYMMRegLane32F( rG, 1, CVT(s1) );
29977 putYMMRegLane32F( rG, 0, CVT(s0) );
29978# undef CVT
29979 goto decode_success;
29980 }
29981
29982 case 0x09:
sewardja9651762012-06-24 11:09:37 +000029983 /* VROUNDPD imm8, xmm2/m128, xmm1 */
sewardj4f228902012-06-21 09:17:58 +000029984 /* VROUNDPD = VEX.NDS.128.66.0F3A.WIG 09 ib */
29985 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29986 UChar modrm = getUChar(delta);
29987 UInt rG = gregOfRexRM(pfx, modrm);
29988 IRTemp src = newTemp(Ity_V128);
29989 IRTemp s0 = IRTemp_INVALID;
29990 IRTemp s1 = IRTemp_INVALID;
29991 IRTemp rm = newTemp(Ity_I32);
29992 Int imm = 0;
29993
29994 modrm = getUChar(delta);
29995
29996 if (epartIsReg(modrm)) {
29997 UInt rE = eregOfRexRM(pfx, modrm);
29998 assign( src, getXMMReg( rE ) );
29999 imm = getUChar(delta+1);
30000 if (imm & ~15) break;
30001 delta += 1+1;
30002 DIP( "vroundpd $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) );
30003 } else {
30004 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30005 assign( src, loadLE(Ity_V128, mkexpr(addr) ) );
30006 imm = getUChar(delta+alen);
30007 if (imm & ~15) break;
30008 delta += alen+1;
30009 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) );
30010 }
30011
30012 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30013 that encoding is the same as the encoding for IRRoundingMode,
30014 we can use that value directly in the IR as a rounding
30015 mode. */
30016 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30017
30018 breakupV128to64s( src, &s1, &s0 );
30019 putYMMRegLane128( rG, 1, mkV128(0) );
30020# define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
30021 unop(Iop_ReinterpI64asF64, mkexpr(s)))
30022 putYMMRegLane64F( rG, 1, CVT(s1) );
30023 putYMMRegLane64F( rG, 0, CVT(s0) );
30024# undef CVT
30025 goto decode_success;
30026 }
sewardja9651762012-06-24 11:09:37 +000030027 /* VROUNDPD imm8, ymm2/m256, ymm1 */
sewardj4f228902012-06-21 09:17:58 +000030028 /* VROUNDPD = VEX.NDS.256.66.0F3A.WIG 09 ib */
30029 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30030 UChar modrm = getUChar(delta);
30031 UInt rG = gregOfRexRM(pfx, modrm);
30032 IRTemp src = newTemp(Ity_V256);
30033 IRTemp s0 = IRTemp_INVALID;
30034 IRTemp s1 = IRTemp_INVALID;
30035 IRTemp s2 = IRTemp_INVALID;
30036 IRTemp s3 = IRTemp_INVALID;
30037 IRTemp rm = newTemp(Ity_I32);
30038 Int imm = 0;
30039
30040 modrm = getUChar(delta);
30041
30042 if (epartIsReg(modrm)) {
30043 UInt rE = eregOfRexRM(pfx, modrm);
30044 assign( src, getYMMReg( rE ) );
30045 imm = getUChar(delta+1);
30046 if (imm & ~15) break;
30047 delta += 1+1;
30048 DIP( "vroundpd $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) );
30049 } else {
30050 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30051 assign( src, loadLE(Ity_V256, mkexpr(addr) ) );
30052 imm = getUChar(delta+alen);
30053 if (imm & ~15) break;
30054 delta += alen+1;
30055 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) );
30056 }
30057
30058 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30059 that encoding is the same as the encoding for IRRoundingMode,
30060 we can use that value directly in the IR as a rounding
30061 mode. */
30062 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30063
30064 breakupV256to64s( src, &s3, &s2, &s1, &s0 );
30065# define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
30066 unop(Iop_ReinterpI64asF64, mkexpr(s)))
30067 putYMMRegLane64F( rG, 3, CVT(s3) );
30068 putYMMRegLane64F( rG, 2, CVT(s2) );
30069 putYMMRegLane64F( rG, 1, CVT(s1) );
30070 putYMMRegLane64F( rG, 0, CVT(s0) );
30071# undef CVT
30072 goto decode_success;
30073 }
30074
30075 case 0x0A:
30076 case 0x0B:
30077 /* VROUNDSS imm8, xmm3/m32, xmm2, xmm1 */
30078 /* VROUNDSS = VEX.NDS.128.66.0F3A.WIG 0A ib */
30079 /* VROUNDSD imm8, xmm3/m64, xmm2, xmm1 */
30080 /* VROUNDSD = VEX.NDS.128.66.0F3A.WIG 0B ib */
30081 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30082 UChar modrm = getUChar(delta);
30083 UInt rG = gregOfRexRM(pfx, modrm);
30084 UInt rV = getVexNvvvv(pfx);
30085 Bool isD = opc == 0x0B;
30086 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
30087 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
30088 Int imm = 0;
30089
30090 if (epartIsReg(modrm)) {
30091 UInt rE = eregOfRexRM(pfx, modrm);
30092 assign( src,
30093 isD ? getXMMRegLane64F(rE, 0) : getXMMRegLane32F(rE, 0) );
30094 imm = getUChar(delta+1);
30095 if (imm & ~15) break;
30096 delta += 1+1;
30097 DIP( "vrounds%c $%d,%s,%s,%s\n",
30098 isD ? 'd' : 's',
30099 imm, nameXMMReg( rE ), nameXMMReg( rV ), nameXMMReg( rG ) );
30100 } else {
30101 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30102 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
30103 imm = getUChar(delta+alen);
30104 if (imm & ~15) break;
30105 delta += alen+1;
30106 DIP( "vrounds%c $%d,%s,%s,%s\n",
30107 isD ? 'd' : 's',
30108 imm, dis_buf, nameXMMReg( rV ), nameXMMReg( rG ) );
30109 }
30110
30111 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30112 that encoding is the same as the encoding for IRRoundingMode,
30113 we can use that value directly in the IR as a rounding
30114 mode. */
30115 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
30116 (imm & 4) ? get_sse_roundingmode()
30117 : mkU32(imm & 3),
30118 mkexpr(src)) );
30119
30120 if (isD)
30121 putXMMRegLane64F( rG, 0, mkexpr(res) );
30122 else {
30123 putXMMRegLane32F( rG, 0, mkexpr(res) );
30124 putXMMRegLane32F( rG, 1, getXMMRegLane32F( rV, 1 ) );
30125 }
30126 putXMMRegLane64F( rG, 1, getXMMRegLane64F( rV, 1 ) );
30127 putYMMRegLane128( rG, 1, mkV128(0) );
30128 *uses_vvvv = True;
30129 goto decode_success;
30130 }
30131 break;
30132
sewardj21459cb2012-06-18 14:05:52 +000030133 case 0x0C:
30134 /* VBLENDPS imm8, ymm3/m256, ymm2, ymm1 */
30135 /* VBLENDPS = VEX.NDS.256.66.0F3A.WIG 0C /r ib */
30136 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30137 UChar modrm = getUChar(delta);
30138 UInt imm8;
30139 UInt rG = gregOfRexRM(pfx, modrm);
30140 UInt rV = getVexNvvvv(pfx);
30141 IRTemp sV = newTemp(Ity_V256);
30142 IRTemp sE = newTemp(Ity_V256);
30143 assign ( sV, getYMMReg(rV) );
30144 if (epartIsReg(modrm)) {
30145 UInt rE = eregOfRexRM(pfx, modrm);
30146 delta += 1;
30147 imm8 = getUChar(delta);
30148 DIP("vblendps $%u,%s,%s,%s\n",
30149 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30150 assign(sE, getYMMReg(rE));
30151 } else {
30152 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30153 delta += alen;
30154 imm8 = getUChar(delta);
30155 DIP("vblendps $%u,%s,%s,%s\n",
30156 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30157 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
30158 }
30159 delta++;
30160 putYMMReg( rG,
30161 mkexpr( math_BLENDPS_256( sE, sV, imm8) ) );
30162 *uses_vvvv = True;
30163 goto decode_success;
30164 }
30165 /* VBLENDPS imm8, xmm3/m128, xmm2, xmm1 */
30166 /* VBLENDPS = VEX.NDS.128.66.0F3A.WIG 0C /r ib */
30167 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30168 UChar modrm = getUChar(delta);
30169 UInt imm8;
30170 UInt rG = gregOfRexRM(pfx, modrm);
30171 UInt rV = getVexNvvvv(pfx);
30172 IRTemp sV = newTemp(Ity_V128);
30173 IRTemp sE = newTemp(Ity_V128);
30174 assign ( sV, getXMMReg(rV) );
30175 if (epartIsReg(modrm)) {
30176 UInt rE = eregOfRexRM(pfx, modrm);
30177 delta += 1;
30178 imm8 = getUChar(delta);
30179 DIP("vblendps $%u,%s,%s,%s\n",
30180 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
30181 assign(sE, getXMMReg(rE));
30182 } else {
30183 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30184 delta += alen;
30185 imm8 = getUChar(delta);
30186 DIP("vblendps $%u,%s,%s,%s\n",
30187 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
30188 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
30189 }
30190 delta++;
30191 putYMMRegLoAndZU( rG,
30192 mkexpr( math_BLENDPS_128( sE, sV, imm8) ) );
30193 *uses_vvvv = True;
30194 goto decode_success;
30195 }
30196 break;
30197
30198 case 0x0D:
30199 /* VBLENDPD imm8, ymm3/m256, ymm2, ymm1 */
30200 /* VBLENDPD = VEX.NDS.256.66.0F3A.WIG 0D /r ib */
30201 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30202 UChar modrm = getUChar(delta);
30203 UInt imm8;
30204 UInt rG = gregOfRexRM(pfx, modrm);
30205 UInt rV = getVexNvvvv(pfx);
30206 IRTemp sV = newTemp(Ity_V256);
30207 IRTemp sE = newTemp(Ity_V256);
30208 assign ( sV, getYMMReg(rV) );
30209 if (epartIsReg(modrm)) {
30210 UInt rE = eregOfRexRM(pfx, modrm);
30211 delta += 1;
30212 imm8 = getUChar(delta);
30213 DIP("vblendpd $%u,%s,%s,%s\n",
30214 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30215 assign(sE, getYMMReg(rE));
30216 } else {
30217 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30218 delta += alen;
30219 imm8 = getUChar(delta);
30220 DIP("vblendpd $%u,%s,%s,%s\n",
30221 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30222 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
30223 }
30224 delta++;
30225 putYMMReg( rG,
30226 mkexpr( math_BLENDPD_256( sE, sV, imm8) ) );
30227 *uses_vvvv = True;
30228 goto decode_success;
30229 }
30230 /* VBLENDPD imm8, xmm3/m128, xmm2, xmm1 */
30231 /* VBLENDPD = VEX.NDS.128.66.0F3A.WIG 0D /r ib */
30232 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30233 UChar modrm = getUChar(delta);
30234 UInt imm8;
30235 UInt rG = gregOfRexRM(pfx, modrm);
30236 UInt rV = getVexNvvvv(pfx);
30237 IRTemp sV = newTemp(Ity_V128);
30238 IRTemp sE = newTemp(Ity_V128);
30239 assign ( sV, getXMMReg(rV) );
30240 if (epartIsReg(modrm)) {
30241 UInt rE = eregOfRexRM(pfx, modrm);
30242 delta += 1;
30243 imm8 = getUChar(delta);
30244 DIP("vblendpd $%u,%s,%s,%s\n",
30245 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
30246 assign(sE, getXMMReg(rE));
30247 } else {
30248 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30249 delta += alen;
30250 imm8 = getUChar(delta);
30251 DIP("vblendpd $%u,%s,%s,%s\n",
30252 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
30253 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
30254 }
30255 delta++;
30256 putYMMRegLoAndZU( rG,
30257 mkexpr( math_BLENDPD_128( sE, sV, imm8) ) );
30258 *uses_vvvv = True;
30259 goto decode_success;
30260 }
30261 break;
30262
30263 case 0x0E:
30264 /* VPBLENDW imm8, xmm3/m128, xmm2, xmm1 */
30265 /* VPBLENDW = VEX.NDS.128.66.0F3A.WIG 0E /r ib */
30266 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30267 UChar modrm = getUChar(delta);
30268 UInt imm8;
30269 UInt rG = gregOfRexRM(pfx, modrm);
30270 UInt rV = getVexNvvvv(pfx);
30271 IRTemp sV = newTemp(Ity_V128);
30272 IRTemp sE = newTemp(Ity_V128);
30273 assign ( sV, getXMMReg(rV) );
30274 if (epartIsReg(modrm)) {
30275 UInt rE = eregOfRexRM(pfx, modrm);
30276 delta += 1;
30277 imm8 = getUChar(delta);
30278 DIP("vpblendw $%u,%s,%s,%s\n",
30279 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
30280 assign(sE, getXMMReg(rE));
30281 } else {
30282 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30283 delta += alen;
30284 imm8 = getUChar(delta);
30285 DIP("vpblendw $%u,%s,%s,%s\n",
sewardjcc3d2192013-03-27 11:37:33 +000030286 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
sewardj21459cb2012-06-18 14:05:52 +000030287 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
30288 }
30289 delta++;
30290 putYMMRegLoAndZU( rG,
30291 mkexpr( math_PBLENDW_128( sE, sV, imm8) ) );
30292 *uses_vvvv = True;
30293 goto decode_success;
30294 }
sewardjcc3d2192013-03-27 11:37:33 +000030295 /* VPBLENDW imm8, ymm3/m256, ymm2, ymm1 */
30296 /* VPBLENDW = VEX.NDS.256.66.0F3A.WIG 0E /r ib */
30297 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30298 UChar modrm = getUChar(delta);
30299 UInt imm8;
30300 UInt rG = gregOfRexRM(pfx, modrm);
30301 UInt rV = getVexNvvvv(pfx);
30302 IRTemp sV = newTemp(Ity_V256);
30303 IRTemp sE = newTemp(Ity_V256);
30304 IRTemp sVhi, sVlo, sEhi, sElo;
30305 sVhi = sVlo = sEhi = sElo = IRTemp_INVALID;
30306 assign ( sV, getYMMReg(rV) );
30307 if (epartIsReg(modrm)) {
30308 UInt rE = eregOfRexRM(pfx, modrm);
30309 delta += 1;
30310 imm8 = getUChar(delta);
30311 DIP("vpblendw $%u,%s,%s,%s\n",
30312 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30313 assign(sE, getYMMReg(rE));
30314 } else {
30315 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30316 delta += alen;
30317 imm8 = getUChar(delta);
30318 DIP("vpblendw $%u,%s,%s,%s\n",
30319 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30320 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
30321 }
30322 delta++;
30323 breakupV256toV128s( sV, &sVhi, &sVlo );
30324 breakupV256toV128s( sE, &sEhi, &sElo );
30325 putYMMReg( rG, binop( Iop_V128HLtoV256,
30326 mkexpr( math_PBLENDW_128( sEhi, sVhi, imm8) ),
30327 mkexpr( math_PBLENDW_128( sElo, sVlo, imm8) ) ) );
30328 *uses_vvvv = True;
30329 goto decode_success;
30330 }
sewardj21459cb2012-06-18 14:05:52 +000030331 break;
30332
sewardj151cd3e2012-06-18 13:56:55 +000030333 case 0x0F:
30334 /* VPALIGNR imm8, xmm3/m128, xmm2, xmm1 */
30335 /* VPALIGNR = VEX.NDS.128.66.0F3A.WIG 0F /r ib */
30336 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30337 UChar modrm = getUChar(delta);
30338 UInt rG = gregOfRexRM(pfx, modrm);
30339 UInt rV = getVexNvvvv(pfx);
30340 IRTemp sV = newTemp(Ity_V128);
30341 IRTemp dV = newTemp(Ity_V128);
30342 UInt imm8;
30343
30344 assign( dV, getXMMReg(rV) );
30345
30346 if ( epartIsReg( modrm ) ) {
30347 UInt rE = eregOfRexRM(pfx, modrm);
30348 assign( sV, getXMMReg(rE) );
30349 imm8 = getUChar(delta+1);
30350 delta += 1+1;
30351 DIP("vpalignr $%d,%s,%s,%s\n", imm8, nameXMMReg(rE),
30352 nameXMMReg(rV), nameXMMReg(rG));
30353 } else {
30354 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj151cd3e2012-06-18 13:56:55 +000030355 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
30356 imm8 = getUChar(delta+alen);
30357 delta += alen+1;
30358 DIP("vpalignr $%d,%s,%s,%s\n", imm8, dis_buf,
30359 nameXMMReg(rV), nameXMMReg(rG));
30360 }
30361
30362 IRTemp res = math_PALIGNR_XMM( sV, dV, imm8 );
30363 putYMMRegLoAndZU( rG, mkexpr(res) );
30364 *uses_vvvv = True;
30365 goto decode_success;
30366 }
sewardjcc3d2192013-03-27 11:37:33 +000030367 /* VPALIGNR imm8, ymm3/m256, ymm2, ymm1 */
30368 /* VPALIGNR = VEX.NDS.256.66.0F3A.WIG 0F /r ib */
30369 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30370 UChar modrm = getUChar(delta);
30371 UInt rG = gregOfRexRM(pfx, modrm);
30372 UInt rV = getVexNvvvv(pfx);
30373 IRTemp sV = newTemp(Ity_V256);
30374 IRTemp dV = newTemp(Ity_V256);
30375 IRTemp sHi, sLo, dHi, dLo;
30376 sHi = sLo = dHi = dLo = IRTemp_INVALID;
30377 UInt imm8;
30378
30379 assign( dV, getYMMReg(rV) );
30380
30381 if ( epartIsReg( modrm ) ) {
30382 UInt rE = eregOfRexRM(pfx, modrm);
30383 assign( sV, getYMMReg(rE) );
30384 imm8 = getUChar(delta+1);
30385 delta += 1+1;
30386 DIP("vpalignr $%d,%s,%s,%s\n", imm8, nameYMMReg(rE),
30387 nameYMMReg(rV), nameYMMReg(rG));
30388 } else {
30389 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30390 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
30391 imm8 = getUChar(delta+alen);
30392 delta += alen+1;
30393 DIP("vpalignr $%d,%s,%s,%s\n", imm8, dis_buf,
30394 nameYMMReg(rV), nameYMMReg(rG));
30395 }
30396
30397 breakupV256toV128s( dV, &dHi, &dLo );
30398 breakupV256toV128s( sV, &sHi, &sLo );
30399 putYMMReg( rG, binop( Iop_V128HLtoV256,
30400 mkexpr( math_PALIGNR_XMM( sHi, dHi, imm8 ) ),
30401 mkexpr( math_PALIGNR_XMM( sLo, dLo, imm8 ) ) )
30402 );
30403 *uses_vvvv = True;
30404 goto decode_success;
30405 }
sewardj151cd3e2012-06-18 13:56:55 +000030406 break;
30407
sewardje8a7eb72012-06-12 14:59:17 +000030408 case 0x14:
30409 /* VPEXTRB imm8, xmm2, reg/m8 = VEX.128.66.0F3A.W0 14 /r ib */
30410 if (have66noF2noF3(pfx)
30411 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
30412 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
30413 goto decode_success;
30414 }
30415 break;
30416
sewardj82096922012-06-24 14:57:59 +000030417 case 0x15:
30418 /* VPEXTRW imm8, reg/m16, xmm2 */
30419 /* VPEXTRW = VEX.128.66.0F3A.W0 15 /r ib */
30420 if (have66noF2noF3(pfx)
30421 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
30422 delta = dis_PEXTRW( vbi, pfx, delta, True/*isAvx*/ );
30423 goto decode_success;
30424 }
30425 break;
30426
sewardjc4530ae2012-05-21 10:18:49 +000030427 case 0x16:
30428 /* VPEXTRD imm8, r32/m32, xmm2 */
30429 /* VPEXTRD = VEX.128.66.0F3A.W0 16 /r ib */
30430 if (have66noF2noF3(pfx)
30431 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
30432 delta = dis_PEXTRD( vbi, pfx, delta, True/*isAvx*/ );
30433 goto decode_success;
30434 }
sewardj56c30312012-06-12 08:45:39 +000030435 /* VPEXTRQ = VEX.128.66.0F3A.W1 16 /r ib */
30436 if (have66noF2noF3(pfx)
30437 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
30438 delta = dis_PEXTRQ( vbi, pfx, delta, True/*isAvx*/ );
30439 goto decode_success;
30440 }
sewardjc4530ae2012-05-21 10:18:49 +000030441 break;
30442
sewardjadf357c2012-06-24 13:44:17 +000030443 case 0x17:
30444 /* VEXTRACTPS imm8, xmm1, r32/m32 = VEX.128.66.0F3A.WIG 17 /r ib */
30445 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30446 delta = dis_EXTRACTPS( vbi, pfx, delta, True/*isAvx*/ );
30447 goto decode_success;
30448 }
30449 break;
30450
sewardjc4530ae2012-05-21 10:18:49 +000030451 case 0x18:
30452 /* VINSERTF128 r/m, rV, rD
sewardj251b59e2012-05-25 13:51:07 +000030453 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
sewardjc4530ae2012-05-21 10:18:49 +000030454 /* VINSERTF128 = VEX.NDS.256.66.0F3A.W0 18 /r ib */
30455 if (have66noF2noF3(pfx)
30456 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30457 UChar modrm = getUChar(delta);
30458 UInt ib = 0;
sewardje8a7eb72012-06-12 14:59:17 +000030459 UInt rG = gregOfRexRM(pfx, modrm);
sewardjc4530ae2012-05-21 10:18:49 +000030460 UInt rV = getVexNvvvv(pfx);
30461 IRTemp t128 = newTemp(Ity_V128);
30462 if (epartIsReg(modrm)) {
sewardje8a7eb72012-06-12 14:59:17 +000030463 UInt rE = eregOfRexRM(pfx, modrm);
sewardjc4530ae2012-05-21 10:18:49 +000030464 delta += 1;
sewardje8a7eb72012-06-12 14:59:17 +000030465 assign(t128, getXMMReg(rE));
sewardjc4530ae2012-05-21 10:18:49 +000030466 ib = getUChar(delta);
30467 DIP("vinsertf128 $%u,%s,%s,%s\n",
sewardje8a7eb72012-06-12 14:59:17 +000030468 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
sewardjc4530ae2012-05-21 10:18:49 +000030469 } else {
30470 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30471 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
30472 delta += alen;
30473 ib = getUChar(delta);
30474 DIP("vinsertf128 $%u,%s,%s,%s\n",
sewardje8a7eb72012-06-12 14:59:17 +000030475 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
sewardjc4530ae2012-05-21 10:18:49 +000030476 }
30477 delta++;
sewardje8a7eb72012-06-12 14:59:17 +000030478 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0));
30479 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1));
30480 putYMMRegLane128(rG, ib & 1, mkexpr(t128));
sewardjc4530ae2012-05-21 10:18:49 +000030481 *uses_vvvv = True;
30482 goto decode_success;
30483 }
30484 break;
30485
30486 case 0x19:
sewardjcfca8cd2012-05-27 08:25:42 +000030487 /* VEXTRACTF128 $lane_no, rS, r/m
sewardjc4530ae2012-05-21 10:18:49 +000030488 ::: r/m:V128 = a lane of rS:V256 (RM format) */
30489 /* VEXTRACTF128 = VEX.256.66.0F3A.W0 19 /r ib */
30490 if (have66noF2noF3(pfx)
30491 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30492 UChar modrm = getUChar(delta);
30493 UInt ib = 0;
30494 UInt rS = gregOfRexRM(pfx, modrm);
30495 IRTemp t128 = newTemp(Ity_V128);
30496 if (epartIsReg(modrm)) {
30497 UInt rD = eregOfRexRM(pfx, modrm);
30498 delta += 1;
30499 ib = getUChar(delta);
30500 assign(t128, getYMMRegLane128(rS, ib & 1));
sewardjc93904b2012-05-27 13:50:42 +000030501 putYMMRegLoAndZU(rD, mkexpr(t128));
sewardjc4530ae2012-05-21 10:18:49 +000030502 DIP("vextractf128 $%u,%s,%s\n",
30503 ib, nameXMMReg(rS), nameYMMReg(rD));
30504 } else {
30505 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30506 delta += alen;
30507 ib = getUChar(delta);
30508 assign(t128, getYMMRegLane128(rS, ib & 1));
30509 storeLE(mkexpr(addr), mkexpr(t128));
30510 DIP("vextractf128 $%u,%s,%s\n",
30511 ib, nameYMMReg(rS), dis_buf);
30512 }
30513 delta++;
30514 /* doesn't use vvvv */
30515 goto decode_success;
30516 }
30517 break;
30518
sewardj21459cb2012-06-18 14:05:52 +000030519 case 0x20:
sewardj4ed05e02012-06-18 15:01:30 +000030520 /* VPINSRB r32/m8, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 20 /r ib */
30521 if (have66noF2noF3(pfx)
30522 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
30523 UChar modrm = getUChar(delta);
30524 UInt rG = gregOfRexRM(pfx, modrm);
30525 UInt rV = getVexNvvvv(pfx);
sewardj21459cb2012-06-18 14:05:52 +000030526 Int imm8;
sewardj4ed05e02012-06-18 15:01:30 +000030527 IRTemp src_u8 = newTemp(Ity_I8);
sewardj21459cb2012-06-18 14:05:52 +000030528
30529 if ( epartIsReg( modrm ) ) {
sewardj4ed05e02012-06-18 15:01:30 +000030530 UInt rE = eregOfRexRM(pfx,modrm);
30531 imm8 = (Int)(getUChar(delta+1) & 15);
30532 assign( src_u8, unop(Iop_32to8, getIReg32( rE )) );
sewardj21459cb2012-06-18 14:05:52 +000030533 delta += 1+1;
sewardj4ed05e02012-06-18 15:01:30 +000030534 DIP( "vpinsrb $%d,%s,%s,%s\n",
30535 imm8, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
sewardj21459cb2012-06-18 14:05:52 +000030536 } else {
30537 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj4ed05e02012-06-18 15:01:30 +000030538 imm8 = (Int)(getUChar(delta+alen) & 15);
30539 assign( src_u8, loadLE( Ity_I8, mkexpr(addr) ) );
sewardj21459cb2012-06-18 14:05:52 +000030540 delta += alen+1;
sewardj4ed05e02012-06-18 15:01:30 +000030541 DIP( "vpinsrb $%d,%s,%s,%s\n",
30542 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
sewardj21459cb2012-06-18 14:05:52 +000030543 }
30544
30545 IRTemp src_vec = newTemp(Ity_V128);
30546 assign(src_vec, getXMMReg( rV ));
sewardj4ed05e02012-06-18 15:01:30 +000030547 IRTemp res_vec = math_PINSRB_128( src_vec, src_u8, imm8 );
30548 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
sewardj21459cb2012-06-18 14:05:52 +000030549 *uses_vvvv = True;
30550 goto decode_success;
30551 }
sewardj4ed05e02012-06-18 15:01:30 +000030552 break;
sewardj21459cb2012-06-18 14:05:52 +000030553
sewardjcfca8cd2012-05-27 08:25:42 +000030554 case 0x21:
30555 /* VINSERTPS imm8, xmm3/m32, xmm2, xmm1
30556 = VEX.NDS.128.66.0F3A.WIG 21 /r ib */
30557 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30558 UChar modrm = getUChar(delta);
30559 UInt rG = gregOfRexRM(pfx, modrm);
30560 UInt rV = getVexNvvvv(pfx);
30561 UInt imm8;
30562 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */
30563 const IRTemp inval = IRTemp_INVALID;
30564
30565 if ( epartIsReg( modrm ) ) {
30566 UInt rE = eregOfRexRM(pfx, modrm);
30567 IRTemp vE = newTemp(Ity_V128);
30568 assign( vE, getXMMReg(rE) );
30569 IRTemp dsE[4] = { inval, inval, inval, inval };
sewardj4b1cc832012-06-13 11:10:20 +000030570 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] );
sewardjcfca8cd2012-05-27 08:25:42 +000030571 imm8 = getUChar(delta+1);
30572 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */
30573 delta += 1+1;
30574 DIP( "insertps $%u, %s,%s\n",
30575 imm8, nameXMMReg(rE), nameXMMReg(rG) );
30576 } else {
30577 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30578 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) );
30579 imm8 = getUChar(delta+alen);
30580 delta += alen+1;
30581 DIP( "insertps $%u, %s,%s\n",
30582 imm8, dis_buf, nameXMMReg(rG) );
30583 }
30584
30585 IRTemp vV = newTemp(Ity_V128);
30586 assign( vV, getXMMReg(rV) );
30587
30588 putYMMRegLoAndZU( rG, mkexpr(math_INSERTPS( vV, d2ins, imm8 )) );
30589 *uses_vvvv = True;
30590 goto decode_success;
30591 }
30592 break;
30593
sewardj6faf7cc2012-05-25 15:53:01 +000030594 case 0x22:
30595 /* VPINSRD r32/m32, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 22 /r ib */
30596 if (have66noF2noF3(pfx)
30597 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
30598 UChar modrm = getUChar(delta);
30599 UInt rG = gregOfRexRM(pfx, modrm);
30600 UInt rV = getVexNvvvv(pfx);
30601 Int imm8_10;
30602 IRTemp src_u32 = newTemp(Ity_I32);
30603
30604 if ( epartIsReg( modrm ) ) {
30605 UInt rE = eregOfRexRM(pfx,modrm);
30606 imm8_10 = (Int)(getUChar(delta+1) & 3);
30607 assign( src_u32, getIReg32( rE ) );
30608 delta += 1+1;
30609 DIP( "vpinsrd $%d,%s,%s,%s\n",
30610 imm8_10, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
30611 } else {
30612 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30613 imm8_10 = (Int)(getUChar(delta+alen) & 3);
30614 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) );
30615 delta += alen+1;
sewardj98d02cc2012-06-02 11:55:25 +000030616 DIP( "vpinsrd $%d,%s,%s,%s\n",
sewardj6faf7cc2012-05-25 15:53:01 +000030617 imm8_10, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
30618 }
30619
30620 IRTemp src_vec = newTemp(Ity_V128);
30621 assign(src_vec, getXMMReg( rV ));
30622 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 );
30623 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
30624 *uses_vvvv = True;
30625 goto decode_success;
30626 }
sewardj98d02cc2012-06-02 11:55:25 +000030627 /* VPINSRQ r64/m64, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W1 22 /r ib */
30628 if (have66noF2noF3(pfx)
30629 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
30630 UChar modrm = getUChar(delta);
30631 UInt rG = gregOfRexRM(pfx, modrm);
30632 UInt rV = getVexNvvvv(pfx);
30633 Int imm8_0;
30634 IRTemp src_u64 = newTemp(Ity_I64);
30635
30636 if ( epartIsReg( modrm ) ) {
30637 UInt rE = eregOfRexRM(pfx,modrm);
30638 imm8_0 = (Int)(getUChar(delta+1) & 1);
30639 assign( src_u64, getIReg64( rE ) );
30640 delta += 1+1;
30641 DIP( "vpinsrq $%d,%s,%s,%s\n",
30642 imm8_0, nameIReg64(rE), nameXMMReg(rV), nameXMMReg(rG) );
30643 } else {
30644 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30645 imm8_0 = (Int)(getUChar(delta+alen) & 1);
30646 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) );
30647 delta += alen+1;
30648 DIP( "vpinsrd $%d,%s,%s,%s\n",
30649 imm8_0, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
30650 }
30651
30652 IRTemp src_vec = newTemp(Ity_V128);
30653 assign(src_vec, getXMMReg( rV ));
30654 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 );
30655 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
30656 *uses_vvvv = True;
30657 goto decode_success;
30658 }
sewardj6faf7cc2012-05-25 15:53:01 +000030659 break;
30660
sewardjcc3d2192013-03-27 11:37:33 +000030661 case 0x38:
30662 /* VINSERTI128 r/m, rV, rD
30663 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
30664 /* VINSERTI128 = VEX.NDS.256.66.0F3A.W0 38 /r ib */
30665 if (have66noF2noF3(pfx)
30666 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30667 UChar modrm = getUChar(delta);
30668 UInt ib = 0;
30669 UInt rG = gregOfRexRM(pfx, modrm);
30670 UInt rV = getVexNvvvv(pfx);
30671 IRTemp t128 = newTemp(Ity_V128);
30672 if (epartIsReg(modrm)) {
30673 UInt rE = eregOfRexRM(pfx, modrm);
30674 delta += 1;
30675 assign(t128, getXMMReg(rE));
30676 ib = getUChar(delta);
30677 DIP("vinserti128 $%u,%s,%s,%s\n",
30678 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30679 } else {
30680 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30681 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
30682 delta += alen;
30683 ib = getUChar(delta);
30684 DIP("vinserti128 $%u,%s,%s,%s\n",
30685 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30686 }
30687 delta++;
30688 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0));
30689 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1));
30690 putYMMRegLane128(rG, ib & 1, mkexpr(t128));
30691 *uses_vvvv = True;
30692 goto decode_success;
30693 }
30694 break;
30695
30696 case 0x39:
30697 /* VEXTRACTI128 $lane_no, rS, r/m
30698 ::: r/m:V128 = a lane of rS:V256 (RM format) */
30699 /* VEXTRACTI128 = VEX.256.66.0F3A.W0 39 /r ib */
30700 if (have66noF2noF3(pfx)
30701 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30702 UChar modrm = getUChar(delta);
30703 UInt ib = 0;
30704 UInt rS = gregOfRexRM(pfx, modrm);
30705 IRTemp t128 = newTemp(Ity_V128);
30706 if (epartIsReg(modrm)) {
30707 UInt rD = eregOfRexRM(pfx, modrm);
30708 delta += 1;
30709 ib = getUChar(delta);
30710 assign(t128, getYMMRegLane128(rS, ib & 1));
30711 putYMMRegLoAndZU(rD, mkexpr(t128));
30712 DIP("vextracti128 $%u,%s,%s\n",
30713 ib, nameXMMReg(rS), nameYMMReg(rD));
30714 } else {
30715 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30716 delta += alen;
30717 ib = getUChar(delta);
30718 assign(t128, getYMMRegLane128(rS, ib & 1));
30719 storeLE(mkexpr(addr), mkexpr(t128));
30720 DIP("vextracti128 $%u,%s,%s\n",
30721 ib, nameYMMReg(rS), dis_buf);
30722 }
30723 delta++;
30724 /* doesn't use vvvv */
30725 goto decode_success;
30726 }
30727 break;
30728
sewardjadf357c2012-06-24 13:44:17 +000030729 case 0x40:
30730 /* VDPPS imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 40 /r ib */
30731 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30732 UChar modrm = getUChar(delta);
30733 UInt rG = gregOfRexRM(pfx, modrm);
30734 UInt rV = getVexNvvvv(pfx);
30735 IRTemp dst_vec = newTemp(Ity_V128);
30736 Int imm8;
30737 if (epartIsReg( modrm )) {
30738 UInt rE = eregOfRexRM(pfx,modrm);
30739 imm8 = (Int)getUChar(delta+1);
30740 assign( dst_vec, getXMMReg( rE ) );
30741 delta += 1+1;
30742 DIP( "vdpps $%d,%s,%s,%s\n",
30743 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
30744 } else {
30745 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30746 imm8 = (Int)getUChar(delta+alen);
30747 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
30748 delta += alen+1;
30749 DIP( "vdpps $%d,%s,%s,%s\n",
30750 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
30751 }
30752
30753 IRTemp src_vec = newTemp(Ity_V128);
30754 assign(src_vec, getXMMReg( rV ));
30755 IRTemp res_vec = math_DPPS_128( src_vec, dst_vec, imm8 );
30756 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
30757 *uses_vvvv = True;
30758 goto decode_success;
30759 }
30760 /* VDPPS imm8, ymm3/m256,ymm2,ymm1 = VEX.NDS.256.66.0F3A.WIG 40 /r ib */
30761 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30762 UChar modrm = getUChar(delta);
30763 UInt rG = gregOfRexRM(pfx, modrm);
30764 UInt rV = getVexNvvvv(pfx);
30765 IRTemp dst_vec = newTemp(Ity_V256);
30766 Int imm8;
30767 if (epartIsReg( modrm )) {
30768 UInt rE = eregOfRexRM(pfx,modrm);
30769 imm8 = (Int)getUChar(delta+1);
30770 assign( dst_vec, getYMMReg( rE ) );
30771 delta += 1+1;
30772 DIP( "vdpps $%d,%s,%s,%s\n",
30773 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
30774 } else {
30775 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30776 imm8 = (Int)getUChar(delta+alen);
30777 assign( dst_vec, loadLE( Ity_V256, mkexpr(addr) ) );
30778 delta += alen+1;
30779 DIP( "vdpps $%d,%s,%s,%s\n",
30780 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
30781 }
30782
30783 IRTemp src_vec = newTemp(Ity_V256);
30784 assign(src_vec, getYMMReg( rV ));
30785 IRTemp s0, s1, d0, d1;
30786 s0 = s1 = d0 = d1 = IRTemp_INVALID;
30787 breakupV256toV128s( dst_vec, &d1, &d0 );
30788 breakupV256toV128s( src_vec, &s1, &s0 );
30789 putYMMReg( rG, binop( Iop_V128HLtoV256,
30790 mkexpr( math_DPPS_128(s1, d1, imm8) ),
30791 mkexpr( math_DPPS_128(s0, d0, imm8) ) ) );
30792 *uses_vvvv = True;
30793 goto decode_success;
30794 }
30795 break;
30796
sewardj4ed05e02012-06-18 15:01:30 +000030797 case 0x41:
sewardjadf357c2012-06-24 13:44:17 +000030798 /* VDPPD imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 41 /r ib */
sewardj4ed05e02012-06-18 15:01:30 +000030799 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30800 UChar modrm = getUChar(delta);
30801 UInt rG = gregOfRexRM(pfx, modrm);
30802 UInt rV = getVexNvvvv(pfx);
30803 IRTemp dst_vec = newTemp(Ity_V128);
30804 Int imm8;
30805 if (epartIsReg( modrm )) {
30806 UInt rE = eregOfRexRM(pfx,modrm);
30807 imm8 = (Int)getUChar(delta+1);
30808 assign( dst_vec, getXMMReg( rE ) );
30809 delta += 1+1;
30810 DIP( "vdppd $%d,%s,%s,%s\n",
30811 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
30812 } else {
30813 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30814 imm8 = (Int)getUChar(delta+alen);
30815 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
30816 delta += alen+1;
30817 DIP( "vdppd $%d,%s,%s,%s\n",
30818 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
30819 }
30820
30821 IRTemp src_vec = newTemp(Ity_V128);
30822 assign(src_vec, getXMMReg( rV ));
30823 IRTemp res_vec = math_DPPD_128( src_vec, dst_vec, imm8 );
30824 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
30825 *uses_vvvv = True;
30826 goto decode_success;
30827 }
30828 break;
30829
sewardj8516a1f2012-06-24 14:26:30 +000030830 case 0x42:
30831 /* VMPSADBW imm8, xmm3/m128,xmm2,xmm1 */
30832 /* VMPSADBW = VEX.NDS.128.66.0F3A.WIG 42 /r ib */
30833 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30834 UChar modrm = getUChar(delta);
30835 Int imm8;
30836 IRTemp src_vec = newTemp(Ity_V128);
30837 IRTemp dst_vec = newTemp(Ity_V128);
30838 UInt rG = gregOfRexRM(pfx, modrm);
30839 UInt rV = getVexNvvvv(pfx);
30840
30841 assign( dst_vec, getXMMReg(rV) );
30842
30843 if ( epartIsReg( modrm ) ) {
30844 UInt rE = eregOfRexRM(pfx, modrm);
30845
30846 imm8 = (Int)getUChar(delta+1);
30847 assign( src_vec, getXMMReg(rE) );
30848 delta += 1+1;
30849 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
30850 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
30851 } else {
30852 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
30853 1/* imm8 is 1 byte after the amode */ );
sewardj8516a1f2012-06-24 14:26:30 +000030854 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
30855 imm8 = (Int)getUChar(delta+alen);
30856 delta += alen+1;
30857 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
30858 dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
30859 }
30860
30861 putYMMRegLoAndZU( rG, mkexpr( math_MPSADBW_128(dst_vec,
30862 src_vec, imm8) ) );
30863 *uses_vvvv = True;
30864 goto decode_success;
30865 }
sewardjcc3d2192013-03-27 11:37:33 +000030866 /* VMPSADBW imm8, ymm3/m256,ymm2,ymm1 */
30867 /* VMPSADBW = VEX.NDS.256.66.0F3A.WIG 42 /r ib */
30868 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30869 UChar modrm = getUChar(delta);
30870 Int imm8;
30871 IRTemp src_vec = newTemp(Ity_V256);
30872 IRTemp dst_vec = newTemp(Ity_V256);
30873 UInt rG = gregOfRexRM(pfx, modrm);
30874 UInt rV = getVexNvvvv(pfx);
30875 IRTemp sHi, sLo, dHi, dLo;
30876 sHi = sLo = dHi = dLo = IRTemp_INVALID;
30877
30878 assign( dst_vec, getYMMReg(rV) );
30879
30880 if ( epartIsReg( modrm ) ) {
30881 UInt rE = eregOfRexRM(pfx, modrm);
30882
30883 imm8 = (Int)getUChar(delta+1);
30884 assign( src_vec, getYMMReg(rE) );
30885 delta += 1+1;
30886 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
30887 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
30888 } else {
30889 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
30890 1/* imm8 is 1 byte after the amode */ );
30891 assign( src_vec, loadLE( Ity_V256, mkexpr(addr) ) );
30892 imm8 = (Int)getUChar(delta+alen);
30893 delta += alen+1;
30894 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
30895 dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
30896 }
30897
30898 breakupV256toV128s( dst_vec, &dHi, &dLo );
30899 breakupV256toV128s( src_vec, &sHi, &sLo );
30900 putYMMReg( rG, binop( Iop_V128HLtoV256,
30901 mkexpr( math_MPSADBW_128(dHi, sHi, imm8 >> 3) ),
30902 mkexpr( math_MPSADBW_128(dLo, sLo, imm8) ) ) );
30903 *uses_vvvv = True;
30904 goto decode_success;
30905 }
sewardj8516a1f2012-06-24 14:26:30 +000030906 break;
30907
sewardj1407a362012-06-24 15:11:38 +000030908 case 0x44:
30909 /* VPCLMULQDQ imm8, xmm3/m128,xmm2,xmm1 */
30910 /* VPCLMULQDQ = VEX.NDS.128.66.0F3A.WIG 44 /r ib */
30911 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
30912 * Carry-less multiplication of selected XMM quadwords into XMM
30913 * registers (a.k.a multiplication of polynomials over GF(2))
30914 */
30915 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30916 UChar modrm = getUChar(delta);
30917 Int imm8;
30918 IRTemp sV = newTemp(Ity_V128);
30919 IRTemp dV = newTemp(Ity_V128);
30920 UInt rG = gregOfRexRM(pfx, modrm);
30921 UInt rV = getVexNvvvv(pfx);
30922
30923 assign( dV, getXMMReg(rV) );
30924
30925 if ( epartIsReg( modrm ) ) {
30926 UInt rE = eregOfRexRM(pfx, modrm);
30927 imm8 = (Int)getUChar(delta+1);
30928 assign( sV, getXMMReg(rE) );
30929 delta += 1+1;
30930 DIP( "vpclmulqdq $%d, %s,%s,%s\n", imm8,
30931 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
30932 } else {
30933 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
30934 1/* imm8 is 1 byte after the amode */ );
30935 assign( sV, loadLE( Ity_V128, mkexpr(addr) ) );
30936 imm8 = (Int)getUChar(delta+alen);
30937 delta += alen+1;
30938 DIP( "vpclmulqdq $%d, %s,%s,%s\n",
30939 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
30940 }
30941
30942 putYMMRegLoAndZU( rG, mkexpr( math_PCLMULQDQ(dV, sV, imm8) ) );
30943 *uses_vvvv = True;
30944 goto decode_success;
30945 }
30946 break;
30947
sewardjcc3d2192013-03-27 11:37:33 +000030948 case 0x46:
30949 /* VPERM2I128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 46 /r ib */
30950 if (have66noF2noF3(pfx)
30951 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30952 UChar modrm = getUChar(delta);
30953 UInt imm8 = 0;
30954 UInt rG = gregOfRexRM(pfx, modrm);
30955 UInt rV = getVexNvvvv(pfx);
30956 IRTemp s00 = newTemp(Ity_V128);
30957 IRTemp s01 = newTemp(Ity_V128);
30958 IRTemp s10 = newTemp(Ity_V128);
30959 IRTemp s11 = newTemp(Ity_V128);
30960 assign(s00, getYMMRegLane128(rV, 0));
30961 assign(s01, getYMMRegLane128(rV, 1));
30962 if (epartIsReg(modrm)) {
30963 UInt rE = eregOfRexRM(pfx, modrm);
30964 delta += 1;
30965 imm8 = getUChar(delta);
30966 DIP("vperm2i128 $%u,%s,%s,%s\n",
30967 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30968 assign(s10, getYMMRegLane128(rE, 0));
30969 assign(s11, getYMMRegLane128(rE, 1));
30970 } else {
30971 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30972 delta += alen;
30973 imm8 = getUChar(delta);
30974 DIP("vperm2i128 $%u,%s,%s,%s\n",
30975 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30976 assign(s10, loadLE(Ity_V128, binop(Iop_Add64,
30977 mkexpr(addr), mkU64(0))));
30978 assign(s11, loadLE(Ity_V128, binop(Iop_Add64,
30979 mkexpr(addr), mkU64(16))));
30980 }
30981 delta++;
30982# define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
30983 : ((_nn)==2) ? s10 : s11)
30984 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3)));
30985 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3)));
30986# undef SEL
30987 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0));
30988 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0));
30989 *uses_vvvv = True;
30990 goto decode_success;
30991 }
30992 break;
30993
sewardj4c0a7ac2012-06-21 09:08:19 +000030994 case 0x4A:
30995 /* VBLENDVPS xmmG, xmmE/memE, xmmV, xmmIS4
30996 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
30997 /* VBLENDVPS = VEX.NDS.128.66.0F3A.WIG 4A /r /is4 */
30998 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30999 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
31000 "vblendvps", 4, Iop_SarN32x4 );
31001 *uses_vvvv = True;
31002 goto decode_success;
31003 }
31004 /* VBLENDVPS ymmG, ymmE/memE, ymmV, ymmIS4
31005 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
31006 /* VBLENDVPS = VEX.NDS.256.66.0F3A.WIG 4A /r /is4 */
31007 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31008 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
31009 "vblendvps", 4, Iop_SarN32x4 );
31010 *uses_vvvv = True;
31011 goto decode_success;
31012 }
31013 break;
31014
31015 case 0x4B:
31016 /* VBLENDVPD xmmG, xmmE/memE, xmmV, xmmIS4
31017 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
31018 /* VBLENDVPD = VEX.NDS.128.66.0F3A.WIG 4B /r /is4 */
31019 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31020 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
31021 "vblendvpd", 8, Iop_SarN64x2 );
31022 *uses_vvvv = True;
31023 goto decode_success;
31024 }
31025 /* VBLENDVPD ymmG, ymmE/memE, ymmV, ymmIS4
31026 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
31027 /* VBLENDVPD = VEX.NDS.256.66.0F3A.WIG 4B /r /is4 */
31028 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31029 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
31030 "vblendvpd", 8, Iop_SarN64x2 );
31031 *uses_vvvv = True;
31032 goto decode_success;
31033 }
31034 break;
31035
sewardjc4530ae2012-05-21 10:18:49 +000031036 case 0x4C:
31037 /* VPBLENDVB xmmG, xmmE/memE, xmmV, xmmIS4
31038 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
sewardj4c0a7ac2012-06-21 09:08:19 +000031039 /* VPBLENDVB = VEX.NDS.128.66.0F3A.WIG 4C /r /is4 */
31040 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31041 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
31042 "vpblendvb", 1, Iop_SarN8x16 );
sewardjc4530ae2012-05-21 10:18:49 +000031043 *uses_vvvv = True;
31044 goto decode_success;
sewardj4c0a7ac2012-06-21 09:08:19 +000031045 }
sewardjcc3d2192013-03-27 11:37:33 +000031046 /* VPBLENDVB ymmG, ymmE/memE, ymmV, ymmIS4
31047 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
31048 /* VPBLENDVB = VEX.NDS.256.66.0F3A.WIG 4C /r /is4 */
31049 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31050 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
31051 "vpblendvb", 1, Iop_SarN8x16 );
31052 *uses_vvvv = True;
31053 goto decode_success;
31054 }
sewardj4c0a7ac2012-06-21 09:08:19 +000031055 break;
sewardjc4530ae2012-05-21 10:18:49 +000031056
sewardjac75d7b2012-05-23 12:42:39 +000031057 case 0x60:
31058 case 0x61:
31059 case 0x62:
31060 case 0x63:
31061 /* VEX.128.66.0F3A.WIG 63 /r ib = VPCMPISTRI imm8, xmm2/m128, xmm1
31062 VEX.128.66.0F3A.WIG 62 /r ib = VPCMPISTRM imm8, xmm2/m128, xmm1
31063 VEX.128.66.0F3A.WIG 61 /r ib = VPCMPESTRI imm8, xmm2/m128, xmm1
31064 VEX.128.66.0F3A.WIG 60 /r ib = VPCMPESTRM imm8, xmm2/m128, xmm1
31065 (selected special cases that actually occur in glibc,
31066 not by any means a complete implementation.)
31067 */
31068 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31069 Long delta0 = delta;
31070 delta = dis_PCMPxSTRx( vbi, pfx, delta, True/*isAvx*/, opc );
31071 if (delta > delta0) goto decode_success;
31072 /* else fall through; dis_PCMPxSTRx failed to decode it */
31073 }
31074 break;
31075
sewardj1407a362012-06-24 15:11:38 +000031076 case 0xDF:
31077 /* VAESKEYGENASSIST imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG DF /r */
31078 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31079 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, True/*!isAvx*/ );
31080 goto decode_success;
31081 }
31082 break;
31083
sewardjcc3d2192013-03-27 11:37:33 +000031084 case 0xF0:
31085 /* RORX imm8, r/m32, r32a = VEX.LZ.F2.0F3A.W0 F0 /r /i */
31086 /* RORX imm8, r/m64, r64a = VEX.LZ.F2.0F3A.W1 F0 /r /i */
31087 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
31088 Int size = getRexW(pfx) ? 8 : 4;
31089 IRType ty = szToITy(size);
31090 IRTemp src = newTemp(ty);
31091 UChar rm = getUChar(delta);
31092 UChar imm8;
31093
31094 if (epartIsReg(rm)) {
31095 imm8 = getUChar(delta+1);
31096 assign( src, getIRegE(size,pfx,rm) );
31097 DIP("rorx %d,%s,%s\n", imm8, nameIRegE(size,pfx,rm),
31098 nameIRegG(size,pfx,rm));
31099 delta += 2;
31100 } else {
31101 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
31102 imm8 = getUChar(delta+alen);
31103 assign( src, loadLE(ty, mkexpr(addr)) );
31104 DIP("rorx %d,%s,%s\n", imm8, dis_buf, nameIRegG(size,pfx,rm));
31105 delta += alen + 1;
31106 }
31107 imm8 &= 8*size-1;
31108
31109 /* dst = (src >>u imm8) | (src << (8*size-imm8)) */
31110 putIRegG( size, pfx, rm,
31111 imm8 == 0 ? mkexpr(src)
31112 : binop( mkSizedOp(ty,Iop_Or8),
31113 binop( mkSizedOp(ty,Iop_Shr8), mkexpr(src),
31114 mkU8(imm8) ),
31115 binop( mkSizedOp(ty,Iop_Shl8), mkexpr(src),
31116 mkU8(8*size-imm8) ) ) );
31117 /* Flags aren't modified. */
31118 goto decode_success;
31119 }
31120 break;
31121
sewardjc4530ae2012-05-21 10:18:49 +000031122 default:
31123 break;
31124
31125 }
31126
31127 //decode_failure:
31128 return deltaIN;
31129
31130 decode_success:
31131 return delta;
31132}
31133
31134
31135/*------------------------------------------------------------*/
31136/*--- ---*/
sewardjdf0e0022005-01-25 15:48:43 +000031137/*--- Disassemble a single instruction ---*/
sewardj80611e32012-01-20 13:07:24 +000031138/*--- ---*/
sewardjdf0e0022005-01-25 15:48:43 +000031139/*------------------------------------------------------------*/
31140
sewardj9e6491a2005-07-02 19:24:10 +000031141/* Disassemble a single instruction into IR. The instruction is
31142 located in host memory at &guest_code[delta]. */
sewardjdf0e0022005-01-25 15:48:43 +000031143
sewardj9e6491a2005-07-02 19:24:10 +000031144static
31145DisResult disInstr_AMD64_WRK (
sewardje9d8a262009-07-01 08:06:34 +000031146 /*OUT*/Bool* expect_CAS,
florianbeac5302014-12-31 12:09:38 +000031147 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
sewardj984d9b12010-01-15 10:53:21 +000031148 Bool resteerCisOk,
sewardjc716aea2006-01-17 01:48:46 +000031149 void* callback_opaque,
sewardj9e6491a2005-07-02 19:24:10 +000031150 Long delta64,
floriancacba8e2014-12-15 18:58:07 +000031151 const VexArchInfo* archinfo,
31152 const VexAbiInfo* vbi,
sewardj442e51a2012-12-06 18:08:04 +000031153 Bool sigill_diag
sewardj9e6491a2005-07-02 19:24:10 +000031154 )
sewardjdf0e0022005-01-25 15:48:43 +000031155{
floriandc369432014-12-24 12:38:52 +000031156 IRTemp t1, t2;
sewardj80611e32012-01-20 13:07:24 +000031157 UChar pre;
31158 Int n, n_prefixes;
sewardj9e6491a2005-07-02 19:24:10 +000031159 DisResult dres;
sewardjdf0e0022005-01-25 15:48:43 +000031160
sewardj9e6491a2005-07-02 19:24:10 +000031161 /* The running delta */
31162 Long delta = delta64;
31163
sewardjdf0e0022005-01-25 15:48:43 +000031164 /* Holds eip at the start of the insn, so that we can print
31165 consistent error messages for unimplemented insns. */
sewardj270def42005-07-03 01:03:01 +000031166 Long delta_start = delta;
sewardjdf0e0022005-01-25 15:48:43 +000031167
31168 /* sz denotes the nominal data-op size of the insn; we change it to
31169 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of
31170 conflict REX.W takes precedence. */
31171 Int sz = 4;
31172
sewardj3ca55a12005-01-27 16:06:23 +000031173 /* pfx holds the summary of prefixes. */
31174 Prefix pfx = PFX_EMPTY;
sewardjdf0e0022005-01-25 15:48:43 +000031175
sewardjc4530ae2012-05-21 10:18:49 +000031176 /* Holds the computed opcode-escape indication. */
31177 Escape esc = ESC_NONE;
31178
sewardj9e6491a2005-07-02 19:24:10 +000031179 /* Set result defaults. */
sewardjc6f970f2012-04-02 21:54:49 +000031180 dres.whatNext = Dis_Continue;
31181 dres.len = 0;
31182 dres.continueAt = 0;
31183 dres.jk_StopHere = Ijk_INVALID;
sewardje9d8a262009-07-01 08:06:34 +000031184 *expect_CAS = False;
31185
sewardj9e6491a2005-07-02 19:24:10 +000031186 vassert(guest_RIP_next_assumed == 0);
31187 vassert(guest_RIP_next_mustcheck == False);
sewardj4b744762005-02-07 15:02:25 +000031188
floriandc369432014-12-24 12:38:52 +000031189 t1 = t2 = IRTemp_INVALID;
sewardjdf0e0022005-01-25 15:48:43 +000031190
sewardj9e6491a2005-07-02 19:24:10 +000031191 DIP("\t0x%llx: ", guest_RIP_bbstart+delta);
31192
sewardjce02aa72006-01-12 12:27:58 +000031193 /* Spot "Special" instructions (see comment at top of file). */
sewardjdf0e0022005-01-25 15:48:43 +000031194 {
florian8462d112014-09-24 15:18:09 +000031195 const UChar* code = guest_code + delta;
sewardjce02aa72006-01-12 12:27:58 +000031196 /* Spot the 16-byte preamble:
31197 48C1C703 rolq $3, %rdi
31198 48C1C70D rolq $13, %rdi
31199 48C1C73D rolq $61, %rdi
31200 48C1C733 rolq $51, %rdi
sewardjdf0e0022005-01-25 15:48:43 +000031201 */
sewardjce02aa72006-01-12 12:27:58 +000031202 if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7
31203 && code[ 3] == 0x03 &&
31204 code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7
31205 && code[ 7] == 0x0D &&
31206 code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7
31207 && code[11] == 0x3D &&
31208 code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7
31209 && code[15] == 0x33) {
31210 /* Got a "Special" instruction preamble. Which one is it? */
31211 if (code[16] == 0x48 && code[17] == 0x87
31212 && code[18] == 0xDB /* xchgq %rbx,%rbx */) {
31213 /* %RDX = client_request ( %RAX ) */
31214 DIP("%%rdx = client_request ( %%rax )\n");
31215 delta += 19;
sewardjc6f970f2012-04-02 21:54:49 +000031216 jmp_lit(&dres, Ijk_ClientReq, guest_RIP_bbstart+delta);
31217 vassert(dres.whatNext == Dis_StopHere);
sewardjce02aa72006-01-12 12:27:58 +000031218 goto decode_success;
31219 }
31220 else
31221 if (code[16] == 0x48 && code[17] == 0x87
31222 && code[18] == 0xC9 /* xchgq %rcx,%rcx */) {
31223 /* %RAX = guest_NRADDR */
31224 DIP("%%rax = guest_NRADDR\n");
31225 delta += 19;
31226 putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
31227 goto decode_success;
31228 }
31229 else
31230 if (code[16] == 0x48 && code[17] == 0x87
31231 && code[18] == 0xD2 /* xchgq %rdx,%rdx */) {
31232 /* call-noredir *%RAX */
31233 DIP("call-noredir *%%rax\n");
31234 delta += 19;
31235 t1 = newTemp(Ity_I64);
31236 assign(t1, getIRegRAX(8));
31237 t2 = newTemp(Ity_I64);
31238 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
31239 putIReg64(R_RSP, mkexpr(t2));
31240 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta));
sewardjc6f970f2012-04-02 21:54:49 +000031241 jmp_treg(&dres, Ijk_NoRedir, t1);
31242 vassert(dres.whatNext == Dis_StopHere);
sewardjce02aa72006-01-12 12:27:58 +000031243 goto decode_success;
31244 }
florian2245ce92012-08-28 16:49:30 +000031245 else
31246 if (code[16] == 0x48 && code[17] == 0x87
31247 && code[18] == 0xff /* xchgq %rdi,%rdi */) {
31248 /* IR injection */
31249 DIP("IR injection\n");
31250 vex_inject_ir(irsb, Iend_LE);
31251
31252 // Invalidate the current insn. The reason is that the IRop we're
31253 // injecting here can change. In which case the translation has to
31254 // be redone. For ease of handling, we simply invalidate all the
31255 // time.
sewardj05f5e012014-05-04 10:52:11 +000031256 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_RIP_curr_instr)));
31257 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(19)));
florian2245ce92012-08-28 16:49:30 +000031258
31259 delta += 19;
31260
31261 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
31262 dres.whatNext = Dis_StopHere;
sewardj05f5e012014-05-04 10:52:11 +000031263 dres.jk_StopHere = Ijk_InvalICache;
florian2245ce92012-08-28 16:49:30 +000031264 goto decode_success;
31265 }
sewardjce02aa72006-01-12 12:27:58 +000031266 /* We don't know what it is. */
31267 goto decode_failure;
31268 /*NOTREACHED*/
sewardjdf0e0022005-01-25 15:48:43 +000031269 }
31270 }
31271
31272 /* Eat prefixes, summarising the result in pfx and sz, and rejecting
31273 as many invalid combinations as possible. */
31274 n_prefixes = 0;
31275 while (True) {
sewardj54477e32007-08-23 18:53:59 +000031276 if (n_prefixes > 7) goto decode_failure;
sewardj8c332e22005-01-28 01:36:56 +000031277 pre = getUChar(delta);
sewardjdf0e0022005-01-25 15:48:43 +000031278 switch (pre) {
31279 case 0x66: pfx |= PFX_66; break;
31280 case 0x67: pfx |= PFX_ASO; break;
31281 case 0xF2: pfx |= PFX_F2; break;
31282 case 0xF3: pfx |= PFX_F3; break;
sewardje9d8a262009-07-01 08:06:34 +000031283 case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break;
sewardjdf0e0022005-01-25 15:48:43 +000031284 case 0x2E: pfx |= PFX_CS; break;
31285 case 0x3E: pfx |= PFX_DS; break;
31286 case 0x26: pfx |= PFX_ES; break;
31287 case 0x64: pfx |= PFX_FS; break;
31288 case 0x65: pfx |= PFX_GS; break;
31289 case 0x36: pfx |= PFX_SS; break;
31290 case 0x40 ... 0x4F:
31291 pfx |= PFX_REX;
31292 if (pre & (1<<3)) pfx |= PFX_REXW;
31293 if (pre & (1<<2)) pfx |= PFX_REXR;
31294 if (pre & (1<<1)) pfx |= PFX_REXX;
31295 if (pre & (1<<0)) pfx |= PFX_REXB;
31296 break;
31297 default:
sewardjc4530ae2012-05-21 10:18:49 +000031298 goto not_a_legacy_prefix;
sewardjdf0e0022005-01-25 15:48:43 +000031299 }
31300 n_prefixes++;
31301 delta++;
31302 }
31303
sewardjc4530ae2012-05-21 10:18:49 +000031304 not_a_legacy_prefix:
31305 /* We've used up all the non-VEX prefixes. Parse and validate a
31306 VEX prefix if that's appropriate. */
31307 if (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX) {
31308 /* Used temporarily for holding VEX prefixes. */
31309 UChar vex0 = getUChar(delta);
31310 if (vex0 == 0xC4) {
31311 /* 3-byte VEX */
31312 UChar vex1 = getUChar(delta+1);
31313 UChar vex2 = getUChar(delta+2);
31314 delta += 3;
31315 pfx |= PFX_VEX;
31316 /* Snarf contents of byte 1 */
31317 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
31318 /* X */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_REXX;
31319 /* B */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_REXB;
31320 /* m-mmmm */
31321 switch (vex1 & 0x1F) {
31322 case 1: esc = ESC_0F; break;
31323 case 2: esc = ESC_0F38; break;
31324 case 3: esc = ESC_0F3A; break;
31325 /* Any other m-mmmm field will #UD */
31326 default: goto decode_failure;
31327 }
31328 /* Snarf contents of byte 2 */
31329 /* W */ pfx |= (vex2 & (1<<7)) ? PFX_REXW : 0;
31330 /* ~v3 */ pfx |= (vex2 & (1<<6)) ? 0 : PFX_VEXnV3;
31331 /* ~v2 */ pfx |= (vex2 & (1<<5)) ? 0 : PFX_VEXnV2;
31332 /* ~v1 */ pfx |= (vex2 & (1<<4)) ? 0 : PFX_VEXnV1;
31333 /* ~v0 */ pfx |= (vex2 & (1<<3)) ? 0 : PFX_VEXnV0;
31334 /* L */ pfx |= (vex2 & (1<<2)) ? PFX_VEXL : 0;
31335 /* pp */
31336 switch (vex2 & 3) {
31337 case 0: break;
31338 case 1: pfx |= PFX_66; break;
31339 case 2: pfx |= PFX_F3; break;
31340 case 3: pfx |= PFX_F2; break;
31341 default: vassert(0);
31342 }
31343 }
31344 else if (vex0 == 0xC5) {
31345 /* 2-byte VEX */
31346 UChar vex1 = getUChar(delta+1);
31347 delta += 2;
31348 pfx |= PFX_VEX;
31349 /* Snarf contents of byte 1 */
31350 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
31351 /* ~v3 */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_VEXnV3;
31352 /* ~v2 */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_VEXnV2;
31353 /* ~v1 */ pfx |= (vex1 & (1<<4)) ? 0 : PFX_VEXnV1;
31354 /* ~v0 */ pfx |= (vex1 & (1<<3)) ? 0 : PFX_VEXnV0;
31355 /* L */ pfx |= (vex1 & (1<<2)) ? PFX_VEXL : 0;
31356 /* pp */
31357 switch (vex1 & 3) {
31358 case 0: break;
31359 case 1: pfx |= PFX_66; break;
31360 case 2: pfx |= PFX_F3; break;
31361 case 3: pfx |= PFX_F2; break;
31362 default: vassert(0);
31363 }
31364 /* implied: */
31365 esc = ESC_0F;
31366 }
31367 /* Can't have both VEX and REX */
31368 if ((pfx & PFX_VEX) && (pfx & PFX_REX))
31369 goto decode_failure; /* can't have both */
31370 }
sewardjdf0e0022005-01-25 15:48:43 +000031371
sewardj42561ef2005-11-04 14:18:31 +000031372 /* Dump invalid combinations */
sewardjdf0e0022005-01-25 15:48:43 +000031373 n = 0;
31374 if (pfx & PFX_F2) n++;
31375 if (pfx & PFX_F3) n++;
sewardj3ca55a12005-01-27 16:06:23 +000031376 if (n > 1)
31377 goto decode_failure; /* can't have both */
sewardjdf0e0022005-01-25 15:48:43 +000031378
31379 n = 0;
31380 if (pfx & PFX_CS) n++;
31381 if (pfx & PFX_DS) n++;
31382 if (pfx & PFX_ES) n++;
31383 if (pfx & PFX_FS) n++;
31384 if (pfx & PFX_GS) n++;
31385 if (pfx & PFX_SS) n++;
sewardj3ca55a12005-01-27 16:06:23 +000031386 if (n > 1)
31387 goto decode_failure; /* multiple seg overrides == illegal */
sewardjdf0e0022005-01-25 15:48:43 +000031388
sewardjceccb292009-01-22 20:40:22 +000031389 /* We have a %fs prefix. Reject it if there's no evidence in 'vbi'
31390 that we should accept it. */
philippee2cc4de2014-12-16 23:57:51 +000031391 if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_const)
sewardjceccb292009-01-22 20:40:22 +000031392 goto decode_failure;
31393
31394 /* Ditto for %gs prefixes. */
philippee2cc4de2014-12-16 23:57:51 +000031395 if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_const)
sewardjceccb292009-01-22 20:40:22 +000031396 goto decode_failure;
sewardj42561ef2005-11-04 14:18:31 +000031397
sewardjdf0e0022005-01-25 15:48:43 +000031398 /* Set up sz. */
31399 sz = 4;
31400 if (pfx & PFX_66) sz = 2;
31401 if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8;
31402
sewardje9d8a262009-07-01 08:06:34 +000031403 /* Now we should be looking at the primary opcode byte or the
sewardj80611e32012-01-20 13:07:24 +000031404 leading escapes. Check that any LOCK prefix is actually
sewardje9d8a262009-07-01 08:06:34 +000031405 allowed. */
sewardj6491f862013-10-15 17:29:19 +000031406 if (haveLOCK(pfx)) {
florian8462d112014-09-24 15:18:09 +000031407 if (can_be_used_with_LOCK_prefix( &guest_code[delta] )) {
sewardjc4356f02007-11-09 21:15:04 +000031408 DIP("lock ");
31409 } else {
sewardje9d8a262009-07-01 08:06:34 +000031410 *expect_CAS = False;
sewardjc4356f02007-11-09 21:15:04 +000031411 goto decode_failure;
31412 }
sewardjdf0e0022005-01-25 15:48:43 +000031413 }
31414
sewardj80611e32012-01-20 13:07:24 +000031415 /* Eat up opcode escape bytes, until we're really looking at the
sewardjc4530ae2012-05-21 10:18:49 +000031416 primary opcode byte. But only if there's no VEX present. */
31417 if (!(pfx & PFX_VEX)) {
31418 vassert(esc == ESC_NONE);
sewardj80611e32012-01-20 13:07:24 +000031419 pre = getUChar(delta);
sewardjc4530ae2012-05-21 10:18:49 +000031420 if (pre == 0x0F) {
31421 delta++;
31422 pre = getUChar(delta);
31423 switch (pre) {
31424 case 0x38: esc = ESC_0F38; delta++; break;
31425 case 0x3A: esc = ESC_0F3A; delta++; break;
31426 default: esc = ESC_0F; break;
31427 }
sewardj80611e32012-01-20 13:07:24 +000031428 }
31429 }
31430
31431 /* So now we're really really looking at the primary opcode
31432 byte. */
31433 Long delta_at_primary_opcode = delta;
sewardjc4530ae2012-05-21 10:18:49 +000031434
31435 if (!(pfx & PFX_VEX)) {
31436 /* Handle non-VEX prefixed instructions. "Legacy" (non-VEX) SSE
31437 instructions preserve the upper 128 bits of YMM registers;
31438 iow we can simply ignore the presence of the upper halves of
31439 these registers. */
31440 switch (esc) {
31441 case ESC_NONE:
31442 delta = dis_ESC_NONE( &dres, expect_CAS,
31443 resteerOkFn, resteerCisOk, callback_opaque,
31444 archinfo, vbi, pfx, sz, delta );
31445 break;
31446 case ESC_0F:
31447 delta = dis_ESC_0F ( &dres, expect_CAS,
31448 resteerOkFn, resteerCisOk, callback_opaque,
31449 archinfo, vbi, pfx, sz, delta );
31450 break;
31451 case ESC_0F38:
31452 delta = dis_ESC_0F38( &dres,
31453 resteerOkFn, resteerCisOk, callback_opaque,
31454 archinfo, vbi, pfx, sz, delta );
31455 break;
31456 case ESC_0F3A:
31457 delta = dis_ESC_0F3A( &dres,
31458 resteerOkFn, resteerCisOk, callback_opaque,
31459 archinfo, vbi, pfx, sz, delta );
31460 break;
31461 default:
31462 vassert(0);
31463 }
31464 } else {
31465 /* VEX prefixed instruction */
31466 /* Sloppy Intel wording: "An instruction encoded with a VEX.128
31467 prefix that loads a YMM register operand ..." zeroes out bits
31468 128 and above of the register. */
31469 Bool uses_vvvv = False;
31470 switch (esc) {
31471 case ESC_0F:
31472 delta = dis_ESC_0F__VEX ( &dres, &uses_vvvv,
31473 resteerOkFn, resteerCisOk,
31474 callback_opaque,
31475 archinfo, vbi, pfx, sz, delta );
31476 break;
31477 case ESC_0F38:
31478 delta = dis_ESC_0F38__VEX ( &dres, &uses_vvvv,
31479 resteerOkFn, resteerCisOk,
31480 callback_opaque,
31481 archinfo, vbi, pfx, sz, delta );
31482 break;
31483 case ESC_0F3A:
31484 delta = dis_ESC_0F3A__VEX ( &dres, &uses_vvvv,
31485 resteerOkFn, resteerCisOk,
31486 callback_opaque,
31487 archinfo, vbi, pfx, sz, delta );
31488 break;
sewardjac75d7b2012-05-23 12:42:39 +000031489 case ESC_NONE:
31490 /* The presence of a VEX prefix, by Intel definition,
31491 always implies at least an 0F escape. */
31492 goto decode_failure;
sewardjc4530ae2012-05-21 10:18:49 +000031493 default:
sewardjac75d7b2012-05-23 12:42:39 +000031494 vassert(0);
sewardjc4530ae2012-05-21 10:18:49 +000031495 }
31496 /* If the insn doesn't use VEX.vvvv then it must be all ones.
31497 Check this. */
31498 if (!uses_vvvv) {
31499 if (getVexNvvvv(pfx) != 0)
31500 goto decode_failure;
31501 }
sewardj80611e32012-01-20 13:07:24 +000031502 }
sewardjc4530ae2012-05-21 10:18:49 +000031503
sewardj80611e32012-01-20 13:07:24 +000031504 vassert(delta - delta_at_primary_opcode >= 0);
31505 vassert(delta - delta_at_primary_opcode < 16/*let's say*/);
31506
31507 /* Use delta == delta_at_primary_opcode to denote decode failure.
31508 This implies that any successful decode must use up at least
31509 one byte. */
31510 if (delta == delta_at_primary_opcode)
31511 goto decode_failure;
31512 else
31513 goto decode_success; /* \o/ */
31514
31515#if 0 /* XYZZY */
sewardja6b93d12005-02-17 09:28:28 +000031516
31517 /* ---------------------------------------------------- */
sewardj09717342005-05-05 21:34:02 +000031518 /* --- The SSE/SSE2 decoder. --- */
sewardja6b93d12005-02-17 09:28:28 +000031519 /* ---------------------------------------------------- */
31520
31521 /* What did I do to deserve SSE ? Perhaps I was really bad in a
31522 previous life? */
31523
sewardj09717342005-05-05 21:34:02 +000031524 /* Note, this doesn't handle SSE3 right now. All amd64s support
31525 SSE2 as a minimum so there is no point distinguishing SSE1 vs
31526 SSE2. */
31527
florian8462d112014-09-24 15:18:09 +000031528 insn = &guest_code[delta];
sewardja6b93d12005-02-17 09:28:28 +000031529
sewardj5abcfe62007-01-10 04:59:33 +000031530 /* FXSAVE is spuriously at the start here only because it is
31531 thusly placed in guest-x86/toIR.c. */
31532
sewardj5abcfe62007-01-10 04:59:33 +000031533 /* ------ SSE decoder main ------ */
sewardj432f8b62005-05-10 02:50:05 +000031534
sewardj5992bd02005-05-11 02:13:42 +000031535 /* ---------------------------------------------------- */
31536 /* --- end of the SSE decoder. --- */
31537 /* ---------------------------------------------------- */
31538
31539 /* ---------------------------------------------------- */
31540 /* --- start of the SSE2 decoder. --- */
31541 /* ---------------------------------------------------- */
sewardj4c328cf2005-05-05 12:05:54 +000031542
sewardjdf0e0022005-01-25 15:48:43 +000031543 /* ---------------------------------------------------- */
31544 /* --- end of the SSE/SSE2 decoder. --- */
31545 /* ---------------------------------------------------- */
31546
sewardjfcf21f32006-08-04 14:51:19 +000031547 /* ---------------------------------------------------- */
31548 /* --- start of the SSE3 decoder. --- */
31549 /* ---------------------------------------------------- */
31550
sewardjfcf21f32006-08-04 14:51:19 +000031551 /* ---------------------------------------------------- */
31552 /* --- end of the SSE3 decoder. --- */
31553 /* ---------------------------------------------------- */
31554
sewardjd166e282008-02-06 11:42:45 +000031555 /* ---------------------------------------------------- */
31556 /* --- start of the SSSE3 decoder. --- */
31557 /* ---------------------------------------------------- */
31558
sewardjd166e282008-02-06 11:42:45 +000031559 /* ---------------------------------------------------- */
31560 /* --- end of the SSSE3 decoder. --- */
31561 /* ---------------------------------------------------- */
31562
de5a70f5c2010-04-01 23:08:59 +000031563 /* ---------------------------------------------------- */
31564 /* --- start of the SSE4 decoder --- */
31565 /* ---------------------------------------------------- */
31566
de5a70f5c2010-04-01 23:08:59 +000031567 /* ---------------------------------------------------- */
31568 /* --- end of the SSE4 decoder --- */
31569 /* ---------------------------------------------------- */
31570
sewardj7a240552005-01-28 21:37:12 +000031571 /*after_sse_decoders:*/
sewardjdf0e0022005-01-25 15:48:43 +000031572
31573 /* Get the primary opcode. */
sewardj8c332e22005-01-28 01:36:56 +000031574 opc = getUChar(delta); delta++;
sewardjdf0e0022005-01-25 15:48:43 +000031575
31576 /* We get here if the current insn isn't SSE, or this CPU doesn't
31577 support SSE. */
31578
31579 switch (opc) {
31580
31581 /* ------------------------ Control flow --------------- */
31582
sewardj3ca55a12005-01-27 16:06:23 +000031583 /* ------------------------ CWD/CDQ -------------------- */
31584
sewardj8d965312005-02-25 02:48:47 +000031585 /* ------------------------ FPU ops -------------------- */
31586
sewardj4fa325a2005-11-03 13:27:24 +000031587 /* ------------------------ INT ------------------------ */
31588
31589 case 0xCD: { /* INT imm8 */
31590 IRJumpKind jk = Ijk_Boring;
31591 if (have66orF2orF3(pfx)) goto decode_failure;
31592 d64 = getUChar(delta); delta++;
31593 switch (d64) {
31594 case 32: jk = Ijk_Sys_int32; break;
31595 default: goto decode_failure;
31596 }
31597 guest_RIP_next_mustcheck = True;
31598 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
31599 jmp_lit(jk, guest_RIP_next_assumed);
31600 /* It's important that all ArchRegs carry their up-to-date value
31601 at this point. So we declare an end-of-block here, which
31602 forces any TempRegs caching ArchRegs to be flushed. */
sewardjc6f970f2012-04-02 21:54:49 +000031603 vassert(dres.whatNext == Dis_StopHere);
sewardj4fa325a2005-11-03 13:27:24 +000031604 DIP("int $0x%02x\n", (UInt)d64);
31605 break;
31606 }
31607
sewardjf8c37f72005-02-07 18:55:29 +000031608 /* ------------------------ Jcond, byte offset --------- */
31609
sewardj32b2bbe2005-01-28 00:50:10 +000031610 /* ------------------------ IMUL ----------------------- */
31611
sewardj1389d4d2005-01-28 13:46:29 +000031612 /* ------------------------ MOV ------------------------ */
31613
sewardj5e525292005-01-28 15:13:10 +000031614 /* ------------------------ MOVx ------------------------ */
31615
sewardj4c328cf2005-05-05 12:05:54 +000031616 /* ------------------------ opl imm, A ----------------- */
31617
sewardj118b23e2005-01-29 02:14:44 +000031618 /* ------------------------ opl Ev, Gv ----------------- */
sewardj118b23e2005-01-29 02:14:44 +000031619
31620 /* ------------------------ opl Gv, Ev ----------------- */
31621
sewardj55dbb262005-01-28 16:36:51 +000031622 /* ------------------------ POP ------------------------ */
31623
sewardj55dbb262005-01-28 16:36:51 +000031624 /* ------------------------ PUSH ----------------------- */
31625
sewardj909c06d2005-02-19 22:47:41 +000031626 /* ------ AE: SCAS variants ------ */
sewardjd0a12df2005-02-10 02:07:43 +000031627
sewardj909c06d2005-02-19 22:47:41 +000031628 /* ------ A6, A7: CMPS variants ------ */
sewardja6b93d12005-02-17 09:28:28 +000031629
sewardj909c06d2005-02-19 22:47:41 +000031630 /* ------ AA, AB: STOS variants ------ */
sewardjd0a12df2005-02-10 02:07:43 +000031631
sewardj909c06d2005-02-19 22:47:41 +000031632 /* ------ A4, A5: MOVS variants ------ */
sewardj7de0d3c2005-02-13 02:26:41 +000031633
31634 /* ------------------------ XCHG ----------------------- */
31635
sewardjbb4396c2007-11-20 17:29:08 +000031636 /* ------------------------ IN / OUT ----------------------- */
31637
sewardj3ca55a12005-01-27 16:06:23 +000031638 /* ------------------------ (Grp1 extensions) ---------- */
31639
sewardj118b23e2005-01-29 02:14:44 +000031640 /* ------------------------ (Grp2 extensions) ---------- */
sewardj03b07cc2005-01-31 18:09:43 +000031641
sewardj32b2bbe2005-01-28 00:50:10 +000031642 /* ------------------------ (Grp3 extensions) ---------- */
31643
sewardj03b07cc2005-01-31 18:09:43 +000031644 /* ------------------------ (Grp4 extensions) ---------- */
31645
sewardj354e5c62005-01-27 20:12:52 +000031646 /* ------------------------ (Grp5 extensions) ---------- */
31647
sewardj3ca55a12005-01-27 16:06:23 +000031648 /* ------------------------ Escapes to 2-byte opcodes -- */
31649
31650 case 0x0F: {
sewardj8c332e22005-01-28 01:36:56 +000031651 opc = getUChar(delta); delta++;
sewardj3ca55a12005-01-27 16:06:23 +000031652 switch (opc) {
31653
sewardj1d511802005-03-27 17:59:45 +000031654 /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */
31655
sewardjf53b7352005-04-06 20:01:56 +000031656 /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */
31657
sewardj82c9f2f2005-03-02 16:05:13 +000031658 /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */
31659
sewardj9ed16802005-08-24 10:46:19 +000031660 /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */
31661
sewardj3ca55a12005-01-27 16:06:23 +000031662 /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */
sewardj3ca55a12005-01-27 16:06:23 +000031663
sewardja6b93d12005-02-17 09:28:28 +000031664 /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */
31665
sewardjd0a12df2005-02-10 02:07:43 +000031666 /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */
31667
sewardj5e525292005-01-28 15:13:10 +000031668 /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */
31669
sewardj32b2bbe2005-01-28 00:50:10 +000031670 /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */
31671
sewardjec387ca2006-08-01 18:36:25 +000031672 /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */
31673
sewardj1389d4d2005-01-28 13:46:29 +000031674 /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
sewardj1389d4d2005-01-28 13:46:29 +000031675
sewardjb04a47c2005-08-10 12:27:46 +000031676 /* =-=-=-=-=-=-=-=-=- PREFETCH =-=-=-=-=-=-=-=-=-= */
sewardjb04a47c2005-08-10 12:27:46 +000031677
sewardj31191072005-02-05 18:24:47 +000031678 /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
sewardj118b23e2005-01-29 02:14:44 +000031679
31680 /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
sewardj118b23e2005-01-29 02:14:44 +000031681
sewardj33ef9c22005-11-04 20:05:57 +000031682 /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */
31683
sewardje1698952005-02-08 15:02:39 +000031684 /* =-=-=-=-=-=-=-=-=- SYSCALL -=-=-=-=-=-=-=-=-=-= */
sewardje1698952005-02-08 15:02:39 +000031685
sewardjb4fd2e72005-03-23 13:34:11 +000031686 /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */
31687
sewardjb9dc2432010-06-07 16:22:22 +000031688 /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
sewardjb9dc2432010-06-07 16:22:22 +000031689
sewardj3ca55a12005-01-27 16:06:23 +000031690 /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */
31691
31692 default:
31693 goto decode_failure;
31694 } /* switch (opc) for the 2-byte opcodes */
31695 goto decode_success;
31696 } /* case 0x0F: of primary opcode */
sewardjdf0e0022005-01-25 15:48:43 +000031697
31698 /* ------------------------ ??? ------------------------ */
sewardj80611e32012-01-20 13:07:24 +000031699#endif /* XYZZY */
sewardjdf0e0022005-01-25 15:48:43 +000031700
sewardj80611e32012-01-20 13:07:24 +000031701 //default:
sewardjdf0e0022005-01-25 15:48:43 +000031702 decode_failure:
31703 /* All decode failures end up here. */
sewardj442e51a2012-12-06 18:08:04 +000031704 if (sigill_diag) {
31705 vex_printf("vex amd64->IR: unhandled instruction bytes: "
31706 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
31707 (Int)getUChar(delta_start+0),
31708 (Int)getUChar(delta_start+1),
31709 (Int)getUChar(delta_start+2),
31710 (Int)getUChar(delta_start+3),
31711 (Int)getUChar(delta_start+4),
31712 (Int)getUChar(delta_start+5),
31713 (Int)getUChar(delta_start+6),
31714 (Int)getUChar(delta_start+7) );
31715 vex_printf("vex amd64->IR: REX=%d REX.W=%d REX.R=%d REX.X=%d REX.B=%d\n",
31716 haveREX(pfx) ? 1 : 0, getRexW(pfx), getRexR(pfx),
31717 getRexX(pfx), getRexB(pfx));
31718 vex_printf("vex amd64->IR: VEX=%d VEX.L=%d VEX.nVVVV=0x%x ESC=%s\n",
31719 haveVEX(pfx) ? 1 : 0, getVexL(pfx),
31720 getVexNvvvv(pfx),
31721 esc==ESC_NONE ? "NONE" :
31722 esc==ESC_0F ? "0F" :
31723 esc==ESC_0F38 ? "0F38" :
31724 esc==ESC_0F3A ? "0F3A" : "???");
31725 vex_printf("vex amd64->IR: PFX.66=%d PFX.F2=%d PFX.F3=%d\n",
31726 have66(pfx) ? 1 : 0, haveF2(pfx) ? 1 : 0,
31727 haveF3(pfx) ? 1 : 0);
31728 }
sewardjdf0e0022005-01-25 15:48:43 +000031729
31730 /* Tell the dispatcher that this insn cannot be decoded, and so has
31731 not been executed, and (is currently) the next to be executed.
31732 RIP should be up-to-date since it is made so at the start of each
31733 insn, but nevertheless be paranoid and update it again right
31734 now. */
sewardj9e6491a2005-07-02 19:24:10 +000031735 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
sewardjc6f970f2012-04-02 21:54:49 +000031736 jmp_lit(&dres, Ijk_NoDecode, guest_RIP_curr_instr);
31737 vassert(dres.whatNext == Dis_StopHere);
31738 dres.len = 0;
sewardje9d8a262009-07-01 08:06:34 +000031739 /* We also need to say that a CAS is not expected now, regardless
31740 of what it might have been set to at the start of the function,
31741 since the IR that we've emitted just above (to synthesise a
31742 SIGILL) does not involve any CAS, and presumably no other IR has
31743 been emitted for this (non-decoded) insn. */
31744 *expect_CAS = False;
sewardj9e6491a2005-07-02 19:24:10 +000031745 return dres;
sewardjdf0e0022005-01-25 15:48:43 +000031746
sewardj80611e32012-01-20 13:07:24 +000031747 // } /* switch (opc) for the main (primary) opcode switch. */
sewardjdf0e0022005-01-25 15:48:43 +000031748
31749 decode_success:
31750 /* All decode successes end up here. */
sewardjc6f970f2012-04-02 21:54:49 +000031751 switch (dres.whatNext) {
31752 case Dis_Continue:
31753 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
31754 break;
31755 case Dis_ResteerU:
31756 case Dis_ResteerC:
31757 stmt( IRStmt_Put( OFFB_RIP, mkU64(dres.continueAt) ) );
31758 break;
31759 case Dis_StopHere:
31760 break;
31761 default:
31762 vassert(0);
31763 }
31764
sewardjdf0e0022005-01-25 15:48:43 +000031765 DIP("\n");
florian8e2d9712015-01-02 14:40:59 +000031766 dres.len = toUInt(delta - delta_start);
sewardj9e6491a2005-07-02 19:24:10 +000031767 return dres;
sewardjdf0e0022005-01-25 15:48:43 +000031768}
31769
31770#undef DIP
31771#undef DIS
sewardjd20c8852005-01-20 20:04:07 +000031772
sewardj9e6491a2005-07-02 19:24:10 +000031773
31774/*------------------------------------------------------------*/
31775/*--- Top-level fn ---*/
31776/*------------------------------------------------------------*/
31777
31778/* Disassemble a single instruction into IR. The instruction
31779 is located in host memory at &guest_code[delta]. */
31780
sewardjdd40fdf2006-12-24 02:20:24 +000031781DisResult disInstr_AMD64 ( IRSB* irsb_IN,
florianbeac5302014-12-31 12:09:38 +000031782 Bool (*resteerOkFn) ( void*, Addr ),
sewardj984d9b12010-01-15 10:53:21 +000031783 Bool resteerCisOk,
sewardjc716aea2006-01-17 01:48:46 +000031784 void* callback_opaque,
florian8462d112014-09-24 15:18:09 +000031785 const UChar* guest_code_IN,
sewardj9e6491a2005-07-02 19:24:10 +000031786 Long delta,
floriand4cc0de2015-01-02 11:44:12 +000031787 Addr guest_IP,
sewardja5f55da2006-04-30 23:37:32 +000031788 VexArch guest_arch,
floriancacba8e2014-12-15 18:58:07 +000031789 const VexArchInfo* archinfo,
31790 const VexAbiInfo* abiinfo,
sewardj9b769162014-07-24 12:42:03 +000031791 VexEndness host_endness_IN,
sewardj442e51a2012-12-06 18:08:04 +000031792 Bool sigill_diag_IN )
sewardj9e6491a2005-07-02 19:24:10 +000031793{
sewardje9d8a262009-07-01 08:06:34 +000031794 Int i, x1, x2;
31795 Bool expect_CAS, has_CAS;
sewardj9e6491a2005-07-02 19:24:10 +000031796 DisResult dres;
31797
31798 /* Set globals (see top of this file) */
sewardja5f55da2006-04-30 23:37:32 +000031799 vassert(guest_arch == VexArchAMD64);
sewardj9e6491a2005-07-02 19:24:10 +000031800 guest_code = guest_code_IN;
sewardjdd40fdf2006-12-24 02:20:24 +000031801 irsb = irsb_IN;
sewardj9b769162014-07-24 12:42:03 +000031802 host_endness = host_endness_IN;
sewardj9e6491a2005-07-02 19:24:10 +000031803 guest_RIP_curr_instr = guest_IP;
31804 guest_RIP_bbstart = guest_IP - delta;
31805
31806 /* We'll consult these after doing disInstr_AMD64_WRK. */
31807 guest_RIP_next_assumed = 0;
31808 guest_RIP_next_mustcheck = False;
31809
sewardje9d8a262009-07-01 08:06:34 +000031810 x1 = irsb_IN->stmts_used;
31811 expect_CAS = False;
sewardjc6f970f2012-04-02 21:54:49 +000031812 dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
sewardj984d9b12010-01-15 10:53:21 +000031813 resteerCisOk,
sewardje9d8a262009-07-01 08:06:34 +000031814 callback_opaque,
sewardj442e51a2012-12-06 18:08:04 +000031815 delta, archinfo, abiinfo, sigill_diag_IN );
sewardje9d8a262009-07-01 08:06:34 +000031816 x2 = irsb_IN->stmts_used;
31817 vassert(x2 >= x1);
sewardj9e6491a2005-07-02 19:24:10 +000031818
31819 /* If disInstr_AMD64_WRK tried to figure out the next rip, check it
31820 got it right. Failure of this assertion is serious and denotes
31821 a bug in disInstr. */
31822 if (guest_RIP_next_mustcheck
31823 && guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) {
31824 vex_printf("\n");
31825 vex_printf("assumed next %%rip = 0x%llx\n",
31826 guest_RIP_next_assumed );
31827 vex_printf(" actual next %%rip = 0x%llx\n",
31828 guest_RIP_curr_instr + dres.len );
sewardje9d8a262009-07-01 08:06:34 +000031829 vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
31830 }
31831
31832 /* See comment at the top of disInstr_AMD64_WRK for meaning of
31833 expect_CAS. Here, we (sanity-)check for the presence/absence of
31834 IRCAS as directed by the returned expect_CAS value. */
31835 has_CAS = False;
31836 for (i = x1; i < x2; i++) {
31837 if (irsb_IN->stmts[i]->tag == Ist_CAS)
31838 has_CAS = True;
31839 }
31840
31841 if (expect_CAS != has_CAS) {
31842 /* inconsistency detected. re-disassemble the instruction so as
31843 to generate a useful error message; then assert. */
31844 vex_traceflags |= VEX_TRACE_FE;
sewardjc6f970f2012-04-02 21:54:49 +000031845 dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
sewardj984d9b12010-01-15 10:53:21 +000031846 resteerCisOk,
sewardje9d8a262009-07-01 08:06:34 +000031847 callback_opaque,
sewardj442e51a2012-12-06 18:08:04 +000031848 delta, archinfo, abiinfo, sigill_diag_IN );
sewardje9d8a262009-07-01 08:06:34 +000031849 for (i = x1; i < x2; i++) {
31850 vex_printf("\t\t");
31851 ppIRStmt(irsb_IN->stmts[i]);
31852 vex_printf("\n");
31853 }
31854 /* Failure of this assertion is serious and denotes a bug in
31855 disInstr. */
31856 vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
sewardj9e6491a2005-07-02 19:24:10 +000031857 }
31858
31859 return dres;
31860}
31861
31862
sewardj9a660ea2010-07-29 11:34:38 +000031863/*------------------------------------------------------------*/
31864/*--- Unused stuff ---*/
31865/*------------------------------------------------------------*/
31866
31867// A potentially more Memcheck-friendly version of gen_LZCNT, if
31868// this should ever be needed.
31869//
31870//static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
31871//{
31872// /* Scheme is simple: propagate the most significant 1-bit into all
31873// lower positions in the word. This gives a word of the form
31874// 0---01---1. Now invert it, giving a word of the form
31875// 1---10---0, then do a population-count idiom (to count the 1s,
31876// which is the number of leading zeroes, or the word size if the
31877 // original word was 0).
31878// */
31879// Int i;
31880// IRTemp t[7];
31881// for (i = 0; i < 7; i++) {
31882// t[i] = newTemp(ty);
31883// }
31884// if (ty == Ity_I64) {
31885// assign(t[0], binop(Iop_Or64, mkexpr(src),
31886// binop(Iop_Shr64, mkexpr(src), mkU8(1))));
31887// assign(t[1], binop(Iop_Or64, mkexpr(t[0]),
31888// binop(Iop_Shr64, mkexpr(t[0]), mkU8(2))));
31889// assign(t[2], binop(Iop_Or64, mkexpr(t[1]),
31890// binop(Iop_Shr64, mkexpr(t[1]), mkU8(4))));
31891// assign(t[3], binop(Iop_Or64, mkexpr(t[2]),
31892// binop(Iop_Shr64, mkexpr(t[2]), mkU8(8))));
31893// assign(t[4], binop(Iop_Or64, mkexpr(t[3]),
31894// binop(Iop_Shr64, mkexpr(t[3]), mkU8(16))));
31895// assign(t[5], binop(Iop_Or64, mkexpr(t[4]),
31896// binop(Iop_Shr64, mkexpr(t[4]), mkU8(32))));
31897// assign(t[6], unop(Iop_Not64, mkexpr(t[5])));
31898// return gen_POPCOUNT(ty, t[6]);
31899// }
31900// if (ty == Ity_I32) {
31901// assign(t[0], binop(Iop_Or32, mkexpr(src),
31902// binop(Iop_Shr32, mkexpr(src), mkU8(1))));
31903// assign(t[1], binop(Iop_Or32, mkexpr(t[0]),
31904// binop(Iop_Shr32, mkexpr(t[0]), mkU8(2))));
31905// assign(t[2], binop(Iop_Or32, mkexpr(t[1]),
31906// binop(Iop_Shr32, mkexpr(t[1]), mkU8(4))));
31907// assign(t[3], binop(Iop_Or32, mkexpr(t[2]),
31908// binop(Iop_Shr32, mkexpr(t[2]), mkU8(8))));
31909// assign(t[4], binop(Iop_Or32, mkexpr(t[3]),
31910// binop(Iop_Shr32, mkexpr(t[3]), mkU8(16))));
31911// assign(t[5], unop(Iop_Not32, mkexpr(t[4])));
31912// return gen_POPCOUNT(ty, t[5]);
31913// }
31914// if (ty == Ity_I16) {
31915// assign(t[0], binop(Iop_Or16, mkexpr(src),
31916// binop(Iop_Shr16, mkexpr(src), mkU8(1))));
31917// assign(t[1], binop(Iop_Or16, mkexpr(t[0]),
31918// binop(Iop_Shr16, mkexpr(t[0]), mkU8(2))));
31919// assign(t[2], binop(Iop_Or16, mkexpr(t[1]),
31920// binop(Iop_Shr16, mkexpr(t[1]), mkU8(4))));
31921// assign(t[3], binop(Iop_Or16, mkexpr(t[2]),
31922// binop(Iop_Shr16, mkexpr(t[2]), mkU8(8))));
31923// assign(t[4], unop(Iop_Not16, mkexpr(t[3])));
31924// return gen_POPCOUNT(ty, t[4]);
31925// }
31926// vassert(0);
31927//}
31928
sewardj9e6491a2005-07-02 19:24:10 +000031929
sewardjd20c8852005-01-20 20:04:07 +000031930/*--------------------------------------------------------------------*/
sewardjcef7d3e2009-07-02 12:21:59 +000031931/*--- end guest_amd64_toIR.c ---*/
sewardjd20c8852005-01-20 20:04:07 +000031932/*--------------------------------------------------------------------*/